Merge remote-tracking branch 'upstream/master' into fix-tuple-formatting

This commit is contained in:
Nikolay Degterinsky 2024-07-28 11:37:19 +00:00
commit e84221605d
1127 changed files with 19658 additions and 14677 deletions

168
.github/actions/release/action.yml vendored Normal file
View File

@ -0,0 +1,168 @@
name: Release
description: Makes patch releases and creates new release branch
inputs:
ref:
description: 'Git reference (branch or commit sha) from which to create the release'
required: true
type: string
type:
description: 'The type of release: "new" for a new release or "patch" for a patch release'
required: true
type: choice
options:
- patch
- new
dry-run:
description: 'Dry run'
required: false
default: true
type: boolean
token:
required: true
type: string
runs:
using: "composite"
steps:
- name: Prepare Release Info
shell: bash
run: |
python3 ./tests/ci/create_release.py --prepare-release-info \
--ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
${{ inputs.dry-run && '--dry-run' || '' }}
echo "::group::Release Info"
python3 -m json.tool /tmp/release_info.json
echo "::endgroup::"
release_tag=$(jq -r '.release_tag' /tmp/release_info.json)
commit_sha=$(jq -r '.commit_sha' /tmp/release_info.json)
echo "Release Tag: $release_tag"
echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
- name: Download All Release Artifacts
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --download-packages ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push Git Tag for the Release
shell: bash
run: |
python3 ./tests/ci/create_release.py --push-release-tag ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push New Release Branch
if: ${{ inputs.type == 'new' }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Bump CH Version and Update Contributors' List
shell: bash
run: |
python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Bump Docker versions, Changelog, Security
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
git checkout master
python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security"
echo "List versions"
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
echo "Update docker version"
./utils/list-versions/update-docker-version.sh
echo "Generate ChangeLog"
export CI=1
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
--volume=".:/ClickHouse" clickhouse/style-test \
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
--gh-user-or-token=${{ inputs.token }} --jobs=5 \
--output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
echo "Generate Security"
python3 ./utils/security-generator/generate_security.py > SECURITY.md
git diff HEAD
- name: Create ChangeLog PR
if: ${{ inputs.type == 'patch' && ! inputs.dry-run }}
uses: peter-evans/create-pull-request@v6
with:
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
token: ${{ inputs.token }}
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
branch: auto/${{ env.RELEASE_TAG }}
assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
delete-branch: true
title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
labels: do not test
body: |
Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
### Changelog category (leave one):
- Not for changelog (changelog entry is not required)
- name: Complete previous steps and Restore git state
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --set-progress-completed
git reset --hard HEAD
git checkout "$GITHUB_REF_NAME"
- name: Create GH Release
shell: bash
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export TGZ Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-tgz ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test TGZ Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-tgz ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export RPM Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-rpm ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test RPM Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-rpm ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export Debian Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-debian ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test Debian Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-debian ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Docker clickhouse/clickhouse-server building
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
cd "./tests/ci"
python3 ./create_release.py --set-progress-started --progress "docker server release"
export CHECK_NAME="Docker server image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
python3 ./create_release.py --set-progress-completed
- name: Docker clickhouse/clickhouse-keeper building
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
cd "./tests/ci"
python3 ./create_release.py --set-progress-started --progress "docker keeper release"
export CHECK_NAME="Docker keeper image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
python3 ./create_release.py --set-progress-completed
- name: Set current Release progress to Completed with OK
shell: bash
run: |
python3 ./tests/ci/create_release.py --set-progress-started --progress "completed"
python3 ./tests/ci/create_release.py --set-progress-completed
- name: Post Slack Message
if: ${{ !cancelled() }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run && '--dry-run' || '' }}

View File

@ -1,44 +1,110 @@
name: AutoRelease
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
DRY_RUN: true
concurrency:
group: auto-release
group: release
on: # yamllint disable-line rule:truthy
# schedule:
# - cron: '0 10-16 * * 1-5'
# Workflow uses a test bucket for packages and dry run mode (no real releases)
schedule:
- cron: '0 9 * * *'
- cron: '0 15 * * *'
workflow_dispatch:
inputs:
dry-run:
description: 'Dry run'
required: false
default: true
type: boolean
jobs:
CherryPick:
runs-on: [self-hosted, style-checker-aarch64]
AutoRelease:
runs-on: [self-hosted, release-maker]
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Set envs
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/cherry_pick
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
REPO_OWNER=ClickHouse
REPO_NAME=ClickHouse
REPO_TEAM=core
EOF
- name: Set DRY_RUN for schedule
if: ${{ github.event_name == 'schedule' }}
run: echo "DRY_RUN=true" >> "$GITHUB_ENV"
- name: Set DRY_RUN for dispatch
if: ${{ github.event_name == 'workflow_dispatch' }}
run: echo "DRY_RUN=${{ github.event.inputs.dry-run }}" >> "$GITHUB_ENV"
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0
- name: Auto-release
- name: Auto Release Prepare
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --release-after-days=3
- name: Cleanup
if: always()
python3 auto_release.py --prepare
echo "::group::Auto Release Info"
python3 -m json.tool /tmp/autorelease_info.json
echo "::endgroup::"
{
echo 'AUTO_RELEASE_PARAMS<<EOF'
cat /tmp/autorelease_info.json
echo 'EOF'
} >> "$GITHUB_ENV"
- name: Post Release Branch statuses
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --post-status
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[0].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[1].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2] && fromJson(env.AUTO_RELEASE_PARAMS).releases[2].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3] && fromJson(env.AUTO_RELEASE_PARAMS).releases[3].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4] && fromJson(env.AUTO_RELEASE_PARAMS).releases[4].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Post Slack Message
if: ${{ !cancelled() }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }}
- name: Clean up
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:

View File

@ -36,10 +36,6 @@ jobs:
cd "$GITHUB_WORKSPACE/tests/ci"
echo "Testing the main ci directory"
python3 -m unittest discover -s . -p 'test_*.py'
for dir in *_lambda/; do
echo "Testing $dir"
python3 -m unittest discover -s "$dir" -p 'test_*.py'
done
- name: PrepareRunConfig
id: runconfig
run: |
@ -245,8 +241,9 @@ jobs:
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
FinishCheck:
if: ${{ !failure() && !cancelled() }}
if: ${{ !cancelled() }}
needs:
- RunConfig
- Builds_Report
- FunctionalStatelessTestAsan
- FunctionalStatefulTestDebug
@ -261,6 +258,7 @@ jobs:
with:
clear-repository: true
- name: Finish label
if: ${{ !failure() }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
# update mergeable check
@ -268,3 +266,10 @@ jobs:
# update overall ci report
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
python3 merge_pr.py
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -2,7 +2,6 @@ name: CreateRelease
concurrency:
group: release
'on':
workflow_dispatch:
inputs:
@ -31,136 +30,15 @@ jobs:
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Set envs
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
run: |
cat >> "$GITHUB_ENV" << 'EOF'
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
RELEASE_INFO_FILE=${{ runner.temp }}/release_info.json
EOF
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0
- name: Prepare Release Info
run: |
python3 ./tests/ci/create_release.py --prepare-release-info \
--ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
--outfile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
echo "::group::Release Info"
python3 -m json.tool "$RELEASE_INFO_FILE"
echo "::endgroup::"
release_tag=$(jq -r '.release_tag' "$RELEASE_INFO_FILE")
commit_sha=$(jq -r '.commit_sha' "$RELEASE_INFO_FILE")
echo "Release Tag: $release_tag"
echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
- name: Download All Release Artifacts
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/create_release.py --infile "$RELEASE_INFO_FILE" --download-packages ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push Git Tag for the Release
run: |
python3 ./tests/ci/create_release.py --push-release-tag --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Push New Release Branch
if: ${{ inputs.type == 'new' }}
run: |
python3 ./tests/ci/create_release.py --push-new-release-branch --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Bump CH Version and Update Contributors' List
run: |
python3 ./tests/ci/create_release.py --create-bump-version-pr --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Checkout master
run: |
git checkout master
- name: Bump Docker versions, Changelog, Security
if: ${{ inputs.type == 'patch' }}
run: |
[ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1
echo "List versions"
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
echo "Update docker version"
./utils/list-versions/update-docker-version.sh
echo "Generate ChangeLog"
export CI=1
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
--volume=".:/ClickHouse" clickhouse/style-test \
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
--gh-user-or-token="$GH_TOKEN" --jobs=5 \
--output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
echo "Generate Security"
python3 ./utils/security-generator/generate_security.py > SECURITY.md
git diff HEAD
- name: Create ChangeLog PR
if: ${{ inputs.type == 'patch' && ! inputs.dry-run }}
uses: peter-evans/create-pull-request@v6
- name: Call Release Action
uses: ./.github/actions/release
with:
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
branch: auto/${{ env.RELEASE_TAG }}
assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
delete-branch: true
title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
labels: do not test
body: |
Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
### Changelog category (leave one):
- Not for changelog (changelog entry is not required)
- name: Reset changes if Dry-run
if: ${{ inputs.dry-run }}
run: |
git reset --hard HEAD
- name: Checkout back to GITHUB_REF
run: |
git checkout "$GITHUB_REF_NAME"
- name: Create GH Release
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/create_release.py --create-gh-release \
--infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export TGZ Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test TGZ Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export RPM Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test RPM Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Export Debian Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --export-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Test Debian Packages
if: ${{ inputs.type == 'patch' }}
run: |
python3 ./tests/ci/artifactory.py --test-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }}
- name: Docker clickhouse/clickhouse-server building
if: ${{ inputs.type == 'patch' }}
run: |
cd "./tests/ci"
export CHECK_NAME="Docker server image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
- name: Docker clickhouse/clickhouse-keeper building
if: ${{ inputs.type == 'patch' }}
run: |
cd "./tests/ci"
export CHECK_NAME="Docker keeper image"
python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
- name: Post Slack Message
if: always()
run: |
echo Slack Message
ref: ${{ inputs.ref }}
type: ${{ inputs.type }}
dry-run: ${{ inputs.dry-run }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}

View File

@ -9,19 +9,64 @@ on: # yamllint disable-line rule:truthy
- cron: '0 */6 * * *'
workflow_dispatch:
jobs:
KeeperJepsenRelease:
uses: ./.github/workflows/reusable_simple_job.yml
RunConfig:
runs-on: [self-hosted, style-checker-aarch64]
outputs:
data: ${{ steps.runconfig.outputs.CI_DATA }}
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
test_name: Jepsen keeper check
runner_type: style-checker
report_required: true
clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get version
filter: tree:0
- name: PrepareRunConfig
id: runconfig
run: |
echo "::group::configure CI run"
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --workflow "$GITHUB_WORKFLOW" --outfile ${{ runner.temp }}/ci_run_data.json
echo "::endgroup::"
echo "::group::CI run configure results"
python3 -m json.tool ${{ runner.temp }}/ci_run_data.json
echo "::endgroup::"
{
echo 'CI_DATA<<EOF'
cat ${{ runner.temp }}/ci_run_data.json
echo 'EOF'
} >> "$GITHUB_OUTPUT"
KeeperJepsenRelease:
needs: [RunConfig]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse Keeper Jepsen
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 jepsen_check.py keeper
# ServerJepsenRelease:
# uses: ./.github/workflows/reusable_simple_job.yml
# with:
# test_name: Jepsen server check
# runner_type: style-checker
# run_command: |
# cd "$REPO_COPY/tests/ci"
# python3 jepsen_check.py server
ServerJepsenRelease:
if: false # skip for server
needs: [RunConfig]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse Server Jepsen
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 jepsen_check.py server
CheckWorkflow:
if: ${{ !cancelled() }}
needs: [RunConfig, ServerJepsenRelease, KeeperJepsenRelease]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -33,10 +33,6 @@ jobs:
# cd "$GITHUB_WORKSPACE/tests/ci"
# echo "Testing the main ci directory"
# python3 -m unittest discover -s . -p 'test_*.py'
# for dir in *_lambda/; do
# echo "Testing $dir"
# python3 -m unittest discover -s "$dir" -p 'test_*.py'
# done
- name: PrepareRunConfig
id: runconfig
run: |
@ -125,34 +121,6 @@ jobs:
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
MarkReleaseReady:
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, Builds_1, Builds_2]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Debug
run: |
echo need with different filters
cat << 'EOF'
${{ toJSON(needs) }}
${{ toJSON(needs.*.result) }}
no failures ${{ !contains(needs.*.result, 'failure') }}
no skips ${{ !contains(needs.*.result, 'skipped') }}
no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
EOF
- name: Not ready
# fail the job to be able to restart it
if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }}
run: exit 1
- name: Check out repository code
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
uses: ClickHouse/checkout@v1
- name: Mark Commit Release Ready
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 mark_release_ready.py
FinishCheck:
if: ${{ !cancelled() }}
needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
@ -164,3 +132,10 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -30,10 +30,6 @@ jobs:
cd "$GITHUB_WORKSPACE/tests/ci"
echo "Testing the main ci directory"
python3 -m unittest discover -s . -p 'test_*.py'
for dir in *_lambda/; do
echo "Testing $dir"
python3 -m unittest discover -s "$dir" -p 'test_*.py'
done
- name: PrepareRunConfig
id: runconfig
run: |
@ -97,7 +93,7 @@ jobs:
data: ${{ needs.RunConfig.outputs.data }}
CheckReadyForMerge:
if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }}
if: ${{ !cancelled() }}
# Test_2 or Test_3 must not have jobs required for Mergeable check
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1]
runs-on: [self-hosted, style-checker-aarch64]
@ -105,6 +101,14 @@ jobs:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Check and set merge status
if: ${{ needs.StyleCheck.result == 'success' }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -44,3 +44,17 @@ jobs:
with:
data: "${{ needs.RunConfig.outputs.data }}"
set_latest: true
CheckWorkflow:
if: ${{ !cancelled() }}
needs: [RunConfig, BuildDockers]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -48,10 +48,6 @@ jobs:
cd "$GITHUB_WORKSPACE/tests/ci"
echo "Testing the main ci directory"
python3 -m unittest discover -s . -p 'test_*.py'
for dir in *_lambda/; do
echo "Testing $dir"
python3 -m unittest discover -s "$dir" -p 'test_*.py'
done
- name: PrepareRunConfig
id: runconfig
run: |
@ -155,9 +151,10 @@ jobs:
data: ${{ needs.RunConfig.outputs.data }}
CheckReadyForMerge:
if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }}
# Test_2 or Test_3 must not have jobs required for Mergeable check
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1]
if: ${{ !cancelled() }}
# Test_2 or Test_3 do not have the jobs required for Mergeable check,
# however, set them as "needs" to get all checks results before the automatic merge occurs.
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
@ -165,9 +162,17 @@ jobs:
with:
filter: tree:0
- name: Check and set merge status
if: ${{ needs.StyleCheck.result == 'success' }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status
################################# Stage Final #################################
#

View File

@ -33,10 +33,6 @@ jobs:
cd "$GITHUB_WORKSPACE/tests/ci"
echo "Testing the main ci directory"
python3 -m unittest discover -s . -p 'test_*.py'
for dir in *_lambda/; do
echo "Testing $dir"
python3 -m unittest discover -s "$dir" -p 'test_*.py'
done
- name: PrepareRunConfig
id: runconfig
run: |
@ -445,8 +441,9 @@ jobs:
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
FinishCheck:
if: ${{ !failure() && !cancelled() }}
if: ${{ !cancelled() }}
needs:
- RunConfig
- DockerServerImage
- DockerKeeperImage
- Builds_Report
@ -482,9 +479,18 @@ jobs:
with:
clear-repository: true
- name: Finish label
if: ${{ !failure() }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
# update mergeable check
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
# update overall ci report
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -14,3 +14,9 @@ rules:
comments:
min-spaces-from-content: 1
document-start: disable
colons: disable
indentation: disable
line-length: disable
trailing-spaces: disable
truthy: disable
new-line-at-end-of-file: disable

View File

@ -1,4 +1,5 @@
### Table of Contents
**[ClickHouse release v24.7, 2024-07-30](#247)**<br/>
**[ClickHouse release v24.6, 2024-07-01](#246)**<br/>
**[ClickHouse release v24.5, 2024-05-30](#245)**<br/>
**[ClickHouse release v24.4, 2024-04-30](#244)**<br/>
@ -9,6 +10,178 @@
# 2024 Changelog
### <a id="247"></a> ClickHouse release 24.7, 2024-07-30
#### Backward Incompatible Change
* Forbid `CRATE MATERIALIZED VIEW ... ENGINE Replicated*MergeTree POPULATE AS SELECT ...` with Replicated databases. [#63963](https://github.com/ClickHouse/ClickHouse/pull/63963) ([vdimir](https://github.com/vdimir)).
* `clickhouse-keeper-client` will only accept paths in string literals, such as `ls '/hello/world'`, not bare strings such as `ls /hello/world`. [#65494](https://github.com/ClickHouse/ClickHouse/pull/65494) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Metric `KeeperOutstandingRequets` was renamed to `KeeperOutstandingRequests`. [#66206](https://github.com/ClickHouse/ClickHouse/pull/66206) ([Robert Schulze](https://github.com/rschu1ze)).
* Remove `is_deterministic` field from the `system.functions` table. [#66630](https://github.com/ClickHouse/ClickHouse/pull/66630) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Function `tuple` will now try to construct named tuples in query (controlled by `enable_named_columns_in_function_tuple`). Introduce function `tupleNames` to extract names from tuples. [#54881](https://github.com/ClickHouse/ClickHouse/pull/54881) ([Amos Bird](https://github.com/amosbird)).
#### New Feature
* Add `ASOF JOIN` support for `full_sorting_join` algorithm. [#55051](https://github.com/ClickHouse/ClickHouse/pull/55051) ([vdimir](https://github.com/vdimir)).
* Add new window function `percent_rank`. [#62747](https://github.com/ClickHouse/ClickHouse/pull/62747) ([lgbo](https://github.com/lgbo-ustc)).
* Support JWT authentication in `clickhouse-client` (will be available only in ClickHouse Cloud). [#62829](https://github.com/ClickHouse/ClickHouse/pull/62829) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Add SQL functions `changeYear`, `changeMonth`, `changeDay`, `changeHour`, `changeMinute`, `changeSecond`. For example, `SELECT changeMonth(toDate('2024-06-14'), 7)` returns date `2024-07-14`. [#63186](https://github.com/ClickHouse/ClickHouse/pull/63186) ([cucumber95](https://github.com/cucumber95)).
* Introduce startup scripts, which allow the execution of preconfigured queries at the startup stage. [#64889](https://github.com/ClickHouse/ClickHouse/pull/64889) ([pufit](https://github.com/pufit)).
* Support accept_invalid_certificate in client's config in order to allow for client to connect over secure TCP to a server running with self-signed certificate - can be used as a shorthand for corresponding `openSSL` client settings `verificationMode=none` + `invalidCertificateHandler.name=AcceptCertificateHandler`. [#65238](https://github.com/ClickHouse/ClickHouse/pull/65238) ([peacewalker122](https://github.com/peacewalker122)).
* Add system.error_log which contains history of error values from table system.errors, periodically flushed to disk. [#65381](https://github.com/ClickHouse/ClickHouse/pull/65381) ([Pablo Marcos](https://github.com/pamarcos)).
* Add aggregate function `groupConcat`. About the same as `arrayStringConcat( groupArray(column), ',')` Can receive 2 parameters: a string delimiter and the number of elements to be processed. [#65451](https://github.com/ClickHouse/ClickHouse/pull/65451) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add a new setting to disable/enable writing page index into parquet files. [#65475](https://github.com/ClickHouse/ClickHouse/pull/65475) ([lgbo](https://github.com/lgbo-ustc)).
* Introduce `logger.console_log_level` server config to control the log level to the console (if enabled). [#65559](https://github.com/ClickHouse/ClickHouse/pull/65559) ([Azat Khuzhin](https://github.com/azat)).
* Automatically append a wildcard `*` to the end of a directory path with table function `file`. [#66019](https://github.com/ClickHouse/ClickHouse/pull/66019) ([Zhidong (David) Guo](https://github.com/Gun9niR)).
* Add `--memory-usage` option to client in non interactive mode. [#66393](https://github.com/ClickHouse/ClickHouse/pull/66393) ([vdimir](https://github.com/vdimir)).
* Make an interactive client for clickhouse-disks, add local disk from the local directory. [#64446](https://github.com/ClickHouse/ClickHouse/pull/64446) ([Daniil Ivanik](https://github.com/divanik)).
* When lightweight delete happens on a table with projection(s), users have choices either throw an exception (by default) or drop the projection [#65594](https://github.com/ClickHouse/ClickHouse/pull/65594) ([jsc0218](https://github.com/jsc0218)).
#### Experimental Feature
* Change binary serialization of Variant data type: add `compact` mode to avoid writing the same discriminator multiple times for granules with single variant or with only NULL values. Add MergeTree setting `use_compact_variant_discriminators_serialization` that is enabled by default. Note that Variant type is still experimental and backward-incompatible change in serialization is ok. [#62774](https://github.com/ClickHouse/ClickHouse/pull/62774) ([Kruglov Pavel](https://github.com/Avogar)).
* Support rocksdb as backend storage of keeper. [#56626](https://github.com/ClickHouse/ClickHouse/pull/56626) ([Han Fei](https://github.com/hanfei1991)).
* Refactor JSONExtract functions, support more types including experimental Dynamic type. [#66046](https://github.com/ClickHouse/ClickHouse/pull/66046) ([Kruglov Pavel](https://github.com/Avogar)).
* Support null map subcolumn for Variant and Dynamic subcolumns. [#66178](https://github.com/ClickHouse/ClickHouse/pull/66178) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix reading dynamic subcolumns from altered Memory table. Previously if `max_types` parameter of a Dynamic type was changed in Memory table via alter, further subcolumns reading can return wrong result. [#66066](https://github.com/ClickHouse/ClickHouse/pull/66066) ([Kruglov Pavel](https://github.com/Avogar)).
* Add support for `cluster_for_parallel_replicas` when using custom key parallel replicas. It allows you to use parallel replicas with custom key with MergeTree tables. [#65453](https://github.com/ClickHouse/ClickHouse/pull/65453) ([Antonio Andelic](https://github.com/antonio2368)).
#### Performance Improvement
* Enable `optimize_functions_to_subcolumns` by default. [#58661](https://github.com/ClickHouse/ClickHouse/pull/58661) ([Anton Popov](https://github.com/CurtizJ)).
* Replace int to string algorithm with a faster one (from a modified amdn/itoa to a modified jeaiii/itoa). [#61661](https://github.com/ClickHouse/ClickHouse/pull/61661) ([Raúl Marín](https://github.com/Algunenano)).
* Sizes of hash tables created by join (`parallel_hash` algorithm) is collected and cached now. This information will be used to preallocate space in hash tables for subsequent query executions and save time on hash table resizes. [#64553](https://github.com/ClickHouse/ClickHouse/pull/64553) ([Nikita Taranov](https://github.com/nickitat)).
* Optimized queries with `ORDER BY` primary key and `WHERE` that have a condition with high selectivity by using of buffering. It is controlled by setting `read_in_order_use_buffering` (enabled by default) and can increase memory usage of query. [#64607](https://github.com/ClickHouse/ClickHouse/pull/64607) ([Anton Popov](https://github.com/CurtizJ)).
* Improve performance of loading `plain_rewritable` metadata. [#65634](https://github.com/ClickHouse/ClickHouse/pull/65634) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Attaching tables on read-only disks will use fewer resources by not loading outdated parts. [#65635](https://github.com/ClickHouse/ClickHouse/pull/65635) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support minmax hyperrectangle for Set indices. [#65676](https://github.com/ClickHouse/ClickHouse/pull/65676) ([AntiTopQuark](https://github.com/AntiTopQuark)).
* Unload primary index of outdated parts to reduce total memory usage. [#65852](https://github.com/ClickHouse/ClickHouse/pull/65852) ([Anton Popov](https://github.com/CurtizJ)).
* Functions `replaceRegexpAll` and `replaceRegexpOne` are now significantly faster if the pattern is trivial, i.e. contains no metacharacters, pattern classes, flags, grouping characters etc. (Thanks to Taiyang Li). [#66185](https://github.com/ClickHouse/ClickHouse/pull/66185) ([Robert Schulze](https://github.com/rschu1ze)).
* s3 requests: Reduce retry time for queries, increase retries count for backups. 8.5 minutes and 100 retires for queries, 1.2 hours and 1000 retries for backup restore. [#65232](https://github.com/ClickHouse/ClickHouse/pull/65232) ([Sema Checherinda](https://github.com/CheSema)).
* Support query plan LIMIT optimization. Support LIMIT pushdown for PostgreSQL storage and table function. [#65454](https://github.com/ClickHouse/ClickHouse/pull/65454) ([Maksim Kita](https://github.com/kitaisreal)).
* Improved ZooKeeper load balancing. The current session doesn't expire until the optimal nodes become available despite `fallback_session_lifetime`. Added support for AZ-aware balancing. [#65570](https://github.com/ClickHouse/ClickHouse/pull/65570) ([Alexander Tokmakov](https://github.com/tavplubix)).
* DatabaseCatalog drops tables faster by using up to database_catalog_drop_table_concurrency threads. [#66065](https://github.com/ClickHouse/ClickHouse/pull/66065) ([Sema Checherinda](https://github.com/CheSema)).
#### Improvement
* The setting `optimize_trivial_insert_select` is disabled by default. In most cases, it should be beneficial. Nevertheless, if you are seeing slower INSERT SELECT or increased memory usage, you can enable it back or `SET compatibility = '24.6'`. [#58970](https://github.com/ClickHouse/ClickHouse/pull/58970) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Print stacktrace and diagnostic info if `clickhouse-client` or `clickhouse-local` crashes. [#61109](https://github.com/ClickHouse/ClickHouse/pull/61109) ([Alexander Tokmakov](https://github.com/tavplubix)).
* The result of `SHOW INDEX | INDEXES | INDICES | KEYS` was previously sorted by the primary key column names. Since this was unintuitive, the result is now sorted by the position of the primary key columns within the primary key. [#61131](https://github.com/ClickHouse/ClickHouse/pull/61131) ([Robert Schulze](https://github.com/rschu1ze)).
* Change how deduplication for Materialized Views works. Fixed a lot of cases like: - on destination table: data is split for 2 or more blocks and that blocks is considered as duplicate when that block is inserted in parallel. - on MV destination table: the equal blocks are deduplicated, that happens when MV often produces equal data as a result for different input data due to performing aggregation. - on MV destination table: the equal blocks which comes from different MV are deduplicated. [#61601](https://github.com/ClickHouse/ClickHouse/pull/61601) ([Sema Checherinda](https://github.com/CheSema)).
* Allow matching column names in a case insensitive manner when reading json files (`input_format_json_case_insensitive_column_matching`). [#61750](https://github.com/ClickHouse/ClickHouse/pull/61750) ([kevinyhzou](https://github.com/KevinyhZou)).
* Support reading partitioned data DeltaLake data. Infer DeltaLake schema by reading metadata instead of data. [#63201](https://github.com/ClickHouse/ClickHouse/pull/63201) ([Kseniia Sumarokova](https://github.com/kssenii)).
* In composable protocols TLS layer accepted only `certificateFile` and `privateKeyFile` parameters. https://clickhouse.com/docs/en/operations/settings/composable-protocols. [#63985](https://github.com/ClickHouse/ClickHouse/pull/63985) ([Anton Ivashkin](https://github.com/ianton-ru)).
* Added profile event `SelectQueriesWithPrimaryKeyUsage` which indicates how many SELECT queries use the primary key to evaluate the WHERE clause. [#64492](https://github.com/ClickHouse/ClickHouse/pull/64492) ([0x01f](https://github.com/0xfei)).
* `StorageS3Queue` related fixes and improvements. Deduce a default value of `s3queue_processing_threads_num` according to the number of physical cpu cores on the server (instead of the previous default value as 1). Set default value of `s3queue_loading_retries` to 10. Fix possible vague "Uncaught exception" in exception column of `system.s3queue`. Do not increment retry count on `MEMORY_LIMIT_EXCEEDED` exception. Move files commit to a stage after insertion into table fully finished to avoid files being commited while not inserted. Add settings `s3queue_max_processed_files_before_commit`, `s3queue_max_processed_rows_before_commit`, `s3queue_max_processed_bytes_before_commit`, `s3queue_max_processing_time_sec_before_commit`, to better control commit and flush time. [#65046](https://github.com/ClickHouse/ClickHouse/pull/65046) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Support aliases in parametrized view function (only new analyzer). [#65190](https://github.com/ClickHouse/ClickHouse/pull/65190) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Updated to mask account key in logs in azureBlobStorage. [#65273](https://github.com/ClickHouse/ClickHouse/pull/65273) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Partition pruning for `IN` predicates when filter expression is a part of `PARTITION BY` expression. [#65335](https://github.com/ClickHouse/ClickHouse/pull/65335) ([Eduard Karacharov](https://github.com/korowa)).
* Add system tables with main information about all detached tables. [#65400](https://github.com/ClickHouse/ClickHouse/pull/65400) ([Konstantin Morozov](https://github.com/k-morozov)).
* `arrayMin`/`arrayMax` can be applicable to all data types that are comparable. [#65455](https://github.com/ClickHouse/ClickHouse/pull/65455) ([pn](https://github.com/chloro-pn)).
* Improved memory accounting for cgroups v2 to exclude the amount occupied by the page cache. [#65470](https://github.com/ClickHouse/ClickHouse/pull/65470) ([Nikita Taranov](https://github.com/nickitat)).
* Do not create format settings for each row when serializing chunks to insert to EmbeddedRocksDB table. [#65474](https://github.com/ClickHouse/ClickHouse/pull/65474) ([Duc Canh Le](https://github.com/canhld94)).
* Reduce `clickhouse-local` prompt to just `:)`. `getFQDNOrHostName()` takes too long on macOS, and we don't want a hostname in the prompt for `clickhouse-local` anyway. [#65510](https://github.com/ClickHouse/ClickHouse/pull/65510) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Avoid printing a message from jemalloc about per-CPU arenas on low-end virtual machines. [#65532](https://github.com/ClickHouse/ClickHouse/pull/65532) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Disable filesystem cache background download by default. It will be enabled back when we fix the issue with possible "Memory limit exceeded" because memory deallocation is done outside of query context (while buffer is allocated inside of query context) if we use background download threads. Plus we need to add a separate setting to define max size to download for background workers (currently it is limited by max_file_segment_size, which might be too big). [#65534](https://github.com/ClickHouse/ClickHouse/pull/65534) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add new option to config `<config_reload_interval_ms>` which allow to specify how often clickhouse will reload config. [#65545](https://github.com/ClickHouse/ClickHouse/pull/65545) ([alesapin](https://github.com/alesapin)).
* Implement binary encoding for ClickHouse data types and add its specification in docs. Use it in Dynamic binary serialization, allow to use it in RowBinaryWithNamesAndTypes and Native formats under settings. [#65546](https://github.com/ClickHouse/ClickHouse/pull/65546) ([Kruglov Pavel](https://github.com/Avogar)).
* Improved ZooKeeper load balancing. The current session doesn't expire until the optimal nodes become available despite `fallback_session_lifetime`. Added support for AZ-aware balancing. [#65570](https://github.com/ClickHouse/ClickHouse/pull/65570) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Server settings `compiled_expression_cache_size` and `compiled_expression_cache_elements_size` are now shown in `system.server_settings`. [#65584](https://github.com/ClickHouse/ClickHouse/pull/65584) ([Robert Schulze](https://github.com/rschu1ze)).
* Add support for user identification based on x509 SubjectAltName extension. [#65626](https://github.com/ClickHouse/ClickHouse/pull/65626) ([Anton Kozlov](https://github.com/tonickkozlov)).
* `clickhouse-local` will respect the `max_server_memory_usage` and `max_server_memory_usage_to_ram_ratio` from the configuration file. It will also set the max memory usage to 90% of the system memory by default, like `clickhouse-server` does. [#65697](https://github.com/ClickHouse/ClickHouse/pull/65697) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a script to backup your files to ClickHouse. [#65699](https://github.com/ClickHouse/ClickHouse/pull/65699) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* PostgreSQL source support cancel. [#65722](https://github.com/ClickHouse/ClickHouse/pull/65722) ([Maksim Kita](https://github.com/kitaisreal)).
* Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)).
* Allow to use `concat` function with empty arguments ``` sql :) select concat();. [#65887](https://github.com/ClickHouse/ClickHouse/pull/65887) ([李扬](https://github.com/taiyang-li)).
* Allow controlling named collections in clickhouse-local. [#65973](https://github.com/ClickHouse/ClickHouse/pull/65973) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve Azure profile events. [#65999](https://github.com/ClickHouse/ClickHouse/pull/65999) ([alesapin](https://github.com/alesapin)).
* Support ORC file read by writer time zone. [#66025](https://github.com/ClickHouse/ClickHouse/pull/66025) ([kevinyhzou](https://github.com/KevinyhZou)).
* Add settings to control connection to the PostgreSQL. * Setting `postgresql_connection_attempt_timeout` specifies the value passed to `connect_timeout` parameter of connection URL. * Setting `postgresql_connection_pool_retries` specifies the number of retries to establish a connection to the PostgreSQL end-point. [#66232](https://github.com/ClickHouse/ClickHouse/pull/66232) ([Dmitry Novik](https://github.com/novikd)).
* Reduce inaccuracy of input_wait_elapsed_us/input_wait_elapsed_us/elapsed_us. [#66239](https://github.com/ClickHouse/ClickHouse/pull/66239) ([Azat Khuzhin](https://github.com/azat)).
* Improve FilesystemCache ProfileEvents. [#66249](https://github.com/ClickHouse/ClickHouse/pull/66249) ([zhukai](https://github.com/nauu)).
* Add settings to ignore ON CLUSTER clause in queries for named collection management with replicated storage. [#66288](https://github.com/ClickHouse/ClickHouse/pull/66288) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Function `generateSnowflakeID` now allows to specify a machine ID as a parameter to prevent collisions in large clusters. [#66374](https://github.com/ClickHouse/ClickHouse/pull/66374) ([ZAWA_ll](https://github.com/Zawa-ll)).
* Disable suspending on Ctrl+Z in interactive mode. This is a common trap and is not expected behavior for almost all users. I imagine only a few extreme power users could appreciate suspending terminal applications to the background, but I don't know any. [#66511](https://github.com/ClickHouse/ClickHouse/pull/66511) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add option for validating the Primary key type in Dictionaries. Without this option for simple layouts any column type will be implicitly converted to UInt64. ### Documentation entry for user-facing changes. [#66595](https://github.com/ClickHouse/ClickHouse/pull/66595) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
* Check cyclic dependencies on CREATE/REPLACE/RENAME/EXCHANGE queries and throw an exception if there is a cyclic dependency. Previously such cyclic dependencies could lead to a deadlock during server startup. Also fix some bugs in dependencies creation. [#65405](https://github.com/ClickHouse/ClickHouse/pull/65405) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)).
* Fix the VALID UNTIL clause in the user definition resetting after a restart. [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix SHOW MERGES remaining time. [#66735](https://github.com/ClickHouse/ClickHouse/pull/66735) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* `Query was cancelled` might have been printed twice in clickhouse-client. This behaviour is fixed. [#66005](https://github.com/ClickHouse/ClickHouse/pull/66005) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fixed crash while using MaterializedMySQL with TABLE OVERRIDE that maps MySQL NULL field into ClickHouse not NULL field. [#54649](https://github.com/ClickHouse/ClickHouse/pull/54649) ([Filipp Ozinov](https://github.com/bakwc)).
* Fix logical error when PREWHERE expression read no columns and table has no adaptive index granularity (very old table). [#59173](https://github.com/ClickHouse/ClickHouse/pull/59173) ([Alexander Gololobov](https://github.com/davenger)).
* Fix bug with cancellation buffer when canceling a query. [#64478](https://github.com/ClickHouse/ClickHouse/pull/64478) ([Sema Checherinda](https://github.com/CheSema)).
* Fix filling parts columns from metadata (when columns.txt does not exists). [#64757](https://github.com/ClickHouse/ClickHouse/pull/64757) ([Azat Khuzhin](https://github.com/azat)).
* Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)).
* Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)).
* Eliminate injective function in argument of functions `uniq*` recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)).
* Fix unexpected projection name when query with CTE. [#65267](https://github.com/ClickHouse/ClickHouse/pull/65267) ([wudidapaopao](https://github.com/wudidapaopao)).
* Require `dictGet` privilege when accessing dictionaries via direct query or the `Dictionary` table engine. [#65359](https://github.com/ClickHouse/ClickHouse/pull/65359) ([Joe Lynch](https://github.com/joelynch)).
* Fix user-specific S3 auth with incremental backups. [#65481](https://github.com/ClickHouse/ClickHouse/pull/65481) ([Antonio Andelic](https://github.com/antonio2368)).
* Disable `non-intersecting-parts` optimization for queries with `FINAL` in case of `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix getting exception `Index out of bound for blob metadata` in case all files from list batch were filtered out. [#65523](https://github.com/ClickHouse/ClickHouse/pull/65523) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix NOT_FOUND_COLUMN_IN_BLOCK for deduplicate merge of projection. [#65573](https://github.com/ClickHouse/ClickHouse/pull/65573) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)).
* Fixed a bug that compatibility level '23.4' was not properly applied. [#65737](https://github.com/ClickHouse/ClickHouse/pull/65737) ([cw5121](https://github.com/cw5121)).
* Fix odbc table with nullable fields. [#65738](https://github.com/ClickHouse/ClickHouse/pull/65738) ([Rodolphe Dugé de Bernonville](https://github.com/RodolpheDuge)).
* Fix data race in `TCPHandler`, which could happen on fatal error. [#65744](https://github.com/ClickHouse/ClickHouse/pull/65744) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix invalid exceptions in function `parseDateTime` with `%F` and `%D` placeholders. [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) ([Antonio Andelic](https://github.com/antonio2368)).
* For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)).
* Fix a bug leads to EmbeddedRocksDB with TTL write corrupted SST files. [#65816](https://github.com/ClickHouse/ClickHouse/pull/65816) ([Duc Canh Le](https://github.com/canhld94)).
* Functions `bitTest`, `bitTestAll`, and `bitTestAny` now return an error if the specified bit index is out-of-bounds [#65818](https://github.com/ClickHouse/ClickHouse/pull/65818) ([Pablo Marcos](https://github.com/pamarcos)).
* Setting `join_any_take_last_row` is supported in any query with hash join. [#65820](https://github.com/ClickHouse/ClickHouse/pull/65820) ([vdimir](https://github.com/vdimir)).
* Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)).
* Functions `bitShiftLeft` and `bitShitfRight` return an error for out of bounds shift positions [#65838](https://github.com/ClickHouse/ClickHouse/pull/65838) ([Pablo Marcos](https://github.com/pamarcos)).
* Fix growing memory usage in S3Queue. [#65839](https://github.com/ClickHouse/ClickHouse/pull/65839) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix tie handling in `arrayAUC` to match sklearn. [#65840](https://github.com/ClickHouse/ClickHouse/pull/65840) ([gabrielmcg44](https://github.com/gabrielmcg44)).
* Fix possible issues with MySQL server protocol TLS connections. [#65917](https://github.com/ClickHouse/ClickHouse/pull/65917) ([Azat Khuzhin](https://github.com/azat)).
* Fix possible issues with MySQL client protocol TLS connections. [#65938](https://github.com/ClickHouse/ClickHouse/pull/65938) ([Azat Khuzhin](https://github.com/azat)).
* Fix handling of `SSL_ERROR_WANT_READ`/`SSL_ERROR_WANT_WRITE` with zero timeout. [#65941](https://github.com/ClickHouse/ClickHouse/pull/65941) ([Azat Khuzhin](https://github.com/azat)).
* Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
* Column _size in s3 engine and s3 table function denotes the size of a file inside the archive, not a size of the archive itself. [#65993](https://github.com/ClickHouse/ClickHouse/pull/65993) ([Daniil Ivanik](https://github.com/divanik)).
* Fix resolving dynamic subcolumns in analyzer, avoid reading the whole column on dynamic subcolumn reading. [#66004](https://github.com/ClickHouse/ClickHouse/pull/66004) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix config merging for from_env with replace overrides. [#66034](https://github.com/ClickHouse/ClickHouse/pull/66034) ([Azat Khuzhin](https://github.com/azat)).
* Fix a possible hanging in `GRPCServer` during shutdown. [#66061](https://github.com/ClickHouse/ClickHouse/pull/66061) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fixed several cases in function `has` with non-constant `LowCardinality` arguments. [#66088](https://github.com/ClickHouse/ClickHouse/pull/66088) ([Anton Popov](https://github.com/CurtizJ)).
* Fix for `groupArrayIntersect`. It had incorrect behavior in the `merge()` function. Also, fixed behavior in `deserialise()` for numeric and general data. [#66103](https://github.com/ClickHouse/ClickHouse/pull/66103) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
* Disable the `merge-filters` optimization introduced in [#64760](https://github.com/ClickHouse/ClickHouse/issues/64760). It may cause an exception if optimization merges two filter expressions and does not apply a short-circuit evaluation. [#66126](https://github.com/ClickHouse/ClickHouse/pull/66126) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fixed the issue when the server failed to parse Avro files with negative block size arrays encoded, which is now allowed by the Avro specification. [#66130](https://github.com/ClickHouse/ClickHouse/pull/66130) ([Serge Klochkov](https://github.com/slvrtrn)).
* Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
* Fix rare case with missing data in the result of distributed query. [#66174](https://github.com/ClickHouse/ClickHouse/pull/66174) ([vdimir](https://github.com/vdimir)).
* Fix order of parsing metadata fields in StorageDeltaLake. [#66211](https://github.com/ClickHouse/ClickHouse/pull/66211) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)).
* Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix error reporting while copying to S3 or AzureBlobStorage. [#66295](https://github.com/ClickHouse/ClickHouse/pull/66295) ([Vitaly Baranov](https://github.com/vitlibar)).
* Prevent watchdog from keeping descriptors of unlinked(rotated) log files. [#66334](https://github.com/ClickHouse/ClickHouse/pull/66334) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Fix the bug that logicalexpressionoptimizerpass lost logical type of constant. [#66344](https://github.com/ClickHouse/ClickHouse/pull/66344) ([pn](https://github.com/chloro-pn)).
* Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix possible incorrect result for queries joining and filtering table external engine (like PostgreSQL), due to too aggressive filter pushdown. Since now, conditions from where section won't be send to external database in case of outer join with external table. [#66402](https://github.com/ClickHouse/ClickHouse/pull/66402) ([vdimir](https://github.com/vdimir)).
* Added missing column materialization for cross join. [#66413](https://github.com/ClickHouse/ClickHouse/pull/66413) ([lgbo](https://github.com/lgbo-ustc)).
* Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Avoid possible logical error during import from Npy format in case of bad array nesting level, fix testing of other kinds of errors. [#66461](https://github.com/ClickHouse/ClickHouse/pull/66461) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix wrong count() result when there is non-deterministic function in predicate. [#66510](https://github.com/ClickHouse/ClickHouse/pull/66510) ([Duc Canh Le](https://github.com/canhld94)).
* Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix reading of uninitialized memory when hashing empty tuples. [#66562](https://github.com/ClickHouse/ClickHouse/pull/66562) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix `column_length` is not updated in `ColumnTuple::insertManyFrom`. [#66626](https://github.com/ClickHouse/ClickHouse/pull/66626) ([lgbo](https://github.com/lgbo-ustc)).
* Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix rare case of stuck merge after drop column. [#66707](https://github.com/ClickHouse/ClickHouse/pull/66707) ([Raúl Marín](https://github.com/Algunenano)).
* Fix assertion `isUniqTypes` when insert select from remote sources. [#66722](https://github.com/ClickHouse/ClickHouse/pull/66722) ([Sema Checherinda](https://github.com/CheSema)).
* Fix logical error in PrometheusRequestHandler. [#66621](https://github.com/ClickHouse/ClickHouse/pull/66621) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix `indexHint` function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)).
* Fix AST formatting of 'create table b empty as a'. [#64951](https://github.com/ClickHouse/ClickHouse/pull/64951) ([Michael Kolupaev](https://github.com/al13n321)).
#### Build/Testing/Packaging Improvement
* Instantiate template methods ahead in different .cpp files, avoid too large translation units during compiling. [#64818](https://github.com/ClickHouse/ClickHouse/pull/64818) ([lgbo](https://github.com/lgbo-ustc)).
### <a id="246"></a> ClickHouse release 24.6, 2024-07-01
#### Backward Incompatible Change

View File

@ -87,10 +87,13 @@
# define ASAN_POISON_MEMORY_REGION(a, b)
#endif
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) || defined(UNDEFINED_BEHAVIOR_SANITIZER)
#define ABORT_ON_LOGICAL_ERROR
#endif
/// We used to have only ABORT_ON_LOGICAL_ERROR macro, but most of its uses were actually in places where we didn't care about logical errors
/// but wanted to check exactly if the current build type is debug or with sanitizer. This new macro is introduced to fix those places.
#if !defined(DEBUG_OR_SANITIZER_BUILD)
# if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) \
|| defined(UNDEFINED_BEHAVIOR_SANITIZER)
# define DEBUG_OR_SANITIZER_BUILD
# endif
#endif
/// chassert(x) is similar to assert(x), but:
@ -101,7 +104,7 @@
/// Also it makes sense to call abort() instead of __builtin_unreachable() in debug builds,
/// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy)
#if !defined(chassert)
#if defined(ABORT_ON_LOGICAL_ERROR)
# if defined(DEBUG_OR_SANITIZER_BUILD)
// clang-format off
#include <base/types.h>
namespace DB

View File

@ -2,11 +2,11 @@
# NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54488)
SET(VERSION_REVISION 54489)
SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 7)
SET(VERSION_MINOR 8)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH aa023477a9265e403982fca5ee29a714db5133d9)
SET(VERSION_DESCRIBE v24.7.1.1-testing)
SET(VERSION_STRING 24.7.1.1)
SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af)
SET(VERSION_DESCRIBE v24.8.1.1-testing)
SET(VERSION_STRING 24.8.1.1)
# end of autochange

View File

@ -9,6 +9,7 @@ set(DATASKETCHES_LIBRARY theta)
add_library(_datasketches INTERFACE)
target_include_directories(_datasketches SYSTEM BEFORE INTERFACE
"${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/common/include"
"${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/count/include"
"${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/theta/include")
add_library(ch_contrib::datasketches ALIAS _datasketches)

2
contrib/grpc vendored

@ -1 +1 @@
Subproject commit f5b7fdc2dff09ada06dbf6c75df298fb40f898df
Subproject commit 1716359d2e28d304a250f9df0e6c0ccad03de8db

2
contrib/libunwind vendored

@ -1 +1 @@
Subproject commit 8f28e64d15819d2d096badd598c7d85bebddb1f2
Subproject commit a89d904befea07814628c6ce0b44083c4e149c62

View File

@ -1298,7 +1298,6 @@ elseif(ARCH_PPC64LE)
${OPENSSL_SOURCE_DIR}/crypto/camellia/camellia.c
${OPENSSL_SOURCE_DIR}/crypto/camellia/cmll_cbc.c
${OPENSSL_SOURCE_DIR}/crypto/chacha/chacha_enc.c
${OPENSSL_SOURCE_DIR}/crypto/mem_clr.c
${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_enc.c
${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_skey.c
${OPENSSL_SOURCE_DIR}/crypto/sha/keccak1600.c

View File

@ -23,15 +23,17 @@ RUN apt-get update \
# and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB).
# TSAN will flush shadow memory when reaching this limit.
# It may cause false-negatives, but it's better than OOM.
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment
RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment
RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment
# max_allocation_size_mb is set to 32GB, so we have much bigger chance to run into memory limit than the limitation of the sanitizers
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1 max_allocation_size_mb=32768'" >> /etc/environment
RUN echo "UBSAN_OPTIONS='print_stacktrace=1 max_allocation_size_mb=32768'" >> /etc/environment
RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1 max_allocation_size_mb=32768'" >> /etc/environment
RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt max_allocation_size_mb=32768'" >> /etc/environment
# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
# (but w/o verbosity for TSAN, otherwise test.reference will not match)
ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1 max_allocation_size_mb=32768'
ENV UBSAN_OPTIONS='print_stacktrace=1 max_allocation_size_mb=32768'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1 max_allocation_size_mb=32768'
ENV LSAN_OPTIONS='max_allocation_size_mb=32768'
# for external_symbolizer_path
RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer

View File

@ -26,7 +26,10 @@ RUN apt-get update \
zstd \
--yes --no-install-recommends \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* \
&& groupadd --system --gid 1000 clickhouse \
&& useradd --system --gid 1000 --uid 1000 -m clickhouse
# ^ For some reason, groupadd and useradd are needed for tests with 'expect', but I don't know, why.
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt

View File

@ -9,7 +9,7 @@ trap 'kill $(jobs -pr) ||:' EXIT
stage=${stage:-}
# Compiler version, normally set by Dockerfile
export LLVM_VERSION=${LLVM_VERSION:-17}
export LLVM_VERSION=${LLVM_VERSION:-18}
# A variable to pass additional flags to CMake.
# Here we explicitly default it to nothing so that bash doesn't complain about
@ -261,9 +261,12 @@ function timeout_with_logging() {
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
echo "Checking if it is a timeout. The code 124 will indicate a timeout."
if [[ "${exit_code}" -eq "124" ]]
then
echo "The command 'timeout ${*}' has been killed by timeout"
echo "The command 'timeout ${*}' has been killed by timeout."
else
echo "No, it isn't a timeout."
fi
return $exit_code

View File

@ -20,7 +20,7 @@
</max_execution_time>
<max_memory_usage>
<max>10G</max>
<max>5G</max>
</max_memory_usage>
<table_function_remote_max_addresses>

View File

@ -208,7 +208,6 @@ handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIGSEGV nostop pass
handle SIG$RTMIN nostop noprint pass
info signals
continue

View File

@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update --yes \
&& env DEBIAN_FRONTEND=noninteractive apt-get install wget git default-jdk maven python3 --yes --no-install-recommends \
&& env DEBIAN_FRONTEND=noninteractive apt-get install wget git python3 default-jdk maven --yes --no-install-recommends \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

View File

@ -4,6 +4,9 @@
source /setup_export_logs.sh
set -e -x
MAX_RUN_TIME=${MAX_RUN_TIME:-3600}
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 3600 : MAX_RUN_TIME))
# Choose random timezone for this test run
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
echo "Choosen random timezone $TZ"
@ -188,8 +191,8 @@ else
ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC"
clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC"
else
@ -197,7 +200,7 @@ else
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
fi
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
fi
clickhouse-client --query "SHOW TABLES FROM test"
@ -242,7 +245,25 @@ function run_tests()
}
export -f run_tests
timeout "$MAX_RUN_TIME" bash -c run_tests ||:
function timeout_with_logging() {
local exit_code=0
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
echo "Checking if it is a timeout. The code 124 will indicate a timeout."
if [[ "${exit_code}" -eq "124" ]]
then
echo "The command 'timeout ${*}' has been killed by timeout."
else
echo "No, it isn't a timeout."
fi
return $exit_code
}
TIMEOUT=$((MAX_RUN_TIME - 700))
timeout_with_logging "$TIMEOUT" bash -c run_tests ||:
echo "Files in current directory"
ls -la ./

View File

@ -20,7 +20,6 @@ handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIGSEGV nostop pass
handle SIG$RTMIN nostop noprint pass
info signals
continue

View File

@ -6,18 +6,12 @@ source /setup_export_logs.sh
# fail on errors, verbose and export all env variables
set -e -x -a
MAX_RUN_TIME=${MAX_RUN_TIME:-7200}
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 7200 : MAX_RUN_TIME))
MAX_RUN_TIME=${MAX_RUN_TIME:-9000}
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 9000 : MAX_RUN_TIME))
USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0}
USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0}
RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] || [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0
fi
# Choose random timezone for this test run.
#
# NOTE: that clickhouse-test will randomize session_timezone by itself as well
@ -101,53 +95,6 @@ if [ "$NUM_TRIES" -gt "1" ]; then
mkdir -p /var/run/clickhouse-server
fi
# Run a CH instance to execute sequential tests on it in parallel with all other tests.
if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then
mkdir -p /var/run/clickhouse-server3 /etc/clickhouse-server3 /var/lib/clickhouse3
cp -r -L /etc/clickhouse-server/* /etc/clickhouse-server3/
sudo chown clickhouse:clickhouse /var/run/clickhouse-server3 /var/lib/clickhouse3 /etc/clickhouse-server3/
sudo chown -R clickhouse:clickhouse /etc/clickhouse-server3/*
function replace(){
sudo find /etc/clickhouse-server3/ -type f -name '*.xml' -exec sed -i "$1" {} \;
}
replace "s|<port>9000</port>|<port>19000</port>|g"
replace "s|<port>9440</port>|<port>19440</port>|g"
replace "s|<port>9988</port>|<port>19988</port>|g"
replace "s|<port>9234</port>|<port>19234</port>|g"
replace "s|<port>9181</port>|<port>19181</port>|g"
replace "s|<https_port>8443</https_port>|<https_port>18443</https_port>|g"
replace "s|<tcp_port>9000</tcp_port>|<tcp_port>19000</tcp_port>|g"
replace "s|<tcp_port>9181</tcp_port>|<tcp_port>19181</tcp_port>|g"
replace "s|<tcp_port_secure>9440</tcp_port_secure>|<tcp_port_secure>19440</tcp_port_secure>|g"
replace "s|<tcp_with_proxy_port>9010</tcp_with_proxy_port>|<tcp_with_proxy_port>19010</tcp_with_proxy_port>|g"
replace "s|<mysql_port>9004</mysql_port>|<mysql_port>19004</mysql_port>|g"
replace "s|<postgresql_port>9005</postgresql_port>|<postgresql_port>19005</postgresql_port>|g"
replace "s|<interserver_http_port>9009</interserver_http_port>|<interserver_http_port>19009</interserver_http_port>|g"
replace "s|8123|18123|g"
replace "s|/var/lib/clickhouse/|/var/lib/clickhouse3/|g"
replace "s|/etc/clickhouse-server/|/etc/clickhouse-server3/|g"
# distributed cache
replace "s|<tcp_port>10001</tcp_port>|<tcp_port>10003</tcp_port>|g"
replace "s|<tcp_port>10002</tcp_port>|<tcp_port>10004</tcp_port>|g"
sudo -E -u clickhouse /usr/bin/clickhouse server --daemon --config /etc/clickhouse-server3/config.xml \
--pid-file /var/run/clickhouse-server3/clickhouse-server.pid \
-- --path /var/lib/clickhouse3/ --logger.stderr /var/log/clickhouse-server/stderr3.log \
--logger.log /var/log/clickhouse-server/clickhouse-server3.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server3.err.log \
--tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \
--prometheus.port 19988 --keeper_server.raft_configuration.server.port 19234 --keeper_server.tcp_port 19181 \
--mysql_port 19004 --postgresql_port 19005
for _ in {1..100}
do
clickhouse-client --port 19000 --query "SELECT 1" && break
sleep 1
done
fi
# simplest way to forward env variables to server
sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid
@ -183,9 +130,6 @@ if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
--keeper_server.tcp_port 29181 --keeper_server.server_id 3 \
--prometheus.port 29988 \
--macros.shard s2 # It doesn't work :(
MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours)
MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited)
fi
if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
@ -210,9 +154,6 @@ if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
--keeper_server.tcp_port 19181 --keeper_server.server_id 2 \
--prometheus.port 19988 \
--macros.replica r2 # It doesn't work :(
MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours)
MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited)
fi
# Wait for the server to start, but not for too long.
@ -223,7 +164,6 @@ do
done
setup_logs_replication
attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01
function fn_exists() {
@ -284,11 +224,7 @@ function run_tests()
else
# All other configurations are OK.
ADDITIONAL_OPTIONS+=('--jobs')
ADDITIONAL_OPTIONS+=('5')
fi
if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--run-sequential-tests-in-parallel')
ADDITIONAL_OPTIONS+=('8')
fi
if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then
@ -311,12 +247,22 @@ function run_tests()
try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')"
TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800))
START_TIME=${SECONDS}
set +e
timeout -k 60m -s TERM --preserve-status 140m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s \
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt
set -e
DURATION=$((START_TIME - SECONDS))
echo "Elapsed ${DURATION} seconds."
if [[ $DURATION -ge $TIMEOUT ]]
then
echo "It looks like the command is terminated by the timeout, which is ${TIMEOUT} seconds."
fi
}
export -f run_tests
@ -328,7 +274,7 @@ if [ "$NUM_TRIES" -gt "1" ]; then
# We don't run tests with Ordinary database in PRs, only in master.
# So run new/changed tests with Ordinary at least once in flaky check.
timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \
| sed 's/All tests have finished//' | sed 's/No tests were run//' ||:
| sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||:
fi
timeout_with_logging "$TIMEOUT" bash -c run_tests ||:
@ -373,9 +319,6 @@ done
# Because it's the simplest way to read it when server has crashed.
sudo clickhouse stop ||:
if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then
sudo clickhouse stop --pid-path /var/run/clickhouse-server3 ||:
fi
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||:
@ -393,12 +336,6 @@ rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server.log ||:
rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||:
zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst &
if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then
rg -Fa "<Fatal>" /var/log/clickhouse-server3/clickhouse-server.log ||:
rg -A50 -Fa "============" /var/log/clickhouse-server3/stderr.log ||:
zstd --threads=0 < /var/log/clickhouse-server3/clickhouse-server.log > /test_output/clickhouse-server3.log.zst &
fi
data_path_config="--path=/var/lib/clickhouse/"
if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
# We need s3 storage configuration (but it's more likely that clickhouse-local will fail for some reason)
@ -419,10 +356,6 @@ if [ $failed_to_save_logs -ne 0 ]; then
do
clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then
clickhouse-local --path /var/lib/clickhouse3/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.3.tsv.zst ||:
fi
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
@ -464,12 +397,6 @@ rm -rf /var/lib/clickhouse/data/system/*/
tar -chf /test_output/store.tar /var/lib/clickhouse/store ||:
tar -chf /test_output/metadata.tar /var/lib/clickhouse/metadata/*.sql ||:
if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then
rm -rf /var/lib/clickhouse3/data/system/*/
tar -chf /test_output/store.tar /var/lib/clickhouse3/store ||:
tar -chf /test_output/metadata.tar /var/lib/clickhouse3/metadata/*.sql ||:
fi
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server1.log ||:

View File

@ -45,9 +45,12 @@ function timeout_with_logging() {
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
echo "Checking if it is a timeout. The code 124 will indicate a timeout."
if [[ "${exit_code}" -eq "124" ]]
then
echo "The command 'timeout ${*}' has been killed by timeout"
echo "The command 'timeout ${*}' has been killed by timeout."
else
echo "No, it isn't a timeout."
fi
return $exit_code

View File

@ -209,9 +209,9 @@ clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDat
ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC"
clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC"

View File

@ -17,6 +17,7 @@ ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=18
RUN apt-get update \
&& apt-get install \
sudo \
apt-transport-https \
apt-utils \
ca-certificates \

View File

@ -12,6 +12,7 @@ UNKNOWN_SIGN = "[ UNKNOWN "
SKIPPED_SIGN = "[ SKIPPED "
HUNG_SIGN = "Found hung queries in processlist"
SERVER_DIED_SIGN = "Server died, terminating all processes"
SERVER_DIED_SIGN2 = "Server does not respond to health check"
DATABASE_SIGN = "Database: "
SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"]
@ -43,7 +44,7 @@ def process_test_log(log_path, broken_tests):
if HUNG_SIGN in line:
hung = True
break
if SERVER_DIED_SIGN in line:
if SERVER_DIED_SIGN in line or SERVER_DIED_SIGN2 in line:
server_died = True
if RETRIES_SIGN in line:
retries = True
@ -111,12 +112,12 @@ def process_test_log(log_path, broken_tests):
# Python does not support TSV, so we have to escape '\t' and '\n' manually
# and hope that complex escape sequences will not break anything
test_results = [
(
[
test[0],
test[1],
test[2],
"".join(test[3])[:4096].replace("\t", "\\t").replace("\n", "\\n"),
)
]
for test in test_results
]
@ -170,18 +171,23 @@ def process_result(result_path, broken_tests):
if hung:
description = "Some queries hung, "
state = "failure"
test_results.append(("Some queries hung", "FAIL", "0", ""))
test_results.append(["Some queries hung", "FAIL", "0", ""])
elif server_died:
description = "Server died, "
state = "failure"
test_results.append(("Server died", "FAIL", "0", ""))
# When ClickHouse server crashes, some tests are still running
# and fail because they cannot connect to server
for result in test_results:
if result[1] == "FAIL":
result[1] = "SERVER_DIED"
test_results.append(["Server died", "FAIL", "0", ""])
elif not success_finish:
description = "Tests are not finished, "
state = "failure"
test_results.append(("Tests are not finished", "FAIL", "0", ""))
test_results.append(["Tests are not finished", "FAIL", "0", ""])
elif retries:
description = "Some tests restarted, "
test_results.append(("Some tests restarted", "SKIPPED", "0", ""))
test_results.append(["Some tests restarted", "SKIPPED", "0", ""])
else:
description = ""
@ -233,11 +239,12 @@ if __name__ == "__main__":
# sort by status then by check name
order = {
"FAIL": 0,
"Timeout": 1,
"NOT_FAILED": 2,
"BROKEN": 3,
"OK": 4,
"SKIPPED": 5,
"SERVER_DIED": 1,
"Timeout": 2,
"NOT_FAILED": 3,
"BROKEN": 4,
"OK": 5,
"SKIPPED": 6,
}
return order.get(item[1], 10), str(item[0]), item[1]

View File

@ -999,6 +999,10 @@ They can be used for prewhere optimization only if we enable `set allow_statisti
[HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains.
- `count_min`
[Count-min](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column.
## Column-level Settings {#column-level-settings}
Certain MergeTree settings can be override at column level:

View File

@ -55,7 +55,7 @@ CMPLNT_FR_TM Nullable(String)
```
:::tip
Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples. This is not always the case. Because ClickHouse is routineley used with datasets containing billions of records there is a default number (100) of rows examined to [infer the schema](/docs/en/integrations/data-ingestion/data-formats/json.md#relying-on-schema-inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000`
Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples. This is not always the case. Because ClickHouse is routineley used with datasets containing billions of records there is a default number (100) of rows examined to [infer the schema](/en/integrations/data-formats/json/inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000`
you can get a better idea of the content.
Note: as of version 22.5 the default is now 25,000 rows for inferring the schema, so only change the setting if you are on an older version or if you need more than 25,000 rows to be sampled.

View File

@ -185,6 +185,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--format, -f` Use the specified default format to output the result.
- `--vertical, -E` If specified, use the [Vertical format](../interfaces/formats.md#vertical) by default to output the result. This is the same as `format=Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables.
- `--time, -t` If specified, print the query execution time to stderr in non-interactive mode.
- `--memory-usage` If specified, print memory usage to stderr in non-interactive mode]. Possible values: 'none' - do not print memory usage, 'default' - print number of bytes, 'readable' - print memory usage in human-readable format.
- `--stacktrace` If specified, also print the stack trace if an exception occurs.
- `--config-file` The name of the configuration file.
- `--secure` If specified, will connect to server over secure connection (TLS). You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl).

View File

@ -67,6 +67,7 @@ The supported formats are:
| [Prometheus](#prometheus) | ✗ | ✔ |
| [Protobuf](#protobuf) | ✔ | ✔ |
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
| [ProtobufList](#protobuflist) | ✔ | ✔ |
| [Avro](#data-format-avro) | ✔ | ✔ |
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
| [Parquet](#data-format-parquet) | ✔ | ✔ |
@ -1952,6 +1953,35 @@ SYSTEM DROP FORMAT SCHEMA CACHE FOR Protobuf
Same as [Protobuf](#protobuf) but for storing/parsing single Protobuf message without length delimiters.
## ProtobufList {#protobuflist}
Similar to Protobuf but rows are represented as a sequence of sub-messages contained in a message with fixed name "Envelope".
Usage example:
``` sql
SELECT * FROM test.table FORMAT ProtobufList SETTINGS format_schema = 'schemafile:MessageType'
```
``` bash
cat protobuflist_messages.bin | clickhouse-client --query "INSERT INTO test.table FORMAT ProtobufList SETTINGS format_schema='schemafile:MessageType'"
```
where the file `schemafile.proto` looks like this:
``` capnp
syntax = "proto3";
message Envelope {
message MessageType {
string name = 1;
string surname = 2;
uint32 birthDate = 3;
repeated string phoneNumbers = 4;
};
MessageType row = 1;
};
```
## Avro {#data-format-avro}
[Apache Avro](https://avro.apache.org/) is a row-oriented data serialization framework developed within Apaches Hadoop project.

View File

@ -124,7 +124,7 @@ which is equal to
#### Default values for from_env and from_zk attributes
It's possible to set the default value and substitute it only if the environment variable or zookeeper node is set using `replace="1"`.
It's possible to set the default value and substitute it only if the environment variable or zookeeper node is set using `replace="1"` (must be declared before from_env).
With previous example, but `MAX_QUERY_SIZE` is unset:
@ -132,7 +132,7 @@ With previous example, but `MAX_QUERY_SIZE` is unset:
<clickhouse>
<profiles>
<default>
<max_query_size from_env="MAX_QUERY_SIZE" replace="1">150000</max_query_size>
<max_query_size replace="1" from_env="MAX_QUERY_SIZE">150000</max_query_size>
</default>
</profiles>
</clickhouse>

View File

@ -22,6 +22,21 @@ Structure of the `users` section:
<!-- Or -->
<password_sha256_hex></password_sha256_hex>
<ssh_keys>
<ssh_key>
<type>ssh-ed25519</type>
<base64_key>AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj</base64_key>
</ssh_key>
<ssh_key>
<type>ecdsa-sha2-nistp256</type>
<base64_key>AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBNxeV2uN5UY6CUbCzTA1rXfYimKQA5ivNIqxdax4bcMXz4D0nSk2l5E1TkR5mG8EBWtmExSPbcEPJ8V7lyWWbA8=</base64_key>
</ssh_key>
<ssh_key>
<type>ssh-rsa</type>
<base64_key>AAAAB3NzaC1yc2EAAAADAQABAAABgQCpgqL1SHhPVBOTFlOm0pu+cYBbADzC2jL41sPMawYCJHDyHuq7t+htaVVh2fRgpAPmSEnLEC2d4BEIKMtPK3bfR8plJqVXlLt6Q8t4b1oUlnjb3VPA9P6iGcW7CV1FBkZQEVx8ckOfJ3F+kI5VsrRlEDgiecm/C1VPl0/9M2llW/mPUMaD65cM9nlZgM/hUeBrfxOEqM11gDYxEZm1aRSbZoY4dfdm3vzvpSQ6lrCrkjn3X2aSmaCLcOWJhfBWMovNDB8uiPuw54g3ioZ++qEQMlfxVsqXDGYhXCrsArOVuW/5RbReO79BvXqdssiYShfwo+GhQ0+aLWMIW/jgBkkqx/n7uKLzCMX7b2F+aebRYFh+/QXEj7SnihdVfr9ud6NN3MWzZ1ltfIczlEcFLrLJ1Yq57wW6wXtviWh59WvTWFiPejGjeSjjJyqqB49tKdFVFuBnIU5u/bch2DXVgiAEdQwUrIp1ACoYPq22HFFAYUJrL32y7RxX3PGzuAv3LOc=</base64_key>
</ssh_key>
</ssh_keys>
<access_management>0|1</access_management>
<networks incl="networks" replace="replace">
@ -79,6 +94,24 @@ Password can be specified in plaintext or in SHA256 (hex format).
The first line of the result is the password. The second line is the corresponding double SHA1 hash.
### username/ssh-key {#user-sshkey}
This setting allows authenticating with SSH keys.
Given a SSH key (as generated by `ssh-keygen`) like
```
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj john@example.com
```
The `ssh_key` element is expected to be
```
<ssh_key>
<type>ssh-ed25519</type>
<base64_key>AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj</base64_key>
</ssh_key>
```
Substitute `ssh-ed25519` with `ssh-rsa` or `ecdsa-sha2-nistp256` for the other supported algorithms.
### access_management {#access_management-user-setting}
This setting enables or disables using of SQL-driven [access control and account management](../../guides/sre/user-management/index.md#access-control) for the user.

View File

@ -0,0 +1,35 @@
---
slug: /en/operations/system-tables/detached_tables
---
# detached_tables
Contains information of each detached table.
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in.
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid (Atomic database).
- `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system.
- `is_permanently` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag indicates that the table was detached PERMANENTLY.
**Example**
```sql
SELECT * FROM system.detached_tables FORMAT Vertical;
```
```text
Row 1:
──────
database: base
table: t1
uuid: 81b1c20a-b7c6-4116-a2ce-7583fb6b6736
metadata_path: /var/lib/clickhouse/store/461/461cf698-fd0b-406d-8c01-5d8fd5748a91/t1.sql
is_permanently: 1
```

View File

@ -9,7 +9,6 @@ Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) The name of the function.
- `is_aggregate` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Whether the function is an aggregate function.
- `is_deterministic` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md))) - Whether the function is deterministic.
- `case_insensitive`, ([UInt8](../../sql-reference/data-types/int-uint.md)) - Whether the function name can be used case-insensitively.
- `alias_to`, ([String](../../sql-reference/data-types/string.md)) - The original function name, if the function name is an alias.
- `create_query`, ([String](../../sql-reference/data-types/enum.md)) - Unused.

View File

@ -7,7 +7,7 @@ keywords: [object, data type]
# Object Data Type (deprecated)
**This feature is not production-ready and is now deprecated.** If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864).
**This feature is not production-ready and is now deprecated.** If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-formats/json/overview) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864).
<hr />

View File

@ -56,7 +56,6 @@ Functions:
## Related content
- [Reducing ClickHouse Storage Cost with the Low Cardinality Type Lessons from an Instana Engineer](https://altinity.com/blog/2020-5-20-reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer)
- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf)
- Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema)
- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)
- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf)

View File

@ -76,7 +76,7 @@ WHERE macro = 'test';
└───────┴──────────────┘
```
## FQDN
## fqdn
Returns the fully qualified domain name of the ClickHouse server.
@ -86,7 +86,7 @@ Returns the fully qualified domain name of the ClickHouse server.
fqdn();
```
Aliases: `fullHostName`, 'FQDN'.
Aliases: `fullHostName`, `FQDN`.
**Returned value**
@ -2102,14 +2102,14 @@ Result:
└─────────────────┘
```
## filesystemFree
## filesystemUnreserved
Returns the total amount of the free space on the filesystem hosting the database persistence. See also `filesystemAvailable`
Returns the total amount of the free space on the filesystem hosting the database persistence. (previously `filesystemFree`). See also [`filesystemAvailable`](#filesystemavailable).
**Syntax**
```sql
filesystemFree()
filesystemUnreserved()
```
**Returned value**
@ -2121,7 +2121,7 @@ filesystemFree()
Query:
```sql
SELECT formatReadableSize(filesystemFree()) AS "Free space";
SELECT formatReadableSize(filesystemUnreserved()) AS "Free space";
```
Result:
@ -2449,11 +2449,11 @@ As you can see, `runningAccumulate` merges states for each group of rows separat
## joinGet
The function lets you extract data from the table the same way as from a [dictionary](../../sql-reference/dictionaries/index.md).
Gets the data from [Join](../../engines/table-engines/special/join.md#creating-a-table) tables using the specified join key.
The function lets you extract data from the table the same way as from a [dictionary](../../sql-reference/dictionaries/index.md). Gets the data from [Join](../../engines/table-engines/special/join.md#creating-a-table) tables using the specified join key.
:::note
Only supports tables created with the `ENGINE = Join(ANY, LEFT, <join_keys>)` statement.
:::
**Syntax**
@ -2463,26 +2463,32 @@ joinGet(join_storage_table_name, `value_column`, join_keys)
**Arguments**
- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed. The identifier is searched in the default database (see setting `default_database` in the config file). To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example.
- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed.
- `value_column` — name of the column of the table that contains required data.
- `join_keys` — list of keys.
:::note
The identifier is searched for in the default database (see setting `default_database` in the config file). To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example.
:::
**Returned value**
Returns a list of values corresponded to list of keys.
If certain does not exist in source table then `0` or `null` will be returned based on [join_use_nulls](../../operations/settings/settings.md#join_use_nulls) setting.
- Returns a list of values corresponded to the list of keys.
:::note
If a certain key does not exist in source table then `0` or `null` will be returned based on [join_use_nulls](../../operations/settings/settings.md#join_use_nulls) setting during table creation.
More info about `join_use_nulls` in [Join operation](../../engines/table-engines/special/join.md).
:::
**Example**
Input table:
```sql
CREATE DATABASE db_test
CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls = 1
INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13)
CREATE DATABASE db_test;
CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id);
INSERT INTO db_test.id_val VALUES (1, 11)(2, 12)(4, 13);
SELECT * FROM db_test.id_val;
```
```text
@ -2496,18 +2502,116 @@ INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13)
Query:
```sql
SELECT joinGet(db_test.id_val, 'val', toUInt32(number)) from numbers(4) SETTINGS join_use_nulls = 1
SELECT number, joinGet(db_test.id_val, 'val', toUInt32(number)) from numbers(4);
```
Result:
```text
┌─joinGet(db_test.id_val, 'val', toUInt32(number))─┐
│ 0 │
│ 11 │
│ 12 │
│ 0 │
└──────────────────────────────────────────────────┘
┌─number─┬─joinGet('db_test.id_val', 'val', toUInt32(number))─┐
1. │ 0 │ 0 │
2. │ 1 │ 11 │
3. │ 2 │ 12 │
4. │ 3 │ 0 │
└────────┴────────────────────────────────────────────────────┘
```
Setting `join_use_nulls` can be used during table creation to change the behaviour of what gets returned if no key exists in the source table.
```sql
CREATE DATABASE db_test;
CREATE TABLE db_test.id_val_nulls(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls=1;
INSERT INTO db_test.id_val_nulls VALUES (1, 11)(2, 12)(4, 13);
SELECT * FROM db_test.id_val_nulls;
```
```text
┌─id─┬─val─┐
│ 4 │ 13 │
│ 2 │ 12 │
│ 1 │ 11 │
└────┴─────┘
```
Query:
```sql
SELECT number, joinGet(db_test.id_val_nulls, 'val', toUInt32(number)) from numbers(4);
```
Result:
```text
┌─number─┬─joinGet('db_test.id_val_nulls', 'val', toUInt32(number))─┐
1. │ 0 │ ᴺᵁᴸᴸ │
2. │ 1 │ 11 │
3. │ 2 │ 12 │
4. │ 3 │ ᴺᵁᴸᴸ │
└────────┴──────────────────────────────────────────────────────────┘
```
## joinGetOrNull
Like [joinGet](#joinget) but returns `NULL` when the key is missing instead of returning the default value.
**Syntax**
```sql
joinGetOrNull(join_storage_table_name, `value_column`, join_keys)
```
**Arguments**
- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed.
- `value_column` — name of the column of the table that contains required data.
- `join_keys` — list of keys.
:::note
The identifier is searched for in the default database (see setting `default_database` in the config file). To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example.
:::
**Returned value**
- Returns a list of values corresponded to the list of keys.
:::note
If a certain key does not exist in source table then `NULL` is returned for that key.
:::
**Example**
Input table:
```sql
CREATE DATABASE db_test;
CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id);
INSERT INTO db_test.id_val VALUES (1, 11)(2, 12)(4, 13);
SELECT * FROM db_test.id_val;
```
```text
┌─id─┬─val─┐
│ 4 │ 13 │
│ 2 │ 12 │
│ 1 │ 11 │
└────┴─────┘
```
Query:
```sql
SELECT number, joinGetOrNull(db_test.id_val, 'val', toUInt32(number)) from numbers(4);
```
Result:
```text
┌─number─┬─joinGetOrNull('db_test.id_val', 'val', toUInt32(number))─┐
1. │ 0 │ ᴺᵁᴸᴸ │
2. │ 1 │ 11 │
3. │ 2 │ 12 │
4. │ 3 │ ᴺᵁᴸᴸ │
└────────┴──────────────────────────────────────────────────────────┘
```
## catboostEvaluate

View File

@ -21,7 +21,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [COMMENT 'comment for column'] [compression_codec] [TTL expr2],
...
) ENGINE = engine
COMMENT 'comment for table'
[COMMENT 'comment for table']
```
Creates a table named `table_name` in the `db` database or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine.
@ -626,11 +626,6 @@ SELECT * FROM base.t1;
You can add a comment to the table when you creating it.
:::note
The comment clause is supported by all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md).
:::
**Syntax**
``` sql

View File

@ -16,6 +16,7 @@ Syntax:
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name]
[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }]
AS SELECT ...
[COMMENT 'comment']
```
Normal views do not store any data. They just perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
@ -57,6 +58,7 @@ SELECT * FROM view(column1=value1, column2=value2 ...)
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE]
[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }]
AS SELECT ...
[COMMENT 'comment']
```
:::tip
@ -161,6 +163,7 @@ RANDOMIZE FOR interval
DEPENDS ON [db.]name [, [db.]name [, ...]]
[TO[db.]name] [(columns)] [ENGINE = engine] [EMPTY]
AS SELECT ...
[COMMENT 'comment']
```
where `interval` is a sequence of simple intervals:
```sql
@ -267,7 +270,10 @@ This is an experimental feature that may change in backwards-incompatible ways i
:::
``` sql
CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [INNER ENGINE engine] [ENGINE engine] [WATERMARK strategy] [ALLOWED_LATENESS interval_function] [POPULATE] AS SELECT ... GROUP BY time_window_function
CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [INNER ENGINE engine] [ENGINE engine] [WATERMARK strategy] [ALLOWED_LATENESS interval_function] [POPULATE]
AS SELECT ...
GROUP BY time_window_function
[COMMENT 'comment']
```
Window view can aggregate data by time window and output the results when the window is ready to fire. It stores the partial aggregation results in an inner(or specified) table to reduce latency and can push the processing result to a specified table or push notifications using the WATCH query.

View File

@ -297,7 +297,7 @@ Algorithm requires the special column in tables. This column:
- Must contain an ordered sequence.
- Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md).
- Cant be the only column in the `JOIN` clause.
- For `hash` join algorithm it cant be the only column in the `JOIN` clause.
Syntax `ASOF JOIN ... ON`:
@ -337,7 +337,8 @@ For example, consider the following tables:
`ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` cant be joined.
:::note
`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
`ASOF JOIN` is supported only by `hash` and `full_sorting_merge` join algorithms.
It's **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
:::
## PASTE JOIN Usage

View File

@ -18,10 +18,21 @@ Reloads all dictionaries that have been successfully loaded before.
By default, dictionaries are loaded lazily (see [dictionaries_lazy_load](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load)), so instead of being loaded automatically at startup, they are initialized on first access through dictGet function or SELECT from tables with ENGINE = Dictionary. The `SYSTEM RELOAD DICTIONARIES` query reloads such dictionaries (LOADED).
Always returns `Ok.` regardless of the result of the dictionary update.
**Syntax**
```sql
SYSTEM RELOAD DICTIONARIES [ON CLUSTER cluster_name]
```
## RELOAD DICTIONARY
Completely reloads a dictionary `dictionary_name`, regardless of the state of the dictionary (LOADED / NOT_LOADED / FAILED).
Always returns `Ok.` regardless of the result of updating the dictionary.
``` sql
SYSTEM RELOAD DICTIONARY [ON CLUSTER cluster_name] dictionary_name
```
The status of the dictionary can be checked by querying the `system.dictionaries` table.
``` sql

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/azureBlobStorageCluster
sidebar_position: 55
sidebar_position: 15
sidebar_label: azureBlobStorageCluster
title: "azureBlobStorageCluster Table Function"
---

View File

@ -1,36 +0,0 @@
---
slug: /en/sql-reference/table-functions/fuzzQuery
sidebar_position: 75
sidebar_label: fuzzQuery
---
# fuzzQuery
Perturbs the given query string with random variations.
``` sql
fuzzQuery(query[, max_query_length[, random_seed]])
```
**Arguments**
- `query` (String) - The source query to perform the fuzzing on.
- `max_query_length` (UInt64) - A maximum length the query can get during the fuzzing process.
- `random_seed` (UInt64) - A random seed for producing stable results.
**Returned Value**
A table object with a single column containing perturbed query strings.
## Usage Example
``` sql
SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2;
```
```
┌─query──────────────────────────────────────────────────────────┐
1. │ SELECT 'a' AS key GROUP BY key │
2. │ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │
└────────────────────────────────────────────────────────────────┘
```

View File

@ -6,38 +6,38 @@ sidebar_label: Playground
# ClickHouse Playground {#clickhouse-playground}
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
Several example datasets are available in Playground.
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) позволяет пользователям экспериментировать с ClickHouse, выполняя запросы мгновенно, без необходимости настройки сервера или кластера.
В Playground доступны несколько примеров наборов данных.
You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
Вы можете выполнять запросы к Playground, используя любой HTTP-клиент, например [curl](https://curl.haxx.se) или [wget](https://www.gnu.org/software/wget/), или настроить соединение, используя драйверы [JDBC](../interfaces/jdbc.md) или [ODBC](../interfaces/odbc.md). Дополнительную информацию о программных продуктах, поддерживающих ClickHouse, можно найти [здесь](../interfaces/index.md).
## Credentials {#credentials}
## Учетные данные {#credentials}
| Parameter | Value |
| Параметр | Значение |
|:--------------------|:-----------------------------------|
| HTTPS endpoint | `https://play.clickhouse.com:443/` |
| Native TCP endpoint | `play.clickhouse.com:9440` |
| User | `explorer` or `play` |
| Password | (empty) |
| HTTPS-адрес | `https://play.clickhouse.com:443/` |
| TCP-адрес | `play.clickhouse.com:9440` |
| Пользователь | `explorer` или `play` |
| Пароль | (пусто) |
## Limitations {#limitations}
## Ограничения {#limitations}
The queries are executed as a read-only user. It implies some limitations:
Запросы выполняются от имени пользователя с правами только на чтение. Это предполагает некоторые ограничения:
- DDL queries are not allowed
- INSERT queries are not allowed
- DDL-запросы не разрешены
- INSERT-запросы не разрешены
The service also have quotas on its usage.
Сервис также имеет квоты на использование.
## Examples {#examples}
## Примеры {#examples}
HTTPS endpoint example with `curl`:
Пример использования HTTPS-адреса с `curl`:
``` bash
```bash
curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'"
```
TCP endpoint example with [CLI](../interfaces/cli.md):
Пример использования TCP-адреса с [CLI](../interfaces/cli.md):
``` bash
clickhouse client --secure --host play.clickhouse.com --user explorer

View File

@ -356,7 +356,7 @@ sidebar_label: "\u53D8\u66F4\u65E5\u5FD7"
#### 新功能 {#new-feature-1}
- 添加 `deduplicate_blocks_in_dependent_materialized_views` 用于控制具有实例化视图的表中幂等插入的行为的选项。 这个新功能是由Altinity的特殊要求添加到错误修正版本中的。
- 添加 `deduplicate_blocks_in_dependent_materialized_views` 用于控制具有实例化视图的表中幂等插入的行为的选项。
[#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy)
### ClickHouse版本v20.1.2.4,2020-01-22 {#clickhouse-release-v20-1-2-4-2020-01-22}

View File

@ -209,8 +209,8 @@ std::vector<String> Client::loadWarningMessages()
{} /* query_parameters */,
"" /* query_id */,
QueryProcessingStage::Complete,
&global_context->getSettingsRef(),
&global_context->getClientInfo(), false, {});
&client_context->getSettingsRef(),
&client_context->getClientInfo(), false, {});
while (true)
{
Packet packet = connection->receivePacket();
@ -306,9 +306,6 @@ void Client::initialize(Poco::Util::Application & self)
if (env_password && !config().has("password"))
config().setString("password", env_password);
// global_context->setApplicationType(Context::ApplicationType::CLIENT);
global_context->setQueryParameters(query_parameters);
/// settings and limits could be specified in config file, but passed settings has higher priority
for (const auto & setting : global_context->getSettingsRef().allUnchanged())
{
@ -382,7 +379,7 @@ try
showWarnings();
/// Set user password complexity rules
auto & access_control = global_context->getAccessControl();
auto & access_control = client_context->getAccessControl();
access_control.setPasswordComplexityRules(connection->getPasswordComplexityRules());
if (is_interactive && !delayed_interactive)
@ -459,7 +456,7 @@ void Client::connect()
<< connection_parameters.host << ":" << connection_parameters.port
<< (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl;
connection = Connection::createConnection(connection_parameters, global_context);
connection = Connection::createConnection(connection_parameters, client_context);
if (max_client_network_bandwidth)
{
@ -528,7 +525,7 @@ void Client::connect()
}
}
if (!global_context->getSettingsRef().use_client_time_zone)
if (!client_context->getSettingsRef().use_client_time_zone)
{
const auto & time_zone = connection->getServerTimezone(connection_parameters.timeouts);
if (!time_zone.empty())
@ -611,7 +608,7 @@ void Client::printChangedSettings() const
}
};
print_changes(global_context->getSettingsRef().changes(), "settings");
print_changes(client_context->getSettingsRef().changes(), "settings");
print_changes(cmd_merge_tree_settings.changes(), "MergeTree settings");
}
@ -709,7 +706,7 @@ bool Client::processWithFuzzing(const String & full_query)
{
const char * begin = full_query.data();
orig_ast = parseQuery(begin, begin + full_query.size(),
global_context->getSettingsRef(),
client_context->getSettingsRef(),
/*allow_multi_statements=*/ true);
}
catch (const Exception & e)
@ -733,7 +730,7 @@ bool Client::processWithFuzzing(const String & full_query)
}
// Kusto is not a subject for fuzzing (yet)
if (global_context->getSettingsRef().dialect == DB::Dialect::kusto)
if (client_context->getSettingsRef().dialect == DB::Dialect::kusto)
{
return true;
}
@ -1166,6 +1163,11 @@ void Client::processOptions(const OptionsDescription & options_description,
if (options.count("opentelemetry-tracestate"))
global_context->getClientTraceContext().tracestate = options["opentelemetry-tracestate"].as<std::string>();
/// In case of clickhouse-client the `client_context` can be just an alias for the `global_context`.
/// (There is no need to copy the context because clickhouse-client has no background tasks so it won't use that context in parallel.)
client_context = global_context;
initClientContext();
}
@ -1205,11 +1207,6 @@ void Client::processConfig()
pager = config().getString("pager", "");
setDefaultFormatsAndCompressionFromConfiguration();
global_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
global_context->setQueryKindInitial();
global_context->setQuotaClientKey(config().getString("quota_key", ""));
global_context->setQueryKind(query_kind);
}

View File

@ -9,17 +9,13 @@ namespace DB
class Client : public ClientBase
{
public:
Client()
{
fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr);
}
Client() = default;
void initialize(Poco::Util::Application & self) override;
int main(const std::vector<String> & /*args*/) override;
protected:
Poco::Util::LayeredConfiguration & getClientConfiguration() override;
bool processWithFuzzing(const String & full_query) override;

View File

@ -45,16 +45,17 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv)
keeper_context->setDigestEnabled(true);
keeper_context->setSnapshotDisk(std::make_shared<DiskLocal>("Keeper-snapshots", options["output-dir"].as<std::string>()));
DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false);
/// TODO(hanfei): support rocksdb here
DB::KeeperMemoryStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false);
DB::deserializeKeeperStorageFromSnapshotsDir(storage, options["zookeeper-snapshots-dir"].as<std::string>(), logger);
storage.initializeSystemNodes();
DB::deserializeLogsAndApplyToStorage(storage, options["zookeeper-logs-dir"].as<std::string>(), logger);
DB::SnapshotMetadataPtr snapshot_meta = std::make_shared<DB::SnapshotMetadata>(storage.getZXID(), 1, std::make_shared<nuraft::cluster_config>());
DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta);
DB::KeeperStorageSnapshot<DB::KeeperMemoryStorage> snapshot(&storage, snapshot_meta);
DB::KeeperSnapshotManager manager(1, keeper_context);
DB::KeeperSnapshotManager<DB::KeeperMemoryStorage> manager(1, keeper_context);
auto snp = manager.serializeSnapshotToBuffer(snapshot);
auto file_info = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID());
std::cout << "Snapshot serialized to path:" << fs::path(file_info->disk->getPath()) / file_info->path << std::endl;

View File

@ -52,6 +52,10 @@
# include <Server/CertificateReloader.h>
#endif
#if USE_GWP_ASAN
# include <Common/GWPAsan.h>
#endif
#include <Server/ProtocolServerAdapter.h>
#include <Server/KeeperTCPHandlerFactory.h>
@ -639,6 +643,10 @@ try
tryLogCurrentException(log, "Disabling cgroup memory observer because of an error during initialization");
}
#if USE_GWP_ASAN
GWPAsan::initFinished();
#endif
LOG_INFO(log, "Ready for connections.");

View File

@ -295,6 +295,8 @@ void LocalServer::cleanup()
if (suggest)
suggest.reset();
client_context.reset();
if (global_context)
{
global_context->shutdown();
@ -423,6 +425,7 @@ void LocalServer::connect()
{
connection_parameters = ConnectionParameters(getClientConfiguration(), "localhost");
/// This is needed for table function input(...).
ReadBuffer * in;
auto table_file = getClientConfiguration().getString("table-file", "-");
if (table_file == "-" || table_file == "stdin")
@ -435,7 +438,7 @@ void LocalServer::connect()
in = input.get();
}
connection = LocalConnection::createConnection(
connection_parameters, global_context, in, need_render_progress, need_render_profile_events, server_display_name);
connection_parameters, client_context, in, need_render_progress, need_render_profile_events, server_display_name);
}
@ -496,8 +499,6 @@ try
initTTYBuffer(toProgressOption(getClientConfiguration().getString("progress", "default")));
ASTAlterCommand::setFormatAlterCommandsWithParentheses(true);
applyCmdSettings(global_context);
/// try to load user defined executable functions, throw on error and die
try
{
@ -509,6 +510,11 @@ try
throw;
}
/// Must be called after we stopped initializing the global context and changing its settings.
/// After this point the global context must be stayed almost unchanged till shutdown,
/// and all necessary changes must be made to the client context instead.
createClientContext();
if (is_interactive)
{
clearTerminal();
@ -734,6 +740,9 @@ void LocalServer::processConfig()
/// Load global settings from default_profile and system_profile.
global_context->setDefaultProfiles(getClientConfiguration());
/// Command-line parameters can override settings from the default profile.
applyCmdSettings(global_context);
/// We load temporary database first, because projections need it.
DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase();
@ -777,10 +786,6 @@ void LocalServer::processConfig()
server_display_name = getClientConfiguration().getString("display_name", "");
prompt_by_server_display_name = getClientConfiguration().getRawString("prompt_by_server_display_name.default", ":) ");
global_context->setQueryKindInitial();
global_context->setQueryKind(query_kind);
global_context->setQueryParameters(query_parameters);
}
@ -859,6 +864,16 @@ void LocalServer::applyCmdOptions(ContextMutablePtr context)
}
void LocalServer::createClientContext()
{
/// In case of clickhouse-local it's necessary to use a separate context for client-related purposes.
/// We can't just change the global context because it is used in background tasks (for example, in merges)
/// which don't expect that the global context can suddenly change.
client_context = Context::createCopy(global_context);
initClientContext();
}
void LocalServer::processOptions(const OptionsDescription &, const CommandLineOptions & options, const std::vector<Arguments> &, const std::vector<Arguments> &)
{
if (options.count("table"))

View File

@ -31,7 +31,6 @@ public:
int main(const std::vector<String> & /*args*/) override;
protected:
Poco::Util::LayeredConfiguration & getClientConfiguration() override;
void connect() override;
@ -50,7 +49,6 @@ protected:
void processConfig() override;
void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &) override;
void updateLoggerLevel(const String & logs_level) override;
private:
@ -67,6 +65,8 @@ private:
void applyCmdOptions(ContextMutablePtr context);
void applyCmdSettings(ContextMutablePtr context);
void createClientContext();
ServerSettings server_settings;
std::optional<StatusFile> status;

View File

@ -1,2 +1,2 @@
clickhouse_add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp ../validateODBCConnectionString.cpp)
target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse_common_io)
target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse_common_io clickhouse_common_config)

View File

@ -2213,6 +2213,7 @@ try
CannotAllocateThreadFaultInjector::setFaultProbability(server_settings.cannot_allocate_thread_fault_injection_probability);
#if USE_GWP_ASAN
GWPAsan::initFinished();
GWPAsan::setForceSampleProbability(server_settings.gwp_asan_force_sample_probability);
#endif
@ -2731,8 +2732,7 @@ void Server::createInterserverServers(
void Server::stopServers(
std::vector<ProtocolServerAdapter> & servers,
const ServerType & server_type
) const
const ServerType & server_type) const
{
LoggerRawPtr log = &logger();

View File

@ -129,8 +129,7 @@ private:
void stopServers(
std::vector<ProtocolServerAdapter> & servers,
const ServerType & server_type
) const;
const ServerType & server_type) const;
};
}

View File

@ -118,10 +118,10 @@ AggregateFunctionPtr createAggregateFunctionAnalysisOfVariance(const std::string
void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .is_order_dependent = false };
factory.registerFunction("analysisOfVariance", {createAggregateFunctionAnalysisOfVariance, properties}, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("analysisOfVariance", {createAggregateFunctionAnalysisOfVariance, properties}, AggregateFunctionFactory::Case::Insensitive);
/// This is widely used term
factory.registerAlias("anova", "analysisOfVariance", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("anova", "analysisOfVariance", AggregateFunctionFactory::Case::Insensitive);
}
}

View File

@ -361,9 +361,9 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
AggregateFunctionProperties default_properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
factory.registerFunction("any", {createAggregateFunctionAny, default_properties});
factory.registerAlias("any_value", "any", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("first_value", "any", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("any_value", "any", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias("first_value", "any", AggregateFunctionFactory::Case::Insensitive);
factory.registerFunction("anyLast", {createAggregateFunctionAnyLast, default_properties});
factory.registerAlias("last_value", "anyLast", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("last_value", "anyLast", AggregateFunctionFactory::Case::Insensitive);
}
}

View File

@ -221,11 +221,11 @@ void registerAggregateFunctionsAnyRespectNulls(AggregateFunctionFactory & factor
= {.returns_default_when_only_null = false, .is_order_dependent = true, .is_window_function = true};
factory.registerFunction("any_respect_nulls", {createAggregateFunctionAnyRespectNulls, default_properties_for_respect_nulls});
factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::Case::Insensitive);
factory.registerFunction("anyLast_respect_nulls", {createAggregateFunctionAnyLastRespectNulls, default_properties_for_respect_nulls});
factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::Case::Insensitive);
/// Must happen after registering any and anyLast
factory.registerNullsActionTransformation("any", "any_respect_nulls");

View File

@ -46,6 +46,6 @@ AggregateFunctionPtr createAggregateFunctionAvg(const std::string & name, const
void registerAggregateFunctionAvg(AggregateFunctionFactory & factory)
{
factory.registerFunction("avg", createAggregateFunctionAvg, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("avg", createAggregateFunctionAvg, AggregateFunctionFactory::Case::Insensitive);
}
}

View File

@ -234,9 +234,9 @@ void registerAggregateFunctionsBitwise(AggregateFunctionFactory & factory)
factory.registerFunction("groupBitXor", createAggregateFunctionBitwise<AggregateFunctionGroupBitXorData>);
/// Aliases for compatibility with MySQL.
factory.registerAlias("BIT_OR", "groupBitOr", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("BIT_AND", "groupBitAnd", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("BIT_XOR", "groupBitXor", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("BIT_OR", "groupBitOr", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias("BIT_AND", "groupBitAnd", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias("BIT_XOR", "groupBitXor", AggregateFunctionFactory::Case::Insensitive);
}
}

View File

@ -9,7 +9,7 @@ template <typename T1, typename T2> using AggregateFunctionCorr = AggregateFunct
void registerAggregateFunctionsStatisticsCorr(AggregateFunctionFactory & factory)
{
factory.registerFunction("corr", createAggregateFunctionStatisticsBinary<AggregateFunctionCorr, StatisticsFunctionKind::corr>, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("corr", createAggregateFunctionStatisticsBinary<AggregateFunctionCorr, StatisticsFunctionKind::corr>, AggregateFunctionFactory::Case::Insensitive);
}
}

View File

@ -37,7 +37,7 @@ AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, cons
void registerAggregateFunctionCount(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = false };
factory.registerFunction("count", {createAggregateFunctionCount, properties}, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("count", {createAggregateFunctionCount, properties}, AggregateFunctionFactory::Case::Insensitive);
}
}

View File

@ -13,8 +13,8 @@ void registerAggregateFunctionsStatisticsCovar(AggregateFunctionFactory & factor
factory.registerFunction("covarPop", createAggregateFunctionStatisticsBinary<AggregateFunctionCovar, StatisticsFunctionKind::covarPop>);
/// Synonyms for compatibility.
factory.registerAlias("COVAR_SAMP", "covarSamp", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("COVAR_POP", "covarPop", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("COVAR_SAMP", "covarSamp", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias("COVAR_POP", "covarPop", AggregateFunctionFactory::Case::Insensitive);
}
}

View File

@ -29,7 +29,7 @@ const String & getAggregateFunctionCanonicalNameIfAny(const String & name)
return AggregateFunctionFactory::instance().getCanonicalNameIfAny(name);
}
void AggregateFunctionFactory::registerFunction(const String & name, Value creator_with_properties, CaseSensitiveness case_sensitiveness)
void AggregateFunctionFactory::registerFunction(const String & name, Value creator_with_properties, Case case_sensitiveness)
{
if (creator_with_properties.creator == nullptr)
throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionFactory: "
@ -39,7 +39,7 @@ void AggregateFunctionFactory::registerFunction(const String & name, Value creat
throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionFactory: the aggregate function name '{}' is not unique",
name);
if (case_sensitiveness == CaseInsensitive)
if (case_sensitiveness == Case::Insensitive)
{
auto key = Poco::toLower(name);
if (!case_insensitive_aggregate_functions.emplace(key, creator_with_properties).second)

View File

@ -60,7 +60,7 @@ public:
void registerFunction(
const String & name,
Value creator,
CaseSensitiveness case_sensitiveness = CaseSensitive);
Case case_sensitiveness = Case::Sensitive);
/// Register how to transform from one aggregate function to other based on NullsAction
/// Registers them both ways:

View File

@ -840,8 +840,8 @@ void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory)
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("groupArray", { createAggregateFunctionGroupArray<false>, properties });
factory.registerAlias("array_agg", "groupArray", AggregateFunctionFactory::CaseInsensitive);
factory.registerAliasUnchecked("array_concat_agg", "groupArrayArray", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("array_agg", "groupArray", AggregateFunctionFactory::Case::Insensitive);
factory.registerAliasUnchecked("array_concat_agg", "groupArrayArray", AggregateFunctionFactory::Case::Insensitive);
factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties });
factory.registerFunction("groupArrayLast", { createAggregateFunctionGroupArray<true>, properties });
}

View File

@ -1,12 +1,12 @@
#include <cassert>
#include <memory>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadHelpersArena.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnArray.h>
@ -15,18 +15,14 @@
#include <Common/HashTable/HashTableKeyHolder.h>
#include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/KeyHolderHelpers.h>
#include <Core/Field.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <memory>
namespace DB
@ -51,7 +47,7 @@ struct AggregateFunctionGroupArrayIntersectData
};
/// Puts all values to the hash set. Returns an array of unique values. Implemented for numeric types.
/// Puts all values to the hash set. Returns an array of unique values present in all inputs. Implemented for numeric types.
template <typename T>
class AggregateFunctionGroupArrayIntersect
: public IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectData<T>, AggregateFunctionGroupArrayIntersect<T>>
@ -69,7 +65,7 @@ public:
: IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectData<T>,
AggregateFunctionGroupArrayIntersect<T>>({argument_type}, parameters_, result_type_) {}
String getName() const override { return "GroupArrayIntersect"; }
String getName() const override { return "groupArrayIntersect"; }
bool allocatesMemoryInArena() const override { return false; }
@ -150,8 +146,18 @@ public:
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
readVarUInt(this->data(place).version, buf);
this->data(place).value.read(buf);
auto & set = this->data(place).value;
auto & version = this->data(place).version;
size_t size;
readVarUInt(version, buf);
readVarUInt(size, buf);
set.reserve(size);
for (size_t i = 0; i < size; ++i)
{
T key;
readIntBinary(key, buf);
set.insert(key);
}
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
@ -203,7 +209,7 @@ public:
: IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectGenericData, AggregateFunctionGroupArrayIntersectGeneric<is_plain_column>>({input_data_type_}, parameters_, result_type_)
, input_data_type(result_type_) {}
String getName() const override { return "GroupArrayIntersect"; }
String getName() const override { return "groupArrayIntersect"; }
bool allocatesMemoryInArena() const override { return true; }
@ -230,7 +236,7 @@ public:
{
const char * begin = nullptr;
StringRef serialized = data_column->serializeValueIntoArena(offset + i, *arena, begin);
assert(serialized.data != nullptr);
chassert(serialized.data != nullptr);
set.emplace(SerializedKeyHolder{serialized, *arena}, it, inserted);
}
}
@ -250,7 +256,7 @@ public:
{
const char * begin = nullptr;
StringRef serialized = data_column->serializeValueIntoArena(offset + i, *arena, begin);
assert(serialized.data != nullptr);
chassert(serialized.data != nullptr);
it = set.find(serialized);
if (it != nullptr)
@ -292,7 +298,7 @@ public:
}
return new_map;
};
auto new_map = rhs_value.size() < set.size() ? create_new_map(rhs_value, set) : create_new_map(set, rhs_value);
auto new_map = create_new_map(set, rhs_value);
set = std::move(new_map);
}
}
@ -316,11 +322,9 @@ public:
readVarUInt(version, buf);
readVarUInt(size, buf);
set.reserve(size);
UInt64 elem_version;
for (size_t i = 0; i < size; ++i)
{
auto key = readStringBinaryInto(*arena, buf);
readVarUInt(elem_version, buf);
set.insert(key);
}
}

View File

@ -277,7 +277,7 @@ void registerAggregateFunctionGroupConcat(AggregateFunctionFactory & factory)
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("groupConcat", { createAggregateFunctionGroupConcat, properties });
factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::Case::Insensitive);
}
}

View File

@ -350,7 +350,7 @@ AggregateFunctionPtr createAggregateFunctionKolmogorovSmirnovTest(
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory)
{
factory.registerFunction("kolmogorovSmirnovTest", createAggregateFunctionKolmogorovSmirnovTest, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("kolmogorovSmirnovTest", createAggregateFunctionKolmogorovSmirnovTest, AggregateFunctionFactory::Case::Insensitive);
}
}

View File

@ -15,11 +15,11 @@ void registerAggregateFunctionsStatisticsSecondMoment(AggregateFunctionFactory &
factory.registerFunction("stddevPop", createAggregateFunctionStatisticsUnary<AggregateFunctionSecondMoment, StatisticsFunctionKind::stddevPop>);
/// Synonyms for compatibility.
factory.registerAlias("VAR_SAMP", "varSamp", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("VAR_POP", "varPop", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("STDDEV_SAMP", "stddevSamp", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("STDDEV_POP", "stddevPop", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("STD", "stddevPop", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("VAR_SAMP", "varSamp", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias("VAR_POP", "varPop", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias("STDDEV_SAMP", "stddevSamp", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias("STDDEV_POP", "stddevPop", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias("STD", "stddevPop", AggregateFunctionFactory::Case::Insensitive);
}
}

View File

@ -72,7 +72,7 @@ AggregateFunctionPtr createAggregateFunctionSum(const std::string & name, const
void registerAggregateFunctionSum(AggregateFunctionFactory & factory)
{
factory.registerFunction("sum", createAggregateFunctionSum<AggregateFunctionSumSimple>, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("sum", createAggregateFunctionSum<AggregateFunctionSumSimple>, AggregateFunctionFactory::Case::Insensitive);
factory.registerFunction("sumWithOverflow", createAggregateFunctionSum<AggregateFunctionSumWithOverflow>);
factory.registerFunction("sumKahan", createAggregateFunctionSum<AggregateFunctionSumKahan>);
}

View File

@ -535,9 +535,9 @@ void registerAggregateFunctionTopK(AggregateFunctionFactory & factory)
factory.registerFunction("topK", { createAggregateFunctionTopK<false, false>, properties });
factory.registerFunction("topKWeighted", { createAggregateFunctionTopK<true, false>, properties });
factory.registerFunction("approx_top_k", { createAggregateFunctionTopK<false, true>, properties }, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("approx_top_sum", { createAggregateFunctionTopK<true, true>, properties }, AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("approx_top_count", "approx_top_k", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("approx_top_k", { createAggregateFunctionTopK<false, true>, properties }, AggregateFunctionFactory::Case::Insensitive);
factory.registerFunction("approx_top_sum", { createAggregateFunctionTopK<true, true>, properties }, AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias("approx_top_count", "approx_top_k", AggregateFunctionFactory::Case::Insensitive);
}
}

View File

@ -195,8 +195,8 @@ AggregateFunctionPtr createAggregateFunctionMinMax(
void registerAggregateFunctionsMinMax(AggregateFunctionFactory & factory)
{
factory.registerFunction("min", createAggregateFunctionMinMax<true>, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("max", createAggregateFunctionMinMax<false>, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("min", createAggregateFunctionMinMax<true>, AggregateFunctionFactory::Case::Insensitive);
factory.registerFunction("max", createAggregateFunctionMinMax<false>, AggregateFunctionFactory::Case::Insensitive);
}
}

View File

@ -195,7 +195,7 @@ bool SingleValueDataFixed<T>::isEqualTo(const IColumn & column, size_t index) co
template <typename T>
bool SingleValueDataFixed<T>::isEqualTo(const SingleValueDataFixed<T> & to) const
{
return has() && to.value == value;
return has() && to.has() && to.value == value;
}
template <typename T>
@ -904,6 +904,7 @@ bool SingleValueDataNumeric<T>::isEqualTo(const DB::IColumn & column, size_t ind
template <typename T>
bool SingleValueDataNumeric<T>::isEqualTo(const DB::SingleValueDataBase & to) const
{
/// to.has() is checked in memory.get().isEqualTo
auto const & other = assert_cast<const Self &>(to);
return memory.get().isEqualTo(other.memory.get());
}
@ -917,6 +918,7 @@ void SingleValueDataNumeric<T>::set(const DB::IColumn & column, size_t row_num,
template <typename T>
void SingleValueDataNumeric<T>::set(const DB::SingleValueDataBase & to, DB::Arena * arena)
{
/// to.has() is checked in memory.get().set
auto const & other = assert_cast<const Self &>(to);
return memory.get().set(other.memory.get(), arena);
}
@ -924,6 +926,7 @@ void SingleValueDataNumeric<T>::set(const DB::SingleValueDataBase & to, DB::Aren
template <typename T>
bool SingleValueDataNumeric<T>::setIfSmaller(const DB::SingleValueDataBase & to, DB::Arena * arena)
{
/// to.has() is checked in memory.get().setIfSmaller
auto const & other = assert_cast<const Self &>(to);
return memory.get().setIfSmaller(other.memory.get(), arena);
}
@ -931,6 +934,7 @@ bool SingleValueDataNumeric<T>::setIfSmaller(const DB::SingleValueDataBase & to,
template <typename T>
bool SingleValueDataNumeric<T>::setIfGreater(const DB::SingleValueDataBase & to, DB::Arena * arena)
{
/// to.has() is checked in memory.get().setIfGreater
auto const & other = assert_cast<const Self &>(to);
return memory.get().setIfGreater(other.memory.get(), arena);
}
@ -1191,7 +1195,7 @@ bool SingleValueDataString::isEqualTo(const DB::IColumn & column, size_t row_num
bool SingleValueDataString::isEqualTo(const SingleValueDataBase & other) const
{
auto const & to = assert_cast<const Self &>(other);
return has() && to.getStringRef() == getStringRef();
return has() && to.has() && to.getStringRef() == getStringRef();
}
void SingleValueDataString::set(const IColumn & column, size_t row_num, Arena * arena)
@ -1291,7 +1295,7 @@ bool SingleValueDataGeneric::isEqualTo(const IColumn & column, size_t row_num) c
bool SingleValueDataGeneric::isEqualTo(const DB::SingleValueDataBase & other) const
{
auto const & to = assert_cast<const Self &>(other);
return has() && to.value == value;
return has() && to.has() && to.value == value;
}
void SingleValueDataGeneric::set(const IColumn & column, size_t row_num, Arena *)

View File

@ -49,7 +49,7 @@ enum class QueryTreeNodeType : uint8_t
/// Convert query tree node type to string
const char * toString(QueryTreeNodeType type);
/** Query tree is semantical representation of query.
/** Query tree is a semantic representation of query.
* Query tree node represent node in query tree.
* IQueryTreeNode is base class for all query tree nodes.
*

View File

@ -68,6 +68,48 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes)
return nullptr;
}
/// Checks if the node is combination of isNull and notEquals functions of two the same arguments:
/// [ (a <> b AND) ] (a IS NULL) AND (b IS NULL)
bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs)
{
QueryTreeNodePtrWithHashSet all_arguments;
QueryTreeNodePtrWithHashSet is_null_arguments;
for (const auto & node : nodes)
{
const auto * func_node = node->as<FunctionNode>();
if (!func_node)
return false;
const auto & arguments = func_node->getArguments().getNodes();
if (func_node->getFunctionName() == "isNull" && arguments.size() == 1)
{
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
is_null_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
}
else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2)
{
if (arguments[0]->isEqual(*arguments[1]))
return false;
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[1]));
}
else
return false;
if (all_arguments.size() > 2)
return false;
}
if (all_arguments.size() != 2 || is_null_arguments.size() != 2)
return false;
lhs = all_arguments.begin()->node;
rhs = std::next(all_arguments.begin())->node;
return true;
}
bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value)
{
const auto * constant_node = node->as<ConstantNode>();
@ -213,11 +255,14 @@ private:
else if (func_name == "and")
{
const auto & and_arguments = argument_function->getArguments().getNodes();
bool all_are_is_null = and_arguments.size() == 2 && isNodeFunction(and_arguments[0], "isNull") && isNodeFunction(and_arguments[1], "isNull");
if (all_are_is_null)
QueryTreeNodePtr is_null_lhs_arg;
QueryTreeNodePtr is_null_rhs_arg;
if (matchIsNullOfTwoArgs(and_arguments, is_null_lhs_arg, is_null_rhs_arg))
{
is_null_argument_to_indices[getFunctionArgument(and_arguments.front(), 0)].push_back(or_operands.size() - 1);
is_null_argument_to_indices[getFunctionArgument(and_arguments.back(), 0)].push_back(or_operands.size() - 1);
is_null_argument_to_indices[is_null_lhs_arg].push_back(or_operands.size() - 1);
is_null_argument_to_indices[is_null_rhs_arg].push_back(or_operands.size() - 1);
continue;
}
/// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` we can be replaced with `a = b`

View File

@ -62,7 +62,7 @@ namespace ErrorCodes
namespace
{
#if defined(ABORT_ON_LOGICAL_ERROR)
#if defined(DEBUG_OR_SANITIZER_BUILD)
/** This visitor checks if Query Tree structure is valid after each pass
* in debug build.
@ -183,7 +183,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node)
for (size_t i = 0; i < passes_size; ++i)
{
passes[i]->run(query_tree_node, current_context);
#if defined(ABORT_ON_LOGICAL_ERROR)
#if defined(DEBUG_OR_SANITIZER_BUILD)
ValidationChecker(passes[i]->getName()).visit(query_tree_node);
#endif
}
@ -208,7 +208,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node, size_t up_to_pa
for (size_t i = 0; i < up_to_pass_index; ++i)
{
passes[i]->run(query_tree_node, current_context);
#if defined(ABORT_ON_LOGICAL_ERROR)
#if defined(DEBUG_OR_SANITIZER_BUILD)
ValidationChecker(passes[i]->getName()).visit(query_tree_node);
#endif
}

View File

@ -1740,7 +1740,7 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveQualifiedMatcher(Qu
const auto * tuple_data_type = typeid_cast<const DataTypeTuple *>(result_type.get());
if (!tuple_data_type)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Qualified matcher {} find non compound expression {} with type {}. Expected tuple or array of tuples. In scope {}",
"Qualified matcher {} found a non-compound expression {} with type {}. Expected a tuple or an array of tuples. In scope {}",
matcher_node->formatASTForErrorMessage(),
expression_query_tree_node->formatASTForErrorMessage(),
expression_query_tree_node->getResultType()->getName(),
@ -2919,6 +2919,17 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
resolveExpressionNode(in_second_argument, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/);
}
/// Edge case when the first argument of IN is scalar subquery.
auto & in_first_argument = function_in_arguments_nodes[0];
auto first_argument_type = in_first_argument->getNodeType();
if (first_argument_type == QueryTreeNodeType::QUERY || first_argument_type == QueryTreeNodeType::UNION)
{
IdentifierResolveScope subquery_scope(in_first_argument, &scope /*parent_scope*/);
subquery_scope.subquery_depth = scope.subquery_depth + 1;
evaluateScalarSubqueryIfNeeded(in_first_argument, subquery_scope);
}
}
/// Initialize function argument columns

View File

@ -105,7 +105,7 @@ bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAS
auto new_query = query.clone();
adjustCreateQueryForBackup(new_query, global_context);
ASTCreateQuery & create = typeid_cast<ASTCreateQuery &>(*new_query);
create.setUUID({});
create.resetUUIDs();
create.if_not_exists = false;
return new_query;
};

View File

@ -1,4 +1,5 @@
#include <Backups/RestoreCoordinationLocal.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/formatAST.h>
#include <Common/logger_useful.h>
@ -67,7 +68,7 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer
auto it = create_query_uuids.find(query_str);
if (it != create_query_uuids.end())
{
create_query.setUUID(it->second);
it->second.copyToQuery(create_query);
return true;
}
return false;
@ -79,7 +80,8 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer
return;
}
auto new_uuids = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true);
CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true};
new_uuids.copyToQuery(create_query);
{
std::lock_guard lock{mutex};

View File

@ -1,16 +1,17 @@
#pragma once
#include <Backups/IRestoreCoordination.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/CreateQueryUUIDs.h>
#include <Common/Logger.h>
#include <mutex>
#include <set>
#include <unordered_set>
namespace Poco { class Logger; }
namespace DB
{
class ASTCreateQuery;
/// Implementation of the IRestoreCoordination interface performing coordination in memory.
class RestoreCoordinationLocal : public IRestoreCoordination
@ -55,7 +56,7 @@ private:
std::set<std::pair<String /* database_zk_path */, String /* table_name */>> acquired_tables_in_replicated_databases;
std::unordered_set<String /* table_zk_path */> acquired_data_in_replicated_tables;
std::unordered_map<String, ASTCreateQuery::UUIDs> create_query_uuids;
std::unordered_map<String, CreateQueryUUIDs> create_query_uuids;
std::unordered_set<String /* root_zk_path */> acquired_data_in_keeper_map_tables;
mutable std::mutex mutex;

View File

@ -3,6 +3,7 @@
#include <Backups/RestoreCoordinationRemote.h>
#include <Backups/BackupCoordinationStageSync.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/CreateQueryUUIDs.h>
#include <Parsers/formatAST.h>
#include <Functions/UserDefined/UserDefinedSQLObjectType.h>
#include <Common/ZooKeeper/KeeperException.h>
@ -269,7 +270,8 @@ bool RestoreCoordinationRemote::acquireInsertingDataForKeeperMap(const String &
void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_query)
{
String query_str = serializeAST(create_query);
String new_uuids_str = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true).toString();
CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true};
String new_uuids_str = new_uuids.toString();
auto holder = with_retries.createRetriesControlHolder("generateUUIDForTable");
holder.retries_ctl.retryLoop(
@ -281,11 +283,14 @@ void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_que
Coordination::Error res = zk->tryCreate(path, new_uuids_str, zkutil::CreateMode::Persistent);
if (res == Coordination::Error::ZOK)
{
new_uuids.copyToQuery(create_query);
return;
}
if (res == Coordination::Error::ZNODEEXISTS)
{
create_query.setUUID(ASTCreateQuery::UUIDs::fromString(zk->get(path)));
CreateQueryUUIDs::fromString(zk->get(path)).copyToQuery(create_query);
return;
}

View File

@ -226,6 +226,9 @@ add_object_library(clickhouse_storages_windowview Storages/WindowView)
add_object_library(clickhouse_storages_s3queue Storages/ObjectStorageQueue)
add_object_library(clickhouse_storages_materializedview Storages/MaterializedView)
add_object_library(clickhouse_client Client)
# Always compile this file with the highest possible level of optimizations, even in Debug builds.
# https://github.com/ClickHouse/ClickHouse/issues/65745
set_source_files_properties(Client/ClientBaseOptimizedParts.cpp PROPERTIES COMPILE_FLAGS "-O3")
add_object_library(clickhouse_bridge BridgeHelper)
add_object_library(clickhouse_server Server)
add_object_library(clickhouse_server_http Server/HTTP)
@ -419,9 +422,6 @@ dbms_target_link_libraries (
boost::circular_buffer
boost::heap)
target_include_directories(clickhouse_common_io PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/Core/include") # uses some includes from core
dbms_target_include_directories(PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/Core/include")
target_link_libraries(clickhouse_common_io PUBLIC
ch_contrib::miniselect
ch_contrib::pdqsort)
@ -546,7 +546,7 @@ if (TARGET ch_contrib::libpqxx)
endif()
if (TARGET ch_contrib::datasketches)
target_link_libraries (clickhouse_aggregate_functions PRIVATE ch_contrib::datasketches)
dbms_target_link_libraries(PUBLIC ch_contrib::datasketches)
endif ()
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4)

View File

@ -21,6 +21,7 @@
#include <Common/StringUtils.h>
#include <Common/filesystemHelpers.h>
#include <Common/NetException.h>
#include <Common/SignalHandlers.h>
#include <Common/tryGetFileNameByFileDescriptor.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
@ -80,6 +81,10 @@
#include <Common/config_version.h>
#include "config.h"
#if USE_GWP_ASAN
# include <Common/GWPAsan.h>
#endif
namespace fs = std::filesystem;
using namespace std::literals;
@ -103,7 +108,6 @@ namespace ErrorCodes
extern const int UNEXPECTED_PACKET_FROM_SERVER;
extern const int INVALID_USAGE_OF_INPUT;
extern const int CANNOT_SET_SIGNAL_HANDLER;
extern const int UNRECOGNIZED_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int CANNOT_OPEN_FILE;
extern const int FILE_ALREADY_EXISTS;
@ -302,7 +306,20 @@ public:
};
ClientBase::~ClientBase() = default;
ClientBase::~ClientBase()
{
try
{
writeSignalIDtoSignalPipe(SignalListener::StopThread);
signal_listener_thread.join();
HandledSignals::instance().reset();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
ClientBase::ClientBase(
int in_fd_,
int out_fd_,
@ -460,7 +477,7 @@ void ClientBase::sendExternalTables(ASTPtr parsed_query)
std::vector<ExternalTableDataPtr> data;
for (auto & table : external_tables)
data.emplace_back(table.getData(global_context));
data.emplace_back(table.getData(client_context));
connection->sendExternalTablesData(data);
}
@ -673,10 +690,10 @@ try
/// intermixed with data with parallel formatting.
/// It may increase code complexity significantly.
if (!extras_into_stdout || select_only_into_file)
output_format = global_context->getOutputFormatParallelIfPossible(
output_format = client_context->getOutputFormatParallelIfPossible(
current_format, out_file_buf ? *out_file_buf : *out_buf, block);
else
output_format = global_context->getOutputFormat(
output_format = client_context->getOutputFormat(
current_format, out_file_buf ? *out_file_buf : *out_buf, block);
output_format->setAutoFlush();
@ -755,6 +772,15 @@ void ClientBase::adjustSettings()
global_context->setSettings(settings);
}
void ClientBase::initClientContext()
{
client_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
client_context->setQuotaClientKey(getClientConfiguration().getString("quota_key", ""));
client_context->setQueryKindInitial();
client_context->setQueryKind(query_kind);
client_context->setQueryParameters(query_parameters);
}
bool ClientBase::isRegularFile(int fd)
{
struct stat file_stat;
@ -945,7 +971,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
/// client-side. Thus we need to parse the query.
const char * begin = full_query.data();
auto parsed_query = parseQuery(begin, begin + full_query.size(),
global_context->getSettingsRef(),
client_context->getSettingsRef(),
/*allow_multi_statements=*/ false);
if (!parsed_query)
@ -968,7 +994,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
/// But for asynchronous inserts we don't extract data, because it's needed
/// to be done on server side in that case (for coalescing the data from multiple inserts on server side).
const auto * insert = parsed_query->as<ASTInsertQuery>();
if (insert && isSyncInsertWithData(*insert, global_context))
if (insert && isSyncInsertWithData(*insert, client_context))
query_to_execute = full_query.substr(0, insert->data - full_query.data());
else
query_to_execute = full_query;
@ -1086,7 +1112,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
}
}
const auto & settings = global_context->getSettingsRef();
const auto & settings = client_context->getSettingsRef();
const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 2 : 1;
int retries_left = 10;
@ -1101,10 +1127,10 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
connection_parameters.timeouts,
query,
query_parameters,
global_context->getCurrentQueryId(),
client_context->getCurrentQueryId(),
query_processing_stage,
&global_context->getSettingsRef(),
&global_context->getClientInfo(),
&client_context->getSettingsRef(),
&client_context->getClientInfo(),
true,
[&](const Progress & progress) { onProgress(progress); });
@ -1291,7 +1317,7 @@ void ClientBase::onProgress(const Progress & value)
void ClientBase::onTimezoneUpdate(const String & tz)
{
global_context->setSetting("session_timezone", tz);
client_context->setSetting("session_timezone", tz);
}
@ -1487,13 +1513,13 @@ bool ClientBase::receiveSampleBlock(Block & out, ColumnsDescription & columns_de
void ClientBase::setInsertionTable(const ASTInsertQuery & insert_query)
{
if (!global_context->hasInsertionTable() && insert_query.table)
if (!client_context->hasInsertionTable() && insert_query.table)
{
String table = insert_query.table->as<ASTIdentifier &>().shortName();
if (!table.empty())
{
String database = insert_query.database ? insert_query.database->as<ASTIdentifier &>().shortName() : "";
global_context->setInsertionTable(StorageID(database, table));
client_context->setInsertionTable(StorageID(database, table));
}
}
}
@ -1544,7 +1570,7 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars
const auto & parsed_insert_query = parsed_query->as<ASTInsertQuery &>();
if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && !isStdinNotEmptyAndValid(std_in))))
{
const auto & settings = global_context->getSettingsRef();
const auto & settings = client_context->getSettingsRef();
if (settings.throw_if_no_data_to_insert)
throw Exception(ErrorCodes::NO_DATA_TO_INSERT, "No data to insert");
else
@ -1558,10 +1584,10 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars
connection_parameters.timeouts,
query,
query_parameters,
global_context->getCurrentQueryId(),
client_context->getCurrentQueryId(),
query_processing_stage,
&global_context->getSettingsRef(),
&global_context->getClientInfo(),
&client_context->getSettingsRef(),
&client_context->getClientInfo(),
true,
[&](const Progress & progress) { onProgress(progress); });
@ -1609,7 +1635,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
/// Set callback to be called on file progress.
if (tty_buf)
progress_indication.setFileProgressCallback(global_context, *tty_buf);
progress_indication.setFileProgressCallback(client_context, *tty_buf);
}
/// If data fetched from file (maybe compressed file)
@ -1643,10 +1669,10 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
}
StorageFile::CommonArguments args{
WithContext(global_context),
WithContext(client_context),
parsed_insert_query->table_id,
current_format,
getFormatSettings(global_context),
getFormatSettings(client_context),
compression_method,
columns_for_storage_file,
ConstraintsDescription{},
@ -1654,7 +1680,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
{},
String{},
};
StoragePtr storage = std::make_shared<StorageFile>(in_file, global_context->getUserFilesPath(), args);
StoragePtr storage = std::make_shared<StorageFile>(in_file, client_context->getUserFilesPath(), args);
storage->startup();
SelectQueryInfo query_info;
@ -1665,16 +1691,16 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
storage->read(
plan,
sample.getNames(),
storage->getStorageSnapshot(metadata, global_context),
storage->getStorageSnapshot(metadata, client_context),
query_info,
global_context,
client_context,
{},
global_context->getSettingsRef().max_block_size,
client_context->getSettingsRef().max_block_size,
getNumberOfPhysicalCPUCores());
auto builder = plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(global_context),
BuildQueryPipelineSettings::fromContext(global_context));
QueryPlanOptimizationSettings::fromContext(client_context),
BuildQueryPipelineSettings::fromContext(client_context));
QueryPlanResourceHolder resources;
auto pipe = QueryPipelineBuilder::getPipe(std::move(*builder), resources);
@ -1735,14 +1761,14 @@ void ClientBase::sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDes
current_format = insert->format;
}
auto source = global_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size);
auto source = client_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size);
Pipe pipe(source);
if (columns_description.hasDefaults())
{
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<AddingDefaultsTransform>(header, columns_description, *source, global_context);
return std::make_shared<AddingDefaultsTransform>(header, columns_description, *source, client_context);
});
}
@ -1904,12 +1930,12 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (is_interactive)
{
global_context->setCurrentQueryId("");
client_context->setCurrentQueryId("");
// Generate a new query_id
for (const auto & query_id_format : query_id_formats)
{
writeString(query_id_format.first, std_out);
writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", global_context->getCurrentQueryId())), std_out);
writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", client_context->getCurrentQueryId())), std_out);
writeChar('\n', std_out);
std_out.next();
}
@ -1936,7 +1962,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
auto password = auth_data->getPassword();
if (password)
global_context->getAccessControl().checkPasswordComplexityRules(*password);
client_context->getAccessControl().checkPasswordComplexityRules(*password);
}
}
}
@ -1951,15 +1977,15 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
std::optional<Settings> old_settings;
SCOPE_EXIT_SAFE({
if (old_settings)
global_context->setSettings(*old_settings);
client_context->setSettings(*old_settings);
});
auto apply_query_settings = [&](const IAST & settings_ast)
{
if (!old_settings)
old_settings.emplace(global_context->getSettingsRef());
global_context->applySettingsChanges(settings_ast.as<ASTSetQuery>()->changes);
global_context->resetSettingsToDefaultValue(settings_ast.as<ASTSetQuery>()->default_settings);
old_settings.emplace(client_context->getSettingsRef());
client_context->applySettingsChanges(settings_ast.as<ASTSetQuery>()->changes);
client_context->resetSettingsToDefaultValue(settings_ast.as<ASTSetQuery>()->default_settings);
};
const auto * insert = parsed_query->as<ASTInsertQuery>();
@ -1992,7 +2018,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (insert && insert->select)
insert->tryFindInputFunction(input_function);
bool is_async_insert_with_inlined_data = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData();
bool is_async_insert_with_inlined_data = client_context->getSettingsRef().async_insert && insert && insert->hasInlinedData();
if (is_async_insert_with_inlined_data)
{
@ -2027,9 +2053,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (change.name == "profile")
current_profile = change.value.safeGet<String>();
else
global_context->applySettingChange(change);
client_context->applySettingChange(change);
}
global_context->resetSettingsToDefaultValue(set_query->default_settings);
client_context->resetSettingsToDefaultValue(set_query->default_settings);
/// Query parameters inside SET queries should be also saved on the client side
/// to override their previous definitions set with --param_* arguments
@ -2037,7 +2063,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
for (const auto & [name, value] : set_query->query_parameters)
query_parameters.insert_or_assign(name, value);
global_context->addQueryParameters(NameToNameMap{set_query->query_parameters.begin(), set_query->query_parameters.end()});
client_context->addQueryParameters(NameToNameMap{set_query->query_parameters.begin(), set_query->query_parameters.end()});
}
if (const auto * use_query = parsed_query->as<ASTUseQuery>())
{
@ -2070,9 +2096,18 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
progress_indication.writeFinalProgress();
output_stream << std::endl << std::endl;
}
else if (getClientConfiguration().getBool("print-time-to-stderr", false))
else
{
const auto & config = getClientConfiguration();
if (config.getBool("print-time-to-stderr", false))
error_stream << progress_indication.elapsedSeconds() << "\n";
const auto & print_memory_mode = config.getString("print-memory-to-stderr", "");
auto peak_memeory_usage = std::max<Int64>(progress_indication.getMemoryUsage().peak, 0);
if (print_memory_mode == "default")
error_stream << peak_memeory_usage << "\n";
else if (print_memory_mode == "readable")
error_stream << formatReadableSizeWithBinarySuffix(peak_memeory_usage) << "\n";
}
if (!is_interactive && getClientConfiguration().getBool("print-num-processed-rows", false))
@ -2105,8 +2140,8 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
if (this_query_begin >= all_queries_end)
return MultiQueryProcessingStage::QUERIES_END;
unsigned max_parser_depth = static_cast<unsigned>(global_context->getSettingsRef().max_parser_depth);
unsigned max_parser_backtracks = static_cast<unsigned>(global_context->getSettingsRef().max_parser_backtracks);
unsigned max_parser_depth = static_cast<unsigned>(client_context->getSettingsRef().max_parser_depth);
unsigned max_parser_backtracks = static_cast<unsigned>(client_context->getSettingsRef().max_parser_backtracks);
// If there are only comments left until the end of file, we just
// stop. The parser can't handle this situation because it always
@ -2126,7 +2161,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
try
{
parsed_query = parseQuery(this_query_end, all_queries_end,
global_context->getSettingsRef(),
client_context->getSettingsRef(),
/*allow_multi_statements=*/ true);
}
catch (const Exception & e)
@ -2169,7 +2204,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
{
this_query_end = find_first_symbols<'\n'>(insert_ast->data, all_queries_end);
insert_ast->end = this_query_end;
query_to_execute_end = isSyncInsertWithData(*insert_ast, global_context) ? insert_ast->data : this_query_end;
query_to_execute_end = isSyncInsertWithData(*insert_ast, client_context) ? insert_ast->data : this_query_end;
}
query_to_execute = all_queries_text.substr(this_query_begin - all_queries_text.data(), query_to_execute_end - this_query_begin);
@ -2214,6 +2249,8 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
ASTPtr parsed_query;
std::unique_ptr<Exception> current_exception;
size_t retries_count = 0;
while (true)
{
auto stage = analyzeMultiQueryText(this_query_begin, this_query_end, all_queries_end,
@ -2294,7 +2331,12 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
// Check whether the error (or its absence) matches the test hints
// (or their absence).
bool error_matches_hint = true;
if (have_error)
bool need_retry = test_hint.needRetry(server_exception, &retries_count);
if (need_retry)
{
std::this_thread::sleep_for(std::chrono::seconds(1));
}
else if (have_error)
{
if (test_hint.hasServerErrors())
{
@ -2371,13 +2413,13 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
// , where the inline data is delimited by semicolon and not by a
// newline.
auto * insert_ast = parsed_query->as<ASTInsertQuery>();
if (insert_ast && isSyncInsertWithData(*insert_ast, global_context))
if (insert_ast && isSyncInsertWithData(*insert_ast, client_context))
{
this_query_end = insert_ast->end;
adjustQueryEnd(
this_query_end, all_queries_end,
static_cast<unsigned>(global_context->getSettingsRef().max_parser_depth),
static_cast<unsigned>(global_context->getSettingsRef().max_parser_backtracks));
static_cast<unsigned>(client_context->getSettingsRef().max_parser_depth),
static_cast<unsigned>(client_context->getSettingsRef().max_parser_backtracks));
}
// Report error.
@ -2388,6 +2430,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
if (have_error && !ignore_error)
return is_interactive;
if (!need_retry)
this_query_begin = this_query_end;
break;
}
@ -2507,10 +2550,10 @@ void ClientBase::runInteractive()
if (load_suggestions)
{
/// Load suggestion data from the server.
if (global_context->getApplicationType() == Context::ApplicationType::CLIENT)
suggest->load<Connection>(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
else if (global_context->getApplicationType() == Context::ApplicationType::LOCAL)
suggest->load<LocalConnection>(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
if (client_context->getApplicationType() == Context::ApplicationType::CLIENT)
suggest->load<Connection>(client_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
else if (client_context->getApplicationType() == Context::ApplicationType::LOCAL)
suggest->load<LocalConnection>(client_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
}
if (home_path.empty())
@ -2648,7 +2691,7 @@ void ClientBase::runInteractive()
{
// If a separate connection loading suggestions failed to open a new session,
// use the main session to receive them.
suggest->load(*connection, connection_parameters.timeouts, getClientConfiguration().getInt("suggestion_limit"), global_context->getClientInfo());
suggest->load(*connection, connection_parameters.timeouts, getClientConfiguration().getInt("suggestion_limit"), client_context->getClientInfo());
}
try
@ -2697,10 +2740,10 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name)
if (!getClientConfiguration().has("log_comment"))
{
Settings settings = global_context->getSettings();
Settings settings = client_context->getSettings();
/// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]"
settings.log_comment = fs::absolute(fs::path(file_name));
global_context->setSettings(settings);
client_context->setSettings(settings);
}
return executeMultiQuery(queries_from_file);
@ -2813,168 +2856,6 @@ void ClientBase::showClientVersion()
output_stream << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl;
}
namespace
{
/// Define transparent hash to we can use
/// std::string_view with the containers
struct TransparentStringHash
{
using is_transparent = void;
size_t operator()(std::string_view txt) const
{
return std::hash<std::string_view>{}(txt);
}
};
/*
* This functor is used to parse command line arguments and replace dashes with underscores,
* allowing options to be specified using either dashes or underscores.
*/
class OptionsAliasParser
{
public:
explicit OptionsAliasParser(const boost::program_options::options_description& options)
{
options_names.reserve(options.options().size());
for (const auto& option : options.options())
options_names.insert(option->long_name());
}
/*
* Parses arguments by replacing dashes with underscores, and matches the resulting name with known options
* Implements boost::program_options::ext_parser logic
*/
std::pair<std::string, std::string> operator()(const std::string & token) const
{
if (!token.starts_with("--"))
return {};
std::string arg = token.substr(2);
// divide token by '=' to separate key and value if options style=long_allow_adjacent
auto pos_eq = arg.find('=');
std::string key = arg.substr(0, pos_eq);
if (options_names.contains(key))
// option does not require any changes, because it is already correct
return {};
std::replace(key.begin(), key.end(), '-', '_');
if (!options_names.contains(key))
// after replacing '-' with '_' argument is still unknown
return {};
std::string value;
if (pos_eq != std::string::npos && pos_eq < arg.size())
value = arg.substr(pos_eq + 1);
return {key, value};
}
private:
std::unordered_set<std::string> options_names;
};
}
/// Enable optimizations even in debug builds because otherwise options parsing becomes extremely slow affecting .sh tests
#if defined(__clang__)
#pragma clang optimize on
#endif
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
{
if (allow_repeated_settings)
addProgramOptionsAsMultitokens(cmd_settings, options_description.main_description.value());
else
addProgramOptions(cmd_settings, options_description.main_description.value());
if (allow_merge_tree_settings)
{
/// Add merge tree settings manually, because names of some settings
/// may clash. Query settings have higher priority and we just
/// skip ambiguous merge tree settings.
auto & main_options = options_description.main_description.value();
std::unordered_set<std::string, TransparentStringHash, std::equal_to<>> main_option_names;
for (const auto & option : main_options.options())
main_option_names.insert(option->long_name());
for (const auto & setting : cmd_merge_tree_settings.all())
{
const auto add_setting = [&](const std::string_view name)
{
if (auto it = main_option_names.find(name); it != main_option_names.end())
return;
if (allow_repeated_settings)
addProgramOptionAsMultitoken(cmd_merge_tree_settings, main_options, name, setting);
else
addProgramOption(cmd_merge_tree_settings, main_options, name, setting);
};
const auto & setting_name = setting.getName();
add_setting(setting_name);
const auto & settings_to_aliases = MergeTreeSettings::Traits::settingsToAliases();
if (auto it = settings_to_aliases.find(setting_name); it != settings_to_aliases.end())
{
for (const auto alias : it->second)
{
add_setting(alias);
}
}
}
}
/// Parse main commandline options.
auto parser = po::command_line_parser(arguments)
.options(options_description.main_description.value())
.extra_parser(OptionsAliasParser(options_description.main_description.value()))
.allow_unregistered();
po::parsed_options parsed = parser.run();
/// Check unrecognized options without positional options.
auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional);
if (!unrecognized_options.empty())
{
auto hints = this->getHints(unrecognized_options[0]);
if (!hints.empty())
throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'. Maybe you meant {}",
unrecognized_options[0], toString(hints));
throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]);
}
/// Check positional options.
for (const auto & op : parsed.options)
{
if (!op.unregistered && op.string_key.empty() && !op.original_tokens[0].starts_with("--")
&& !op.original_tokens[0].empty() && !op.value.empty())
{
/// Two special cases for better usability:
/// - if the option contains a whitespace, it might be a query: clickhouse "SELECT 1"
/// These are relevant for interactive usage - user-friendly, but questionable in general.
/// In case of ambiguity or for scripts, prefer using proper options.
const auto & token = op.original_tokens[0];
po::variable_value value(boost::any(op.value), false);
const char * option;
if (token.contains(' '))
option = "query";
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token);
if (!options.emplace(option, value).second)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token);
}
}
po::store(parsed, options);
}
void ClientBase::init(int argc, char ** argv)
{
namespace po = boost::program_options;
@ -3036,6 +2917,7 @@ void ClientBase::init(int argc, char ** argv)
("disable_suggestion,A", "Disable loading suggestion data. Note that suggestion data is loaded asynchronously through a second connection to ClickHouse server. Also it is reasonable to disable suggestion if you want to paste a query with TAB characters. Shorthand option -A is for those who get used to mysql client.")
("wait_for_suggestions_to_load", "Load suggestion data synchonously.")
("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)")
("memory-usage", po::value<std::string>()->implicit_value("default")->default_value("none"), "print memory usage to stderr in non-interactive mode (for benchmarks). Values: 'none', 'default', 'readable'")
("echo", "in batch mode, print query before execution")
@ -3062,6 +2944,8 @@ void ClientBase::init(int argc, char ** argv)
("max_memory_usage_in_client", po::value<std::string>(), "Set memory limit in client/local server")
("fuzzer-args", po::value<std::string>(), "Command line arguments for the LLVM's libFuzzer driver. Only relevant if the application is compiled with libFuzzer.")
("client_logs_file", po::value<std::string>(), "Path to a file for writing client logs. Currently we only have fatal logs (when the client crashes)")
;
addOptions(options_description);
@ -3121,6 +3005,14 @@ void ClientBase::init(int argc, char ** argv)
/// Output execution time to stderr in batch mode.
if (options.count("time"))
getClientConfiguration().setBool("print-time-to-stderr", true);
if (options.count("memory-usage"))
{
const auto & memory_usage_mode = options["memory-usage"].as<std::string>();
if (memory_usage_mode != "none" && memory_usage_mode != "default" && memory_usage_mode != "readable")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown memory-usage mode: {}", memory_usage_mode);
getClientConfiguration().setString("print-memory-to-stderr", memory_usage_mode);
}
if (options.count("query"))
queries = options["query"].as<std::vector<std::string>>();
if (options.count("query_id"))
@ -3218,6 +3110,30 @@ void ClientBase::init(int argc, char ** argv)
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
}
/// Print stacktrace in case of crash
HandledSignals::instance().setupTerminateHandler();
HandledSignals::instance().setupCommonDeadlySignalHandlers();
/// We don't setup signal handlers for SIGINT, SIGQUIT, SIGTERM because we don't
/// have an option for client to shutdown gracefully.
fatal_channel_ptr = new Poco::SplitterChannel;
fatal_console_channel_ptr = new Poco::ConsoleChannel;
fatal_channel_ptr->addChannel(fatal_console_channel_ptr);
if (options.count("client_logs_file"))
{
fatal_file_channel_ptr = new Poco::SimpleFileChannel(options["client_logs_file"].as<std::string>());
fatal_channel_ptr->addChannel(fatal_file_channel_ptr);
}
fatal_log = createLogger("ClientBase", fatal_channel_ptr.get(), Poco::Message::PRIO_FATAL);
signal_listener = std::make_unique<SignalListener>(nullptr, fatal_log);
signal_listener_thread.start(*signal_listener);
#if USE_GWP_ASAN
GWPAsan::initFinished();
#endif
}
}

View File

@ -6,14 +6,17 @@
#include <Common/ProgressIndication.h>
#include <Common/InterruptListener.h>
#include <Common/ShellCommand.h>
#include <Common/QueryFuzzer.h>
#include <Common/Stopwatch.h>
#include <Common/DNSResolver.h>
#include <Core/ExternalTable.h>
#include <Core/Settings.h>
#include <Poco/Util/Application.h>
#include <Poco/ConsoleChannel.h>
#include <Poco/SimpleFileChannel.h>
#include <Poco/SplitterChannel.h>
#include <Interpreters/Context.h>
#include <Client/Suggest.h>
#include <Client/QueryFuzzer.h>
#include <boost/program_options.hpp>
#include <Storages/StorageFile.h>
#include <Storages/SelectQueryInfo.h>
@ -203,6 +206,9 @@ protected:
/// Adjust some settings after command line options and config had been processed.
void adjustSettings();
/// Initializes the client context.
void initClientContext();
void setDefaultFormatsAndCompressionFromConfiguration();
void initTTYBuffer(ProgressOption progress);
@ -212,6 +218,16 @@ protected:
SharedContextHolder shared_context;
ContextMutablePtr global_context;
/// Client context is a context used only by the client to parse queries, process query parameters and to connect to clickhouse-server.
ContextMutablePtr client_context;
LoggerPtr fatal_log;
Poco::AutoPtr<Poco::SplitterChannel> fatal_channel_ptr;
Poco::AutoPtr<Poco::Channel> fatal_console_channel_ptr;
Poco::AutoPtr<Poco::Channel> fatal_file_channel_ptr;
Poco::Thread signal_listener_thread;
std::unique_ptr<Poco::Runnable> signal_listener;
bool is_interactive = false; /// Use either interactive line editing interface or batch mode.
bool is_multiquery = false;
bool delayed_interactive = false;

View File

@ -0,0 +1,176 @@
#include <Client/ClientBase.h>
#include <Core/BaseSettingsProgramOptions.h>
namespace DB
{
/**
* Program options parsing is very slow in debug builds and it affects .sh tests
* causing them to timeout sporadically.
* It seems impossible to enable optimizations for a single function (only to disable them), so
* instead we extract the code to a separate source file and compile it with different options.
*/
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int UNRECOGNIZED_ARGUMENTS;
}
namespace
{
/// Define transparent hash to we can use
/// std::string_view with the containers
struct TransparentStringHash
{
using is_transparent = void;
size_t operator()(std::string_view txt) const
{
return std::hash<std::string_view>{}(txt);
}
};
/*
* This functor is used to parse command line arguments and replace dashes with underscores,
* allowing options to be specified using either dashes or underscores.
*/
class OptionsAliasParser
{
public:
explicit OptionsAliasParser(const boost::program_options::options_description& options)
{
options_names.reserve(options.options().size());
for (const auto& option : options.options())
options_names.insert(option->long_name());
}
/*
* Parses arguments by replacing dashes with underscores, and matches the resulting name with known options
* Implements boost::program_options::ext_parser logic
*/
std::pair<std::string, std::string> operator()(const std::string & token) const
{
if (!token.starts_with("--"))
return {};
std::string arg = token.substr(2);
// divide token by '=' to separate key and value if options style=long_allow_adjacent
auto pos_eq = arg.find('=');
std::string key = arg.substr(0, pos_eq);
if (options_names.contains(key))
// option does not require any changes, because it is already correct
return {};
std::replace(key.begin(), key.end(), '-', '_');
if (!options_names.contains(key))
// after replacing '-' with '_' argument is still unknown
return {};
std::string value;
if (pos_eq != std::string::npos && pos_eq < arg.size())
value = arg.substr(pos_eq + 1);
return {key, value};
}
private:
std::unordered_set<std::string> options_names;
};
}
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
{
if (allow_repeated_settings)
addProgramOptionsAsMultitokens(cmd_settings, options_description.main_description.value());
else
addProgramOptions(cmd_settings, options_description.main_description.value());
if (allow_merge_tree_settings)
{
/// Add merge tree settings manually, because names of some settings
/// may clash. Query settings have higher priority and we just
/// skip ambiguous merge tree settings.
auto & main_options = options_description.main_description.value();
std::unordered_set<std::string, TransparentStringHash, std::equal_to<>> main_option_names;
for (const auto & option : main_options.options())
main_option_names.insert(option->long_name());
for (const auto & setting : cmd_merge_tree_settings.all())
{
const auto add_setting = [&](const std::string_view name)
{
if (auto it = main_option_names.find(name); it != main_option_names.end())
return;
if (allow_repeated_settings)
addProgramOptionAsMultitoken(cmd_merge_tree_settings, main_options, name, setting);
else
addProgramOption(cmd_merge_tree_settings, main_options, name, setting);
};
const auto & setting_name = setting.getName();
add_setting(setting_name);
const auto & settings_to_aliases = MergeTreeSettings::Traits::settingsToAliases();
if (auto it = settings_to_aliases.find(setting_name); it != settings_to_aliases.end())
{
for (const auto alias : it->second)
{
add_setting(alias);
}
}
}
}
/// Parse main commandline options.
auto parser = po::command_line_parser(arguments)
.options(options_description.main_description.value())
.extra_parser(OptionsAliasParser(options_description.main_description.value()))
.allow_unregistered();
po::parsed_options parsed = parser.run();
/// Check unrecognized options without positional options.
auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional);
if (!unrecognized_options.empty())
{
auto hints = this->getHints(unrecognized_options[0]);
if (!hints.empty())
throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'. Maybe you meant {}",
unrecognized_options[0], toString(hints));
throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]);
}
/// Check positional options.
for (const auto & op : parsed.options)
{
if (!op.unregistered && op.string_key.empty() && !op.original_tokens[0].starts_with("--")
&& !op.original_tokens[0].empty() && !op.value.empty())
{
/// Two special cases for better usability:
/// - if the option contains a whitespace, it might be a query: clickhouse "SELECT 1"
/// These are relevant for interactive usage - user-friendly, but questionable in general.
/// In case of ambiguity or for scripts, prefer using proper options.
const auto & token = op.original_tokens[0];
po::variable_value value(boost::any(op.value), false);
const char * option;
if (token.contains(' '))
option = "query";
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token);
if (!options.emplace(option, value).second)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token);
}
}
po::store(parsed, options);
}
}

View File

@ -54,8 +54,6 @@ public:
struct ReplicaInfo
{
bool collaborate_with_initiator{false};
size_t all_replicas_count{0};
size_t number_of_current_replica{0};
};

View File

@ -175,4 +175,5 @@ private:
ReadBuffer * in;
};
}

View File

@ -142,14 +142,13 @@ void MultiplexedConnections::sendQuery(
modified_settings.group_by_two_level_threshold = 0;
modified_settings.group_by_two_level_threshold_bytes = 0;
}
}
if (replica_info)
{
client_info.collaborate_with_initiator = true;
client_info.count_participating_replicas = replica_info->all_replicas_count;
client_info.number_of_current_replica = replica_info->number_of_current_replica;
}
}
/// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting.
/// Make the analyzer being set, so it will be effectively applied on the remote server.

View File

@ -68,21 +68,22 @@ Field QueryFuzzer::getRandomField(int type)
{
case 0:
{
return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)];
return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values)
/ sizeof(*bad_int64_values))];
}
case 1:
{
static constexpr double values[]
= {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999,
1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20,
FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % std::size(values)];
FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))];
}
case 2:
{
static constexpr UInt64 scales[] = {0, 1, 2, 10};
return DecimalField<Decimal64>(
bad_int64_values[fuzz_rand() % std::size(bad_int64_values)],
static_cast<UInt32>(scales[fuzz_rand() % std::size(scales)])
bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))],
static_cast<UInt32>(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))])
);
}
default:
@ -164,8 +165,7 @@ Field QueryFuzzer::fuzzField(Field field)
{
size_t pos = fuzz_rand() % arr.size();
arr.erase(arr.begin() + pos);
if (debug_stream)
*debug_stream << "erased\n";
std::cerr << "erased\n";
}
if (fuzz_rand() % 5 == 0)
@ -174,14 +174,12 @@ Field QueryFuzzer::fuzzField(Field field)
{
size_t pos = fuzz_rand() % arr.size();
arr.insert(arr.begin() + pos, fuzzField(arr[pos]));
if (debug_stream)
*debug_stream << fmt::format("inserted (pos {})\n", pos);
std::cerr << fmt::format("inserted (pos {})\n", pos);
}
else
{
arr.insert(arr.begin(), getRandomField(0));
if (debug_stream)
*debug_stream << "inserted (0)\n";
std::cerr << "inserted (0)\n";
}
}
@ -199,9 +197,7 @@ Field QueryFuzzer::fuzzField(Field field)
{
size_t pos = fuzz_rand() % arr.size();
arr.erase(arr.begin() + pos);
if (debug_stream)
*debug_stream << "erased\n";
std::cerr << "erased\n";
}
if (fuzz_rand() % 5 == 0)
@ -210,16 +206,12 @@ Field QueryFuzzer::fuzzField(Field field)
{
size_t pos = fuzz_rand() % arr.size();
arr.insert(arr.begin() + pos, fuzzField(arr[pos]));
if (debug_stream)
*debug_stream << fmt::format("inserted (pos {})\n", pos);
std::cerr << fmt::format("inserted (pos {})\n", pos);
}
else
{
arr.insert(arr.begin(), getRandomField(0));
if (debug_stream)
*debug_stream << "inserted (0)\n";
std::cerr << "inserted (0)\n";
}
}
@ -352,8 +344,7 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast)
}
else
{
if (debug_stream)
*debug_stream << "No random column.\n";
std::cerr << "No random column.\n";
}
}
@ -387,8 +378,7 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast)
if (col)
impl->children.insert(pos, col);
else
if (debug_stream)
*debug_stream << "No random column.\n";
std::cerr << "No random column.\n";
}
// We don't have to recurse here to fuzz the children, this is handled by
@ -1371,15 +1361,11 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast)
collectFuzzInfoMain(ast);
fuzz(ast);
if (out_stream)
{
*out_stream << std::endl;
WriteBufferFromOStream ast_buf(*out_stream, 4096);
std::cout << std::endl;
WriteBufferFromOStream ast_buf(std::cout, 4096);
formatAST(*ast, ast_buf, false /*highlight*/);
ast_buf.finalize();
*out_stream << std::endl << std::endl;
}
std::cout << std::endl << std::endl;
}
}

View File

@ -35,31 +35,9 @@ struct ASTWindowDefinition;
* queries, so you want to feed it a lot of queries to get some interesting mix
* of them. Normally we feed SQL regression tests to it.
*/
class QueryFuzzer
struct QueryFuzzer
{
public:
explicit QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr)
: fuzz_rand(fuzz_rand_)
, out_stream(out_stream_)
, debug_stream(debug_stream_)
{
}
// This is the only function you have to call -- it will modify the passed
// ASTPtr to point to new AST with some random changes.
void fuzzMain(ASTPtr & ast);
ASTs getInsertQueriesForFuzzedTables(const String & full_query);
ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query);
void notifyQueryFailed(ASTPtr ast);
static bool isSuitableForFuzzing(const ASTCreateQuery & create);
private:
pcg64 fuzz_rand;
std::ostream * out_stream = nullptr;
std::ostream * debug_stream = nullptr;
pcg64 fuzz_rand{randomSeed()};
// We add elements to expression lists with fixed probability. Some elements
// are so large, that the expected number of elements we add to them is
@ -88,6 +66,10 @@ private:
std::unordered_map<std::string, size_t> index_of_fuzzed_table;
std::set<IAST::Hash> created_tables_hashes;
// This is the only function you have to call -- it will modify the passed
// ASTPtr to point to new AST with some random changes.
void fuzzMain(ASTPtr & ast);
// Various helper functions follow, normally you shouldn't have to call them.
Field getRandomField(int type);
Field fuzzField(Field field);
@ -95,6 +77,9 @@ private:
ASTPtr getRandomExpressionList();
DataTypePtr fuzzDataType(DataTypePtr type);
DataTypePtr getRandomType();
ASTs getInsertQueriesForFuzzedTables(const String & full_query);
ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query);
void notifyQueryFailed(ASTPtr ast);
void replaceWithColumnLike(ASTPtr & ast);
void replaceWithTableLike(ASTPtr & ast);
void fuzzOrderByElement(ASTOrderByElement * elem);
@ -117,6 +102,8 @@ private:
void addTableLike(ASTPtr ast);
void addColumnLike(ASTPtr ast);
void collectFuzzInfoRecurse(ASTPtr ast);
static bool isSuitableForFuzzing(const ASTCreateQuery & create);
};
}

View File

@ -10,6 +10,7 @@
namespace DB::ErrorCodes
{
extern const int CANNOT_PARSE_TEXT;
extern const int OK;
}
namespace DB
@ -62,9 +63,28 @@ bool TestHint::hasExpectedServerError(int error)
return std::find(server_errors.begin(), server_errors.end(), error) != server_errors.end();
}
bool TestHint::needRetry(const std::unique_ptr<Exception> & server_exception, size_t * retries_counter)
{
chassert(retries_counter);
if (max_retries <= *retries_counter)
return false;
++*retries_counter;
int error = ErrorCodes::OK;
if (server_exception)
error = server_exception->code();
if (retry_until)
return !hasExpectedServerError(error); /// retry until we get the expected error
else
return hasExpectedServerError(error); /// retry while we have the expected error
}
void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint)
{
std::unordered_set<std::string_view> commands{"echo", "echoOn", "echoOff"};
std::unordered_set<std::string_view> commands{"echo", "echoOn", "echoOff", "retry"};
std::unordered_set<std::string_view> command_errors{
"serverError",
@ -73,6 +93,9 @@ void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint)
for (Token token = comment_lexer.nextToken(); !token.isEnd(); token = comment_lexer.nextToken())
{
if (token.type == TokenType::Whitespace)
continue;
String item = String(token.begin, token.end);
if (token.type == TokenType::BareWord && commands.contains(item))
{
@ -82,6 +105,30 @@ void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint)
echo.emplace(true);
if (item == "echoOff")
echo.emplace(false);
if (item == "retry")
{
token = comment_lexer.nextToken();
while (token.type == TokenType::Whitespace)
token = comment_lexer.nextToken();
if (token.type != TokenType::Number)
throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Could not parse the number of retries: {}",
std::string_view(token.begin, token.end));
max_retries = std::stoul(std::string(token.begin, token.end));
token = comment_lexer.nextToken();
while (token.type == TokenType::Whitespace)
token = comment_lexer.nextToken();
if (token.type != TokenType::BareWord ||
(std::string_view(token.begin, token.end) != "until" &&
std::string_view(token.begin, token.end) != "while"))
throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Expected 'until' or 'while' after the number of retries, got: {}",
std::string_view(token.begin, token.end));
retry_until = std::string_view(token.begin, token.end) == "until";
}
}
else if (!is_leading_hint && token.type == TokenType::BareWord && command_errors.contains(item))
{
@ -133,6 +180,9 @@ void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint)
break;
}
}
if (max_retries && server_errors.size() != 1)
throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Expected one serverError after the 'retry N while|until' command");
}
}

View File

@ -6,6 +6,7 @@
#include <fmt/format.h>
#include <Core/Types.h>
#include <Common/Exception.h>
namespace DB
@ -65,12 +66,17 @@ public:
bool hasExpectedClientError(int error);
bool hasExpectedServerError(int error);
bool needRetry(const std::unique_ptr<Exception> & server_exception, size_t * retries_counter);
private:
const String & query;
ErrorVector server_errors{};
ErrorVector client_errors{};
std::optional<bool> echo;
size_t max_retries = 0;
bool retry_until = false;
void parse(Lexer & comment_lexer, bool is_leading_hint);
bool allErrorsExpected(int actual_server_error, int actual_client_error) const

View File

@ -267,7 +267,7 @@ bool ColumnAggregateFunction::structureEquals(const IColumn & to) const
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length)
#else
void ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length)
@ -366,13 +366,10 @@ void ColumnAggregateFunction::updateHashWithValue(size_t n, SipHash & hash) cons
hash.update(wbuf.str().c_str(), wbuf.str().size());
}
void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnAggregateFunction::getWeakHash32() const
{
auto s = data.size();
if (hash.getData().size() != data.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", std::to_string(s), hash.getData().size());
WeakHash32 hash(s);
auto & hash_data = hash.getData();
std::vector<UInt8> v;
@ -383,6 +380,8 @@ void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
wbuf.finalize();
hash_data[i] = ::updateWeakHash32(v.data(), v.size(), hash_data[i]);
}
return hash;
}
void ColumnAggregateFunction::updateHashFast(SipHash & hash) const
@ -466,7 +465,7 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size
insertMergeFrom(from, n);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
#else
void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n)

View File

@ -145,7 +145,7 @@ public:
void insertData(const char * pos, size_t length) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertFrom(const IColumn & from, size_t n) override;
#else
using IColumn::insertFrom;
@ -177,7 +177,7 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
WeakHash32 getWeakHash32() const override;
void updateHashFast(SipHash & hash) const override;
@ -189,7 +189,7 @@ public:
void protect() override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & from, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & from, size_t start, size_t length) override;
@ -212,7 +212,7 @@ public:
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int compareAt(size_t, size_t, const IColumn &, int) const override
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override

View File

@ -271,15 +271,12 @@ void ColumnArray::updateHashWithValue(size_t n, SipHash & hash) const
getData().updateHashWithValue(offset + i, hash);
}
void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
WeakHash32 ColumnArray::getWeakHash32() const
{
auto s = offsets->size();
if (hash.getData().size() != s)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", s, hash.getData().size());
WeakHash32 hash(s);
WeakHash32 internal_hash(data->size());
data->updateWeakHash32(internal_hash);
WeakHash32 internal_hash = data->getWeakHash32();
Offset prev_offset = 0;
const auto & offsets_data = getOffsets();
@ -300,6 +297,8 @@ void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
prev_offset = offsets_data[i];
}
return hash;
}
void ColumnArray::updateHashFast(SipHash & hash) const
@ -337,7 +336,7 @@ bool ColumnArray::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnArray::insertFrom(const IColumn & src_, size_t n)
#else
void ColumnArray::doInsertFrom(const IColumn & src_, size_t n)
@ -396,7 +395,7 @@ int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan
: 1);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
#else
int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
@ -543,7 +542,7 @@ void ColumnArray::getExtremes(Field & min, Field & max) const
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)

Some files were not shown because too many files have changed in this diff Show More