Merge remote-tracking branch 'ClickHouse/master' into icu-s390x

This commit is contained in:
Robert Schulze 2024-08-04 09:38:36 +00:00
commit ee8079b039
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
415 changed files with 4580 additions and 2646 deletions

View File

@ -0,0 +1,21 @@
# Composite action: stores the caller's `needs` context on disk and runs the
# CI buddy status check against it.
name: CheckWorkflowResults
description: Check overall workflow status and post error to slack if any
inputs:
# The caller is expected to pass `toJson(needs)` here.
needs:
description: github needs context as a json string
required: true
type: string
runs:
using: "composite"
steps:
# The script below writes the JSON to /tmp/workflow_results.json, exports that
# path as WORKFLOW_RESULT_FILE, and then invokes ci_buddy.py --check-wf-status.
# The heredoc delimiter is quoted ('EOF') so bash performs no expansion on the
# JSON payload.
# NOTE(review): presumably ci_buddy.py reads WORKFLOW_RESULT_FILE and does the
# Slack posting mentioned in the description - confirm in tests/ci/ci_buddy.py.
- name: Check Workflow
shell: bash
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ inputs.needs }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -1,168 +0,0 @@
# Composite action that drives a ClickHouse release: prepares release info,
# pushes the tag/branch, bumps versions, updates changelog/docker/security
# files, exports and tests packages, builds docker images and posts the status.
#
# IMPORTANT: inputs of a composite action always arrive as strings. A bare
# `inputs.dry-run` is therefore truthy even when the caller passes 'false'
# (any non-empty string is truthy), and `! inputs.dry-run` is always false.
# Every dry-run check below compares against the string explicitly and is
# fail-safe: anything other than the literal 'false' is treated as a dry run.
name: Release
description: Makes patch releases and creates new release branch
inputs:
  ref:
    description: 'Git reference (branch or commit sha) from which to create the release'
    required: true
    type: string
  type:
    description: 'The type of release: "new" for a new release or "patch" for a patch release'
    required: true
    type: choice
    options:
      - patch
      - new
  dry-run:
    description: 'Dry run'
    required: false
    default: true
    type: boolean
  token:
    required: true
    type: string
runs:
  using: "composite"
  steps:
    # Produces /tmp/release_info.json and exports RELEASE_TAG / COMMIT_SHA for
    # the following steps.
    - name: Prepare Release Info
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --prepare-release-info \
          --ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
          ${{ inputs.dry-run != 'false' && '--dry-run' || '' }}
        echo "::group::Release Info"
        python3 -m json.tool /tmp/release_info.json
        echo "::endgroup::"
        release_tag=$(jq -r '.release_tag' /tmp/release_info.json)
        commit_sha=$(jq -r '.commit_sha' /tmp/release_info.json)
        echo "Release Tag: $release_tag"
        echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
        echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
    - name: Download All Release Artifacts
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --download-packages ${{ inputs.dry-run != 'false' && '--dry-run' || '' }}
    - name: Push Git Tag for the Release
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --push-release-tag ${{ inputs.dry-run != 'false' && '--dry-run' || '' }}
    - name: Push New Release Branch
      if: ${{ inputs.type == 'new' }}
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run != 'false' && '--dry-run' || '' }}
    - name: Bump CH Version and Update Contributors' List
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run != 'false' && '--dry-run' || '' }}
    - name: Bump Docker versions, Changelog, Security
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        git checkout master
        python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security"
        echo "List versions"
        ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
        echo "Update docker version"
        ./utils/list-versions/update-docker-version.sh
        echo "Generate ChangeLog"
        export CI=1
        # bash provides UID but not GID; derive it so `-u` gets a full uid:gid pair
        GID=$(id -g)
        docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
          --volume=".:/ClickHouse" clickhouse/style-test \
          /ClickHouse/tests/ci/changelog.py -v --debug-helpers \
          --gh-user-or-token=${{ inputs.token }} --jobs=5 \
          --output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
        git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
        echo "Generate Security"
        python3 ./utils/security-generator/generate_security.py > SECURITY.md
        git diff HEAD
    # Only open the PR for a real (non-dry-run) patch release.
    - name: Create ChangeLog PR
      if: ${{ inputs.type == 'patch' && inputs.dry-run == 'false' }}
      uses: peter-evans/create-pull-request@v6
      with:
        author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
        token: ${{ inputs.token }}
        committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
        commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
        branch: auto/${{ env.RELEASE_TAG }}
        assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
        delete-branch: true
        title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
        labels: do not test
        body: |
          Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
          ### Changelog category (leave one):
          - Not for changelog (changelog entry is not required)
    # Drops the working-tree changes made on master above and returns to the
    # ref the workflow was started from.
    - name: Complete previous steps and Restore git state
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --set-progress-completed
        git reset --hard HEAD
        git checkout "$GITHUB_REF_NAME"
    - name: Create GH Release
      shell: bash
      if: ${{ inputs.type == 'patch' }}
      run: |
        python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run != 'false' && '--dry-run' || '' }}
    - name: Export TGZ Packages
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/artifactory.py --export-tgz ${{ inputs.dry-run != 'false' && '--dry-run' || '' }}
    - name: Test TGZ Packages
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/artifactory.py --test-tgz ${{ inputs.dry-run != 'false' && '--dry-run' || '' }}
    - name: Export RPM Packages
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/artifactory.py --export-rpm ${{ inputs.dry-run != 'false' && '--dry-run' || '' }}
    - name: Test RPM Packages
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/artifactory.py --test-rpm ${{ inputs.dry-run != 'false' && '--dry-run' || '' }}
    - name: Export Debian Packages
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/artifactory.py --export-debian ${{ inputs.dry-run != 'false' && '--dry-run' || '' }}
    - name: Test Debian Packages
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        python3 ./tests/ci/artifactory.py --test-debian ${{ inputs.dry-run != 'false' && '--dry-run' || '' }}
    # Images are pushed only when dry-run is explicitly 'false'.
    - name: Docker clickhouse/clickhouse-server building
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        cd "./tests/ci"
        python3 ./create_release.py --set-progress-started --progress "docker server release"
        export CHECK_NAME="Docker server image"
        python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ inputs.dry-run == 'false' && '--push' || '' }}
        python3 ./create_release.py --set-progress-completed
    - name: Docker clickhouse/clickhouse-keeper building
      if: ${{ inputs.type == 'patch' }}
      shell: bash
      run: |
        cd "./tests/ci"
        python3 ./create_release.py --set-progress-started --progress "docker keeper release"
        export CHECK_NAME="Docker keeper image"
        python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ inputs.dry-run == 'false' && '--push' || '' }}
        python3 ./create_release.py --set-progress-completed
    - name: Set current Release progress to Completed with OK
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --set-progress-started --progress "completed"
        python3 ./tests/ci/create_release.py --set-progress-completed
    # Runs after failures too (everything except cancellation) so the status is
    # always reported.
    - name: Post Slack Message
      if: ${{ !cancelled() }}
      shell: bash
      run: |
        python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run != 'false' && '--dry-run' || '' }}

View File

@ -1,111 +0,0 @@
name: AutoRelease
env:
PYTHONUNBUFFERED: 1
DRY_RUN: true
concurrency:
group: release
on: # yamllint disable-line rule:truthy
# Workflow uses a test bucket for packages and dry run mode (no real releases)
schedule:
- cron: '0 9 * * *'
- cron: '0 15 * * *'
workflow_dispatch:
inputs:
dry-run:
description: 'Dry run'
required: false
default: true
type: boolean
jobs:
AutoRelease:
runs-on: [self-hosted, release-maker]
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
EOF
- name: Set DRY_RUN for schedule
if: ${{ github.event_name == 'schedule' }}
run: echo "DRY_RUN=true" >> "$GITHUB_ENV"
- name: Set DRY_RUN for dispatch
if: ${{ github.event_name == 'workflow_dispatch' }}
run: echo "DRY_RUN=${{ github.event.inputs.dry-run }}" >> "$GITHUB_ENV"
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0
- name: Auto Release Prepare
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --prepare
echo "::group::Auto Release Info"
python3 -m json.tool /tmp/autorelease_info.json
echo "::endgroup::"
{
echo 'AUTO_RELEASE_PARAMS<<EOF'
cat /tmp/autorelease_info.json
echo 'EOF'
} >> "$GITHUB_ENV"
- name: Post Release Branch statuses
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --post-status
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[0].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
# Releases the second entry of AUTO_RELEASE_PARAMS.releases, if present and
# marked ready. The existence guard checks releases[1] itself (it previously
# checked releases[0], a copy-paste slip), matching the sibling steps.
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].release_branch }}
  if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1] && fromJson(env.AUTO_RELEASE_PARAMS).releases[1].ready }}
  uses: ./.github/actions/release
  with:
    ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].commit_sha }}
    type: patch
    dry-run: ${{ env.DRY_RUN }}
    token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2] && fromJson(env.AUTO_RELEASE_PARAMS).releases[2].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3] && fromJson(env.AUTO_RELEASE_PARAMS).releases[3].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].release_branch }}
if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4] && fromJson(env.AUTO_RELEASE_PARAMS).releases[4].ready }}
uses: ./.github/actions/release
with:
ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].commit_sha }}
type: patch
dry-run: ${{ env.DRY_RUN }}
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
- name: Post Slack Message
if: ${{ !cancelled() }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }}
# Kills and removes every docker container on the runner and deletes TEMP_PATH.
# `always()` makes the cleanup run even when an earlier step failed or the job
# was cancelled; without it the step is skipped on failure.
- name: Clean up
  if: always()
  run: |
    docker ps --quiet | xargs --no-run-if-empty docker kill ||:
    docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
    sudo rm -fr "$TEMP_PATH"

View File

@ -16,10 +16,15 @@ concurrency:
options: options:
- patch - patch
- new - new
only-repo:
description: 'Run only repos updates including docker (repo-recovery, tests)'
required: false
default: false
type: boolean
dry-run: dry-run:
description: 'Dry run' description: 'Dry run'
required: false required: false
default: true default: false
type: boolean type: boolean
jobs: jobs:
@ -35,10 +40,163 @@ jobs:
with: with:
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0 fetch-depth: 0
- name: Call Release Action - name: Prepare Release Info
uses: ./.github/actions/release shell: bash
run: |
# With only-repo the ref must name an existing release tag.
# `git tag -l` exits 0 even when nothing matches, so test its output rather
# than its exit status. The expansions are quoted so an empty value cannot
# break the `[` test.
if [ "${{ inputs.only-repo }}" == "true" ]; then
  if [ -z "$(git tag -l "${{ inputs.ref }}")" ]; then
    echo "With only-repo option ref must be a valid release tag"
    exit 1
  fi
fi
python3 ./tests/ci/create_release.py --prepare-release-info \
--ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \
${{ inputs.dry-run == true && '--dry-run' || '' }} \
${{ inputs.only-repo == true && '--skip-tag-check' || '' }}
echo "::group::Release Info"
python3 -m json.tool /tmp/release_info.json
echo "::endgroup::"
release_tag=$(jq -r '.release_tag' /tmp/release_info.json)
commit_sha=$(jq -r '.commit_sha' /tmp/release_info.json)
is_latest=$(jq -r '.latest' /tmp/release_info.json)
echo "Release Tag: $release_tag"
echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV"
echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV"
if [ "$is_latest" == "true" ]; then
echo "DOCKER_TAG_TYPE=release-latest" >> "$GITHUB_ENV"
else
echo "DOCKER_TAG_TYPE=release" >> "$GITHUB_ENV"
fi
- name: Download All Release Artifacts
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --download-packages ${{ inputs.dry-run == true && '--dry-run' || '' }}
- name: Push Git Tag for the Release
if: ${{ ! inputs.only-repo }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --push-release-tag ${{ inputs.dry-run == true && '--dry-run' || '' }}
- name: Push New Release Branch
if: ${{ inputs.type == 'new' && ! inputs.only-repo }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run == true && '--dry-run' || '' }}
- name: Bump CH Version and Update Contributors' List
if: ${{ ! inputs.only-repo }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run == true && '--dry-run' || '' }}
# Regenerates version_date.tsv, the docker version files, the changelog for
# RELEASE_TAG (inside the clickhouse/style-test container) and SECURITY.md on
# top of master, then prints the resulting diff. Runs for patch releases only
# and is skipped in only-repo mode.
- name: Bump Docker versions, Changelog, Security
  if: ${{ inputs.type == 'patch' && ! inputs.only-repo }}
  shell: bash
  run: |
    python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security"
    git checkout master # in case WF started from feature branch
    echo "List versions"
    ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
    echo "Update docker version"
    ./utils/list-versions/update-docker-version.sh
    echo "Generate ChangeLog"
    export CI=1
    # bash provides UID but not GID; derive it so `-u` gets a full uid:gid pair
    GID=$(id -g)
    docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
      --volume=".:/wd" --workdir="/wd" \
      clickhouse/style-test \
      ./tests/ci/changelog.py -v --debug-helpers \
      --jobs=5 \
      --output="./docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
    git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
    echo "Generate Security"
    python3 ./utils/security-generator/generate_security.py > SECURITY.md
    git diff HEAD
- name: Create ChangeLog PR
if: ${{ inputs.type == 'patch' && ! inputs.dry-run && ! inputs.only-repo }}
uses: peter-evans/create-pull-request@v6
with: with:
ref: ${{ inputs.ref }} author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
type: ${{ inputs.type }} token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
dry-run: ${{ inputs.dry-run }} committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
branch: auto/${{ env.RELEASE_TAG }}
base: master
assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
delete-branch: true
title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }}
labels: do not test
body: |
Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
### Changelog category (leave one):
- Not for changelog (changelog entry is not required)
- name: Complete previous steps and Restore git state
if: ${{ inputs.type == 'patch' && ! inputs.only-repo }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --set-progress-completed
git reset --hard HEAD
git checkout "$GITHUB_REF_NAME"
- name: Create GH Release
if: ${{ inputs.type == 'patch' && ! inputs.only-repo }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run == true && '--dry-run' || '' }}
- name: Export TGZ Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-tgz ${{ inputs.dry-run == true && '--dry-run' || '' }}
- name: Test TGZ Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-tgz ${{ inputs.dry-run == true && '--dry-run' || '' }}
- name: Export RPM Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-rpm ${{ inputs.dry-run == true && '--dry-run' || '' }}
- name: Test RPM Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-rpm ${{ inputs.dry-run == true && '--dry-run' || '' }}
- name: Export Debian Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --export-debian ${{ inputs.dry-run == true && '--dry-run' || '' }}
- name: Test Debian Packages
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
python3 ./tests/ci/artifactory.py --test-debian ${{ inputs.dry-run == true && '--dry-run' || '' }}
- name: Docker clickhouse/clickhouse-server building
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
cd "./tests/ci"
python3 ./create_release.py --set-progress-started --progress "docker server release"
export CHECK_NAME="Docker server image"
python3 docker_server.py --tag-type ${{ env.DOCKER_TAG_TYPE }} --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
python3 ./create_release.py --set-progress-completed
- name: Docker clickhouse/clickhouse-keeper building
if: ${{ inputs.type == 'patch' }}
shell: bash
run: |
cd "./tests/ci"
python3 ./create_release.py --set-progress-started --progress "docker keeper release"
export CHECK_NAME="Docker keeper image"
python3 docker_server.py --tag-type ${{ env.DOCKER_TAG_TYPE }} --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
python3 ./create_release.py --set-progress-completed
- name: Update release info. Merge created PRs
shell: bash
run: |
python3 ./tests/ci/create_release.py --merge-prs ${{ inputs.dry-run == true && '--dry-run' || '' }}
- name: Set current Release progress to Completed with OK
shell: bash
run: |
# dummy stage to finalize release info with "progress: completed; status: OK"
python3 ./tests/ci/create_release.py --set-progress-started --progress "completed"
python3 ./tests/ci/create_release.py --set-progress-completed
- name: Post Slack Message
if: ${{ !cancelled() }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run == true && '--dry-run' || '' }}

View File

@ -142,8 +142,13 @@ jobs:
# Reports should run even if Builds_1/2 fail - run them separately (not in Tests_1/2/3) # Reports should run even if Builds_1/2 fail - run them separately (not in Tests_1/2/3)
Builds_Report: Builds_Report:
# run report check for failed builds to indicate the CI error # run report check for failed builds to indicate the CI error
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} if: ${{ !cancelled()
needs: [RunConfig, StyleCheck, Builds_1, Builds_2] && needs.RunConfig.result == 'success'
&& needs.StyleCheck.result != 'failure'
&& needs.FastTest.result != 'failure'
&& needs.BuildDockers.result != 'failure'
&& contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }}
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2]
uses: ./.github/workflows/reusable_test.yml uses: ./.github/workflows/reusable_test.yml
with: with:
test_name: Builds test_name: Builds
@ -167,12 +172,9 @@ jobs:
cd "$GITHUB_WORKSPACE/tests/ci" cd "$GITHUB_WORKSPACE/tests/ci"
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
- name: Check Workflow results - name: Check Workflow results
run: | uses: ./.github/actions/check_workflow
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" with:
cat > "$WORKFLOW_RESULT_FILE" << 'EOF' needs: ${{ toJson(needs) }}
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status
################################# Stage Final ################################# ################################# Stage Final #################################
# #

View File

@ -1,69 +0,0 @@
name: PublishedReleaseCI
# - Gets artifacts from S3
# - Sends it to JFROG Artifactory
# - Adds them to the release assets
on: # yamllint disable-line rule:truthy
release:
types:
- published
workflow_dispatch:
inputs:
tag:
description: 'Release tag'
required: true
type: string
jobs:
ReleasePublish:
runs-on: [self-hosted, style-checker]
steps:
- name: Set tag from input
if: github.event_name == 'workflow_dispatch'
run: |
echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV"
- name: Set tag from REF
if: github.event_name == 'release'
run: |
echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
- name: Deploy packages and assets
run: |
curl --silent --data '' --no-buffer \
'${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true'
############################################################################################
##################################### Docker images #######################################
############################################################################################
DockerServerImages:
runs-on: [self-hosted, style-checker]
steps:
- name: Set tag from input
if: github.event_name == 'workflow_dispatch'
run: |
echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV"
- name: Set tag from REF
if: github.event_name == 'release'
run: |
echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
fetch-depth: 0 # otherwise we will have no version info
filter: tree:0
ref: ${{ env.GITHUB_TAG }}
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
export CHECK_NAME="Docker server image"
python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --check-name "$CHECK_NAME" --push
- name: Check docker clickhouse/clickhouse-keeper building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
export CHECK_NAME="Docker keeper image"
python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --check-name "$CHECK_NAME" --push
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"

View File

@ -1,74 +0,0 @@
name: TagsStableWorkflow
# - Gets artifacts from S3
# - Sends it to JFROG Artifactory
# - Adds them to the release assets
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
on: # yamllint disable-line rule:truthy
push:
tags:
- 'v*-prestable'
- 'v*-stable'
- 'v*-lts'
workflow_dispatch:
inputs:
tag:
description: 'Test tag'
required: true
type: string
jobs:
UpdateVersions:
runs-on: [self-hosted, style-checker]
steps:
- name: Set test tag
if: github.event_name == 'workflow_dispatch'
run: |
echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV"
- name: Get tag name
if: github.event_name != 'workflow_dispatch'
run: |
echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
ref: master
fetch-depth: 0
filter: tree:0
- name: Update versions, docker version, changelog, security
env:
GITHUB_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
run: |
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
./utils/list-versions/update-docker-version.sh
GID=$(id -g "${UID}")
# --network=host and CI=1 are required for the S3 access from a container
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
--volume="${GITHUB_WORKSPACE}:/ClickHouse" clickhouse/style-test \
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
--gh-user-or-token="$GITHUB_TOKEN" --jobs=5 \
--output="/ClickHouse/docs/changelogs/${GITHUB_TAG}.md" "${GITHUB_TAG}"
git add "./docs/changelogs/${GITHUB_TAG}.md"
python3 ./utils/security-generator/generate_security.py > SECURITY.md
git diff HEAD
- name: Create Pull Request
uses: peter-evans/create-pull-request@v6
with:
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
commit-message: Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }}
branch: auto/${{ env.GITHUB_TAG }}
assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher
delete-branch: true
title: Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }}
labels: do not test
body: |
Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }}
### Changelog category (leave one):
- Not for changelog (changelog entry is not required)

View File

@ -22,7 +22,6 @@
#### New Feature #### New Feature
* Add `ASOF JOIN` support for `full_sorting_join` algorithm. [#55051](https://github.com/ClickHouse/ClickHouse/pull/55051) ([vdimir](https://github.com/vdimir)). * Add `ASOF JOIN` support for `full_sorting_join` algorithm. [#55051](https://github.com/ClickHouse/ClickHouse/pull/55051) ([vdimir](https://github.com/vdimir)).
* Add new window function `percent_rank`. [#62747](https://github.com/ClickHouse/ClickHouse/pull/62747) ([lgbo](https://github.com/lgbo-ustc)).
* Support JWT authentication in `clickhouse-client` (will be available only in ClickHouse Cloud). [#62829](https://github.com/ClickHouse/ClickHouse/pull/62829) ([Konstantin Bogdanov](https://github.com/thevar1able)). * Support JWT authentication in `clickhouse-client` (will be available only in ClickHouse Cloud). [#62829](https://github.com/ClickHouse/ClickHouse/pull/62829) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Add SQL functions `changeYear`, `changeMonth`, `changeDay`, `changeHour`, `changeMinute`, `changeSecond`. For example, `SELECT changeMonth(toDate('2024-06-14'), 7)` returns date `2024-07-14`. [#63186](https://github.com/ClickHouse/ClickHouse/pull/63186) ([cucumber95](https://github.com/cucumber95)). * Add SQL functions `changeYear`, `changeMonth`, `changeDay`, `changeHour`, `changeMinute`, `changeSecond`. For example, `SELECT changeMonth(toDate('2024-06-14'), 7)` returns date `2024-07-14`. [#63186](https://github.com/ClickHouse/ClickHouse/pull/63186) ([cucumber95](https://github.com/cucumber95)).
* Introduce startup scripts, which allow the execution of preconfigured queries at the startup stage. [#64889](https://github.com/ClickHouse/ClickHouse/pull/64889) ([pufit](https://github.com/pufit)). * Introduce startup scripts, which allow the execution of preconfigured queries at the startup stage. [#64889](https://github.com/ClickHouse/ClickHouse/pull/64889) ([pufit](https://github.com/pufit)).

View File

@ -34,17 +34,13 @@ curl https://clickhouse.com/ | sh
Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know.
* [v24.7 Community Call](https://clickhouse.com/company/events/v24-7-community-release-call) - Jul 30 * [v24.8 Community Call](https://clickhouse.com/company/events/v24-8-community-release-call) - August 29
## Upcoming Events ## Upcoming Events
Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.
* [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9 * MORE COMING SOON!
* [ClickHouse Cloud - Live Update Call](https://clickhouse.com/company/events/202407-cloud-update-live) - Jul 9
* [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9
* [AWS Summit in New York](https://clickhouse.com/company/events/2024-07-awssummit-nyc) - Jul 10
* [ClickHouse Meetup @ Klaviyo - Boston](https://www.meetup.com/clickhouse-boston-user-group/events/300907870) - Jul 11
## Recent Recordings ## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"

View File

@ -57,7 +57,8 @@ option(WITH_COVERAGE "Instrumentation for code coverage with default implementat
if (WITH_COVERAGE) if (WITH_COVERAGE)
message (STATUS "Enabled instrumentation for code coverage") message (STATUS "Enabled instrumentation for code coverage")
set(COVERAGE_FLAGS "-fprofile-instr-generate -fcoverage-mapping") set(COVERAGE_FLAGS "SHELL:-fprofile-instr-generate -fcoverage-mapping")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
endif() endif()
option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbacks" OFF) option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbacks" OFF)

@ -1 +1 @@
Subproject commit a304ec48dcf15d942607032151f7e9ee504b5dcf Subproject commit 1f95f8083066f5b38fd2db172e7e7f9aa7c49d2d

View File

@ -26,7 +26,6 @@ sed -i '/onBrokenMarkdownLinks:/ s/ignore/error/g' docusaurus.config.js
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
export CI=true export CI=true
yarn install
exec yarn build "$@" exec yarn build "$@"
fi fi

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc # lts / testing / prestable / etc
ARG REPO_CHANNEL="stable" ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.7.1.2915" ARG VERSION="24.7.2.13"
ARG PACKAGES="clickhouse-keeper" ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS="" ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc # lts / testing / prestable / etc
ARG REPO_CHANNEL="stable" ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.7.1.2915" ARG VERSION="24.7.2.13"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS="" ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable" ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.7.1.2915" ARG VERSION="24.7.2.13"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
#docker-official-library:off #docker-official-library:off

View File

@ -0,0 +1,35 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.8.16.40-lts (e143a9039ba) FIXME as compared to v23.8.15.35-lts (060ff8e813a)
#### Improvement
* Backported in [#66962](https://github.com/ClickHouse/ClickHouse/issues/66962): Added support for parameterized view with analyzer to not analyze create parameterized view. Refactor existing parameterized view logic to not analyze create parameterized view. [#54211](https://github.com/ClickHouse/ClickHouse/pull/54211) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Backported in [#65461](https://github.com/ClickHouse/ClickHouse/issues/65461): Reload certificate chain during certificate reload. [#61671](https://github.com/ClickHouse/ClickHouse/pull/61671) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)).
* Backported in [#65880](https://github.com/ClickHouse/ClickHouse/issues/65880): Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#65912](https://github.com/ClickHouse/ClickHouse/issues/65912): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)).
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
* Backported in [#65281](https://github.com/ClickHouse/ClickHouse/issues/65281): Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#65368](https://github.com/ClickHouse/ClickHouse/issues/65368): Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#65743](https://github.com/ClickHouse/ClickHouse/issues/65743): Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#65351](https://github.com/ClickHouse/ClickHouse/issues/65351): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#66037](https://github.com/ClickHouse/ClickHouse/issues/66037): Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)).
* Backported in [#65782](https://github.com/ClickHouse/ClickHouse/issues/65782): Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#65926](https://github.com/ClickHouse/ClickHouse/issues/65926): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#65822](https://github.com/ClickHouse/ClickHouse/issues/65822): Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)).
* Backported in [#66449](https://github.com/ClickHouse/ClickHouse/issues/66449): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#66717](https://github.com/ClickHouse/ClickHouse/issues/66717): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#65080](https://github.com/ClickHouse/ClickHouse/issues/65080): Follow up to [#56541](https://github.com/ClickHouse/ClickHouse/issues/56541). [#57141](https://github.com/ClickHouse/ClickHouse/pull/57141) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#65913](https://github.com/ClickHouse/ClickHouse/issues/65913): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#66853](https://github.com/ClickHouse/ClickHouse/issues/66853): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)).

View File

@ -0,0 +1,40 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.3.5.46-lts (fe54cead6b6) FIXME as compared to v24.3.4.147-lts (31a7bdc346d)
#### Improvement
* Backported in [#65463](https://github.com/ClickHouse/ClickHouse/issues/65463): Reload certificate chain during certificate reload. [#61671](https://github.com/ClickHouse/ClickHouse/pull/61671) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)).
* Backported in [#65882](https://github.com/ClickHouse/ClickHouse/issues/65882): Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#65302](https://github.com/ClickHouse/ClickHouse/issues/65302): Returned back the behaviour of how ClickHouse works and interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Backported in [#65892](https://github.com/ClickHouse/ClickHouse/issues/65892): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)).
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
* Backported in [#65283](https://github.com/ClickHouse/ClickHouse/issues/65283): Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#65370](https://github.com/ClickHouse/ClickHouse/issues/65370): Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#65446](https://github.com/ClickHouse/ClickHouse/issues/65446): Use correct memory alignment for Distinct combinator. Previously, crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#65708](https://github.com/ClickHouse/ClickHouse/issues/65708): Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#65352](https://github.com/ClickHouse/ClickHouse/issues/65352): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#65327](https://github.com/ClickHouse/ClickHouse/issues/65327): Fix the crash loop when restoring from backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)).
* Backported in [#65538](https://github.com/ClickHouse/ClickHouse/issues/65538): Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)).
* Backported in [#65576](https://github.com/ClickHouse/ClickHouse/issues/65576): Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)).
* Backported in [#65159](https://github.com/ClickHouse/ClickHouse/issues/65159): Fix pushing arithmetic operations out of aggregation. In the new analyzer, optimization was applied only once. [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#65615](https://github.com/ClickHouse/ClickHouse/issues/65615): Fix aggregate function name rewriting in the new analyzer. [#65110](https://github.com/ClickHouse/ClickHouse/pull/65110) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#65728](https://github.com/ClickHouse/ClickHouse/issues/65728): Eliminate injective function in argument of functions `uniq*` recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#65261](https://github.com/ClickHouse/ClickHouse/issues/65261): Fix the bug in Hashed and Hashed_Array dictionary short circuit evaluation, which may read uninitialized number, leading to various errors. [#65256](https://github.com/ClickHouse/ClickHouse/pull/65256) ([jsc0218](https://github.com/jsc0218)).
* Backported in [#65667](https://github.com/ClickHouse/ClickHouse/issues/65667): Disable `non-intersecting-parts` optimization for queries with `FINAL` in case of `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#65784](https://github.com/ClickHouse/ClickHouse/issues/65784): Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#65929](https://github.com/ClickHouse/ClickHouse/issues/65929): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#65824](https://github.com/ClickHouse/ClickHouse/issues/65824): Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#65223](https://github.com/ClickHouse/ClickHouse/issues/65223): Capture weak_ptr of ContextAccess for safety. [#65051](https://github.com/ClickHouse/ClickHouse/pull/65051) ([Alexander Gololobov](https://github.com/davenger)).
* Backported in [#65901](https://github.com/ClickHouse/ClickHouse/issues/65901): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)).

View File

@ -0,0 +1,39 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.3.6.48-lts (b2d33c3c45d) FIXME as compared to v24.3.5.46-lts (fe54cead6b6)
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
* Backported in [#66889](https://github.com/ClickHouse/ClickHouse/issues/66889): Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66687](https://github.com/ClickHouse/ClickHouse/issues/66687): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)).
* Backported in [#67497](https://github.com/ClickHouse/ClickHouse/issues/67497): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#66324](https://github.com/ClickHouse/ClickHouse/issues/66324): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. This prevents incorrect schema inference results when these settings are changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#66151](https://github.com/ClickHouse/ClickHouse/issues/66151): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#66451](https://github.com/ClickHouse/ClickHouse/issues/66451): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#66222](https://github.com/ClickHouse/ClickHouse/issues/66222): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66676](https://github.com/ClickHouse/ClickHouse/issues/66676): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#66602](https://github.com/ClickHouse/ClickHouse/issues/66602): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)).
* Backported in [#66356](https://github.com/ClickHouse/ClickHouse/issues/66356): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66970](https://github.com/ClickHouse/ClickHouse/issues/66970): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66967](https://github.com/ClickHouse/ClickHouse/issues/66967): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66718](https://github.com/ClickHouse/ClickHouse/issues/66718): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#66949](https://github.com/ClickHouse/ClickHouse/issues/66949): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66946](https://github.com/ClickHouse/ClickHouse/issues/66946): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#67629](https://github.com/ClickHouse/ClickHouse/issues/67629): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
* Backported in [#67193](https://github.com/ClickHouse/ClickHouse/issues/67193): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#67375](https://github.com/ClickHouse/ClickHouse/issues/67375): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distributed` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#67572](https://github.com/ClickHouse/ClickHouse/issues/67572): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#66422](https://github.com/ClickHouse/ClickHouse/issues/66422): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66855](https://github.com/ClickHouse/ClickHouse/issues/66855): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)).
* Backported in [#67055](https://github.com/ClickHouse/ClickHouse/issues/67055): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)).
* Backported in [#66943](https://github.com/ClickHouse/ClickHouse/issues/66943): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)).

View File

@ -0,0 +1,70 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.4.4.107-stable (af0ed6b197e) FIXME as compared to v24.4.3.25-stable (a915dd4eda4)
#### Improvement
* Backported in [#65884](https://github.com/ClickHouse/ClickHouse/issues/65884): Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#65303](https://github.com/ClickHouse/ClickHouse/issues/65303): Returned back the behaviour of how ClickHouse works and interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Backported in [#65894](https://github.com/ClickHouse/ClickHouse/issues/65894): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)).
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
* Backported in [#65372](https://github.com/ClickHouse/ClickHouse/issues/65372): Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#66883](https://github.com/ClickHouse/ClickHouse/issues/66883): Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#65435](https://github.com/ClickHouse/ClickHouse/issues/65435): Forbid `QUALIFY` clause in the old analyzer. The old analyzer ignored `QUALIFY`, so it could lead to unexpected data removal in mutations. [#65356](https://github.com/ClickHouse/ClickHouse/pull/65356) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#65448](https://github.com/ClickHouse/ClickHouse/issues/65448): Use correct memory alignment for Distinct combinator. Previously, crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#65710](https://github.com/ClickHouse/ClickHouse/issues/65710): Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66689](https://github.com/ClickHouse/ClickHouse/issues/66689): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#65353](https://github.com/ClickHouse/ClickHouse/issues/65353): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#65060](https://github.com/ClickHouse/ClickHouse/issues/65060): Fix the `Expression nodes list expected 1 projection names` and `Unknown expression or identifier` errors for queries with aliases to `GLOBAL IN`. [#64517](https://github.com/ClickHouse/ClickHouse/pull/64517) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#65329](https://github.com/ClickHouse/ClickHouse/issues/65329): Fix the crash loop when restoring from backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)).
* Backported in [#64833](https://github.com/ClickHouse/ClickHouse/issues/64833): Fix bug which could lead to non-working TTLs with expressions. [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)).
* Backported in [#65086](https://github.com/ClickHouse/ClickHouse/issues/65086): Fix removing the `WHERE` and `PREWHERE` expressions, which are always true (for the new analyzer). [#64695](https://github.com/ClickHouse/ClickHouse/pull/64695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#65540](https://github.com/ClickHouse/ClickHouse/issues/65540): Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)).
* Backported in [#65578](https://github.com/ClickHouse/ClickHouse/issues/65578): Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)).
* Backported in [#65161](https://github.com/ClickHouse/ClickHouse/issues/65161): Fix pushing arithmetic operations out of aggregation. In the new analyzer, optimization was applied only once. [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#65616](https://github.com/ClickHouse/ClickHouse/issues/65616): Fix aggregate function name rewriting in the new analyzer. [#65110](https://github.com/ClickHouse/ClickHouse/pull/65110) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#65730](https://github.com/ClickHouse/ClickHouse/issues/65730): Eliminate injective function in argument of functions `uniq*` recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#65668](https://github.com/ClickHouse/ClickHouse/issues/65668): Disable `non-intersecting-parts` optimization for queries with `FINAL` in case of `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#65786](https://github.com/ClickHouse/ClickHouse/issues/65786): Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#65810](https://github.com/ClickHouse/ClickHouse/issues/65810): Fix invalid exceptions in function `parseDateTime` with `%F` and `%D` placeholders. [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#65931](https://github.com/ClickHouse/ClickHouse/issues/65931): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#65826](https://github.com/ClickHouse/ClickHouse/issues/65826): Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)).
* Backported in [#66299](https://github.com/ClickHouse/ClickHouse/issues/66299): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when conditions other than `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)).
* Backported in [#66326](https://github.com/ClickHouse/ClickHouse/issues/66326): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. This prevents incorrect schema inference results when these settings are changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#66153](https://github.com/ClickHouse/ClickHouse/issues/66153): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#66459](https://github.com/ClickHouse/ClickHouse/issues/66459): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#66224](https://github.com/ClickHouse/ClickHouse/issues/66224): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66267](https://github.com/ClickHouse/ClickHouse/issues/66267): Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#66678](https://github.com/ClickHouse/ClickHouse/issues/66678): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#66603](https://github.com/ClickHouse/ClickHouse/issues/66603): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)).
* Backported in [#66358](https://github.com/ClickHouse/ClickHouse/issues/66358): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66971](https://github.com/ClickHouse/ClickHouse/issues/66971): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66968](https://github.com/ClickHouse/ClickHouse/issues/66968): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66719](https://github.com/ClickHouse/ClickHouse/issues/66719): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#66950](https://github.com/ClickHouse/ClickHouse/issues/66950): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66947](https://github.com/ClickHouse/ClickHouse/issues/66947): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#67195](https://github.com/ClickHouse/ClickHouse/issues/67195): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#67377](https://github.com/ClickHouse/ClickHouse/issues/67377): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distributed` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#67240](https://github.com/ClickHouse/ClickHouse/issues/67240): This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907. [#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#65410](https://github.com/ClickHouse/ClickHouse/issues/65410): Re-enable OpenSSL session caching. [#65111](https://github.com/ClickHouse/ClickHouse/pull/65111) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#65903](https://github.com/ClickHouse/ClickHouse/issues/65903): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#66385](https://github.com/ClickHouse/ClickHouse/issues/66385): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)).
* Backported in [#66424](https://github.com/ClickHouse/ClickHouse/issues/66424): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66542](https://github.com/ClickHouse/ClickHouse/issues/66542): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66857](https://github.com/ClickHouse/ClickHouse/issues/66857): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)).
* Backported in [#66873](https://github.com/ClickHouse/ClickHouse/issues/66873): Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)).
* Backported in [#67057](https://github.com/ClickHouse/ClickHouse/issues/67057): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)).
* Backported in [#66944](https://github.com/ClickHouse/ClickHouse/issues/66944): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#67250](https://github.com/ClickHouse/ClickHouse/issues/67250): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)).
* Backported in [#67410](https://github.com/ClickHouse/ClickHouse/issues/67410): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)).

View File

@ -0,0 +1,73 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.4.4.113-stable (d63a54957bd) FIXME as compared to v24.4.3.25-stable (a915dd4eda4)
#### Improvement
* Backported in [#65884](https://github.com/ClickHouse/ClickHouse/issues/65884): Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#65303](https://github.com/ClickHouse/ClickHouse/issues/65303): Returned back the behaviour of how ClickHouse works and interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Backported in [#65894](https://github.com/ClickHouse/ClickHouse/issues/65894): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)).
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
* Backported in [#65372](https://github.com/ClickHouse/ClickHouse/issues/65372): Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#66883](https://github.com/ClickHouse/ClickHouse/issues/66883): Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#65435](https://github.com/ClickHouse/ClickHouse/issues/65435): Forbid `QUALIFY` clause in the old analyzer. The old analyzer ignored `QUALIFY`, so it could lead to unexpected data removal in mutations. [#65356](https://github.com/ClickHouse/ClickHouse/pull/65356) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#65448](https://github.com/ClickHouse/ClickHouse/issues/65448): Use correct memory alignment for Distinct combinator. Previously, crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#65710](https://github.com/ClickHouse/ClickHouse/issues/65710): Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66689](https://github.com/ClickHouse/ClickHouse/issues/66689): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)).
* Backported in [#67499](https://github.com/ClickHouse/ClickHouse/issues/67499): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#65353](https://github.com/ClickHouse/ClickHouse/issues/65353): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#65060](https://github.com/ClickHouse/ClickHouse/issues/65060): Fix the `Expression nodes list expected 1 projection names` and `Unknown expression or identifier` errors for queries with aliases to `GLOBAL IN.`. [#64517](https://github.com/ClickHouse/ClickHouse/pull/64517) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#65329](https://github.com/ClickHouse/ClickHouse/issues/65329): Fix the crash loop when restoring from backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)).
* Backported in [#64833](https://github.com/ClickHouse/ClickHouse/issues/64833): Fix bug which could lead to non-working TTLs with expressions. [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)).
* Backported in [#65086](https://github.com/ClickHouse/ClickHouse/issues/65086): Fix removing the `WHERE` and `PREWHERE` expressions, which are always true (for the new analyzer). [#64695](https://github.com/ClickHouse/ClickHouse/pull/64695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#65540](https://github.com/ClickHouse/ClickHouse/issues/65540): Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)).
* Backported in [#65578](https://github.com/ClickHouse/ClickHouse/issues/65578): Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)).
* Backported in [#65161](https://github.com/ClickHouse/ClickHouse/issues/65161): Fix pushing arithmetic operations out of aggregation. In the new analyzer, optimization was applied only once. [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#65616](https://github.com/ClickHouse/ClickHouse/issues/65616): Fix aggregate function name rewriting in the new analyzer. [#65110](https://github.com/ClickHouse/ClickHouse/pull/65110) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#65730](https://github.com/ClickHouse/ClickHouse/issues/65730): Eliminate injective function in argument of functions `uniq*` recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#65668](https://github.com/ClickHouse/ClickHouse/issues/65668): Disable `non-intersecting-parts` optimization for queries with `FINAL` in case of `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#65786](https://github.com/ClickHouse/ClickHouse/issues/65786): Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#65810](https://github.com/ClickHouse/ClickHouse/issues/65810): Fix invalid exceptions in function `parseDateTime` with `%F` and `%D` placeholders. [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#65931](https://github.com/ClickHouse/ClickHouse/issues/65931): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#65826](https://github.com/ClickHouse/ClickHouse/issues/65826): Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)).
* Backported in [#66299](https://github.com/ClickHouse/ClickHouse/issues/66299): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when conditions other than `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)).
* Backported in [#66326](https://github.com/ClickHouse/ClickHouse/issues/66326): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#66153](https://github.com/ClickHouse/ClickHouse/issues/66153): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#66459](https://github.com/ClickHouse/ClickHouse/issues/66459): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#66224](https://github.com/ClickHouse/ClickHouse/issues/66224): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66267](https://github.com/ClickHouse/ClickHouse/issues/66267): Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#66678](https://github.com/ClickHouse/ClickHouse/issues/66678): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#66603](https://github.com/ClickHouse/ClickHouse/issues/66603): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)).
* Backported in [#66358](https://github.com/ClickHouse/ClickHouse/issues/66358): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66971](https://github.com/ClickHouse/ClickHouse/issues/66971): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66968](https://github.com/ClickHouse/ClickHouse/issues/66968): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66719](https://github.com/ClickHouse/ClickHouse/issues/66719): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#66950](https://github.com/ClickHouse/ClickHouse/issues/66950): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66947](https://github.com/ClickHouse/ClickHouse/issues/66947): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#67631](https://github.com/ClickHouse/ClickHouse/issues/67631): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
* Backported in [#67195](https://github.com/ClickHouse/ClickHouse/issues/67195): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#67377](https://github.com/ClickHouse/ClickHouse/issues/67377): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distributed` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#67240](https://github.com/ClickHouse/ClickHouse/issues/67240): This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907. [#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#67574](https://github.com/ClickHouse/ClickHouse/issues/67574): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#65410](https://github.com/ClickHouse/ClickHouse/issues/65410): Re-enable OpenSSL session caching. [#65111](https://github.com/ClickHouse/ClickHouse/pull/65111) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#65903](https://github.com/ClickHouse/ClickHouse/issues/65903): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#66385](https://github.com/ClickHouse/ClickHouse/issues/66385): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)).
* Backported in [#66424](https://github.com/ClickHouse/ClickHouse/issues/66424): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66542](https://github.com/ClickHouse/ClickHouse/issues/66542): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66857](https://github.com/ClickHouse/ClickHouse/issues/66857): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)).
* Backported in [#66873](https://github.com/ClickHouse/ClickHouse/issues/66873): Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)).
* Backported in [#67057](https://github.com/ClickHouse/ClickHouse/issues/67057): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)).
* Backported in [#66944](https://github.com/ClickHouse/ClickHouse/issues/66944): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#67250](https://github.com/ClickHouse/ClickHouse/issues/67250): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)).
* Backported in [#67410](https://github.com/ClickHouse/ClickHouse/issues/67410): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)).

View File

@ -0,0 +1,24 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.7.2.13-stable (6e41f601b2f) FIXME as compared to v24.7.1.2915-stable (a37d2d43da7)
#### Improvement
* Backported in [#67531](https://github.com/ClickHouse/ClickHouse/issues/67531): In PR https://github.com/ClickHouse/ClickHouse/pull/66025 we introduced the setting `input_format_orc_read_use_writer_time_zone` so that, when reading an ORC file, the reader uses the writer's time zone instead of always using `GMT`. [#67175](https://github.com/ClickHouse/ClickHouse/pull/67175) ([kevinyhzou](https://github.com/KevinyhZou)).
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
* Backported in [#67505](https://github.com/ClickHouse/ClickHouse/issues/67505): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#67580](https://github.com/ClickHouse/ClickHouse/issues/67580): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#67551](https://github.com/ClickHouse/ClickHouse/issues/67551): [Green CI] Fix test test_storage_s3_queue/test.py::test_max_set_age. [#67035](https://github.com/ClickHouse/ClickHouse/pull/67035) ([Pablo Marcos](https://github.com/pamarcos)).
* Backported in [#67514](https://github.com/ClickHouse/ClickHouse/issues/67514): Split test 02967_parallel_replicas_join_algo_and_analyzer. [#67211](https://github.com/ClickHouse/ClickHouse/pull/67211) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#67545](https://github.com/ClickHouse/ClickHouse/issues/67545): [Green CI] Fix WriteBuffer destructor when finalize has failed for MergeTreeDeduplicationLog::shutdown. [#67474](https://github.com/ClickHouse/ClickHouse/pull/67474) ([Alexey Katsman](https://github.com/alexkats)).

View File

@ -103,8 +103,6 @@ Default: 2
The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`.
## background_merges_mutations_scheduling_policy
Algorithm used to select next merge or mutation to be executed by background thread pool. Policy may be changed at runtime without server restart. Algorithm used to select next merge or mutation to be executed by background thread pool. Policy may be changed at runtime without server restart.
Could be applied from the `default` profile for backward compatibility. Could be applied from the `default` profile for backward compatibility.

View File

@ -119,11 +119,6 @@ Minimum size of blocks of uncompressed data required for compression when writin
You can also specify this setting in the global settings (see [min_compress_block_size](/docs/en/operations/settings/settings.md/#min-compress-block-size) setting). You can also specify this setting in the global settings (see [min_compress_block_size](/docs/en/operations/settings/settings.md/#min-compress-block-size) setting).
The value specified when table is created overrides the global value for this setting. The value specified when table is created overrides the global value for this setting.
## max_partitions_to_read
Limits the maximum number of partitions that can be accessed in one query.
You can also specify setting [max_partitions_to_read](/docs/en/operations/settings/merge-tree-settings.md/#max-partitions-to-read) in the global setting.
## max_suspicious_broken_parts ## max_suspicious_broken_parts
If the number of broken parts in a single partition exceeds the `max_suspicious_broken_parts` value, automatic deletion is denied. If the number of broken parts in a single partition exceeds the `max_suspicious_broken_parts` value, automatic deletion is denied.
@ -691,6 +686,8 @@ Possible values:
Default value: -1 (unlimited). Default value: -1 (unlimited).
You can also specify a query complexity setting [max_partitions_to_read](query-complexity#max-partitions-to-read) at a query / session / profile level.
## min_age_to_force_merge_seconds {#min_age_to_force_merge_seconds} ## min_age_to_force_merge_seconds {#min_age_to_force_merge_seconds}
Merge parts if every part in the range is older than the value of `min_age_to_force_merge_seconds`. Merge parts if every part in the range is older than the value of `min_age_to_force_merge_seconds`.

View File

@ -188,7 +188,7 @@ If you set `timeout_before_checking_execution_speed `to 0, ClickHouse will use c
What to do if the query is run longer than `max_execution_time` or the estimated running time is longer than `max_estimated_execution_time`: `throw` or `break`. By default, `throw`. What to do if the query is run longer than `max_execution_time` or the estimated running time is longer than `max_estimated_execution_time`: `throw` or `break`. By default, `throw`.
# max_execution_time_leaf ## max_execution_time_leaf
Similar semantic to `max_execution_time` but only apply on leaf node for distributed or remote queries. Similar semantic to `max_execution_time` but only apply on leaf node for distributed or remote queries.
@ -204,7 +204,7 @@ We can use `max_execution_time_leaf` as the query settings:
SELECT count() FROM cluster(cluster, view(SELECT * FROM t)) SETTINGS max_execution_time_leaf = 10; SELECT count() FROM cluster(cluster, view(SELECT * FROM t)) SETTINGS max_execution_time_leaf = 10;
``` ```
# timeout_overflow_mode_leaf ## timeout_overflow_mode_leaf
What to do when the query in leaf node run longer than `max_execution_time_leaf`: `throw` or `break`. By default, `throw`. What to do when the query in leaf node run longer than `max_execution_time_leaf`: `throw` or `break`. By default, `throw`.
@ -426,3 +426,17 @@ Example:
``` ```
Default value: 0 (Infinite count of simultaneous sessions). Default value: 0 (Infinite count of simultaneous sessions).
## max_partitions_to_read {#max-partitions-to-read}
Limits the maximum number of partitions that can be accessed in one query.
The setting value specified when the table is created can be overridden via query-level setting.
Possible values:
- Any positive integer.
Default value: -1 (unlimited).
You can also specify a MergeTree setting [max_partitions_to_read](merge-tree-settings#max-partitions-to-read) in tables' setting.

View File

@ -5608,3 +5608,9 @@ Default value: `10000000`.
Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached. Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.
Default value: `1GiB`. Default value: `1GiB`.
## disable_insertion_and_mutation
Disables all inserts and mutations (alter table update / alter table delete / alter table drop partition). Setting this to true lets the node focus on read queries.
Default value: `false`.

View File

@ -0,0 +1,90 @@
---
slug: /en/sql-reference/aggregate-functions/reference/groupconcat
sidebar_position: 363
sidebar_label: groupConcat
title: groupConcat
---
Calculates a concatenated string from a group of strings, optionally separated by a delimiter, and optionally limited by a maximum number of elements.
**Syntax**
``` sql
groupConcat(expression [, delimiter] [, limit]);
```
**Arguments**
- `expression` — The expression or column name that outputs strings to be concatenated.
- `delimiter` — A [string](../../../sql-reference/data-types/string.md) that will be used to separate concatenated values. This parameter is optional and defaults to an empty string if not specified.
- `limit` — A positive [integer](../../../sql-reference/data-types/int-uint.md) specifying the maximum number of elements to concatenate. If more elements are present, excess elements are ignored. This parameter is optional.
:::note
If delimiter is specified without limit, it must be the first parameter following the expression. If both delimiter and limit are specified, delimiter must precede limit.
:::
**Returned value**
- Returns a [string](../../../sql-reference/data-types/string.md) consisting of the concatenated values of the column or expression. If the group has no elements or only null elements, and the function does not specify a handling for only null values, the result is a nullable string with a null value.
**Examples**
Input table:
``` text
┌─id─┬─name─┐
│ 1  │ John │
│ 2  │ Jane │
│ 3  │ Bob  │
└────┴──────┘
```
1. Basic usage without a delimiter:
Query:
``` sql
SELECT groupConcat(Name) FROM Employees;
```
Result:
``` text
JohnJaneBob
```
This concatenates all names into one continuous string without any separator.
2. Using comma as a delimiter:
Query:
``` sql
SELECT groupConcat(Name, ', ') FROM Employees;
```
Result:
``` text
John, Jane, Bob
```
This output shows the names separated by a comma followed by a space.
3. Limiting the number of concatenated elements
Query:
``` sql
SELECT groupConcat(Name, ', ', 2) FROM Employees;
```
Result:
``` text
John, Jane
```
This query limits the output to the first two names, even though there are more names in the table.

View File

@ -150,15 +150,15 @@ A case insensitive invariant of [position](#position).
Query: Query:
``` sql ``` sql
SELECT position('Hello, world!', 'hello'); SELECT positionCaseInsensitive('Hello, world!', 'hello');
``` ```
Result: Result:
``` text ``` text
┌─position('Hello, world!', 'hello')─┐ ┌─positionCaseInsensitive('Hello, world!', 'hello')─┐
0 1
└────────────────────────────────────┘ └───────────────────────────────────────────────────
``` ```
## positionUTF8 ## positionUTF8

View File

@ -849,7 +849,7 @@ try
#endif #endif
#if defined(SANITIZER) #if defined(SANITIZER)
LOG_INFO(log, "Query Profiler disabled because they cannot work under sanitizers" LOG_INFO(log, "Query Profiler is disabled because it cannot work under sanitizers"
" when two different stack unwinding methods will interfere with each other."); " when two different stack unwinding methods will interfere with each other.");
#endif #endif

View File

@ -1130,8 +1130,7 @@
<flush_interval_milliseconds>7500</flush_interval_milliseconds> <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_views_log> </query_views_log>
<!-- Uncomment if use part log. <!-- Part log contains information about all actions with parts in MergeTree tables (creation, deletion, merges, downloads). -->
Part log contains information about all actions with parts in MergeTree tables (creation, deletion, merges, downloads).-->
<part_log> <part_log>
<database>system</database> <database>system</database>
<table>part_log</table> <table>part_log</table>
@ -1143,9 +1142,9 @@
<flush_on_crash>false</flush_on_crash> <flush_on_crash>false</flush_on_crash>
</part_log> </part_log>
<!-- Uncomment to write text log into table. <!-- Text log contains all information from usual server log but stores it in structured and efficient way.
Text log contains all information from usual server log but stores it in structured and efficient way.
The level of the messages that goes to the table can be limited (<level>), if not specified all messages will go to the table. The level of the messages that goes to the table can be limited (<level>), if not specified all messages will go to the table.
-->
<text_log> <text_log>
<database>system</database> <database>system</database>
<table>text_log</table> <table>text_log</table>
@ -1154,9 +1153,8 @@
<reserved_size_rows>8192</reserved_size_rows> <reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold> <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash> <flush_on_crash>false</flush_on_crash>
<level></level> <level>trace</level>
</text_log> </text_log>
-->
<!-- Metric log contains rows with current values of ProfileEvents, CurrentMetrics collected with "collect_interval_milliseconds" interval. --> <!-- Metric log contains rows with current values of ProfileEvents, CurrentMetrics collected with "collect_interval_milliseconds" interval. -->
<metric_log> <metric_log>

View File

@ -17,7 +17,7 @@
--input-shadow-color: rgba(0, 255, 0, 1); --input-shadow-color: rgba(0, 255, 0, 1);
--error-color: red; --error-color: red;
--global-error-color: white; --global-error-color: white;
--legend-background: rgba(255, 255, 255, 0.75); --legend-background: rgba(255, 255, 0, 0.75);
--title-color: #666; --title-color: #666;
--text-color: black; --text-color: black;
--edit-title-background: #FEE; --edit-title-background: #FEE;
@ -41,7 +41,7 @@
--moving-shadow-color: rgba(255, 255, 255, 0.25); --moving-shadow-color: rgba(255, 255, 255, 0.25);
--input-shadow-color: rgba(255, 128, 0, 0.25); --input-shadow-color: rgba(255, 128, 0, 0.25);
--error-color: #F66; --error-color: #F66;
--legend-background: rgba(255, 255, 255, 0.25); --legend-background: rgba(0, 96, 128, 0.75);
--title-color: white; --title-color: white;
--text-color: white; --text-color: white;
--edit-title-background: #364f69; --edit-title-background: #364f69;
@ -218,6 +218,7 @@
#chart-params .param { #chart-params .param {
width: 6%; width: 6%;
font-family: monospace;
} }
input { input {
@ -256,6 +257,7 @@
font-weight: bold; font-weight: bold;
user-select: none; user-select: none;
cursor: pointer; cursor: pointer;
margin-bottom: 1rem;
} }
#run:hover { #run:hover {
@ -309,7 +311,7 @@
color: var(--param-text-color); color: var(--param-text-color);
display: inline-block; display: inline-block;
box-shadow: 1px 1px 0 var(--shadow-color); box-shadow: 1px 1px 0 var(--shadow-color);
margin-bottom: 1rem; margin-bottom: 0.5rem;
} }
input:focus { input:focus {
@ -657,6 +659,10 @@ function insertParam(name, value) {
param_value.value = value; param_value.value = value;
param_value.spellcheck = false; param_value.spellcheck = false;
let setWidth = e => { e.style.width = (e.value.length + 1) + 'ch' };
if (value) { setWidth(param_value); }
param_value.addEventListener('input', e => setWidth(e.target));
param_wrapper.appendChild(param_name); param_wrapper.appendChild(param_name);
param_wrapper.appendChild(param_value); param_wrapper.appendChild(param_value);
document.getElementById('chart-params').appendChild(param_wrapper); document.getElementById('chart-params').appendChild(param_wrapper);
@ -945,6 +951,7 @@ function showMassEditor() {
let editor = document.getElementById('mass-editor-textarea'); let editor = document.getElementById('mass-editor-textarea');
editor.value = JSON.stringify({params: params, queries: queries}, null, 2); editor.value = JSON.stringify({params: params, queries: queries}, null, 2);
editor.focus();
mass_editor_active = true; mass_editor_active = true;
} }
@ -1004,14 +1011,14 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend-
className && legendEl.classList.add(className); className && legendEl.classList.add(className);
uPlot.assign(legendEl.style, { uPlot.assign(legendEl.style, {
textAlign: "left", textAlign: "right",
pointerEvents: "none", pointerEvents: "none",
display: "none", display: "none",
position: "absolute", position: "absolute",
left: 0, left: 0,
top: 0, top: 0,
zIndex: 100, zIndex: 100,
boxShadow: "2px 2px 10px rgba(0,0,0,0.1)", boxShadow: "2px 2px 10px rgba(0, 0, 0, 0.1)",
...style ...style
}); });
@ -1051,8 +1058,10 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend-
function update(u) { function update(u) {
let { left, top } = u.cursor; let { left, top } = u.cursor;
left -= legendEl.clientWidth / 2; /// This will make the balloon to the right of the cursor when the cursor is on the left side, and vice versa,
top -= legendEl.clientHeight / 2; /// avoiding the borders of the chart.
left -= legendEl.clientWidth * (left / u.width);
top -= legendEl.clientHeight;
legendEl.style.transform = "translate(" + left + "px, " + top + "px)"; legendEl.style.transform = "translate(" + left + "px, " + top + "px)";
if (multiline) { if (multiline) {
@ -1139,7 +1148,7 @@ async function draw(idx, chart, url_params, query) {
let {reply, error} = await doFetch(query, url_params); let {reply, error} = await doFetch(query, url_params);
if (!error) { if (!error) {
if (reply.rows.length == 0) { if (reply.rows == 0) {
error = "Query returned empty result."; error = "Query returned empty result.";
} else if (reply.meta.length < 2) { } else if (reply.meta.length < 2) {
error = "Query should return at least two columns: unix timestamp and value."; error = "Query should return at least two columns: unix timestamp and value.";
@ -1229,14 +1238,53 @@ async function draw(idx, chart, url_params, query) {
let sync = uPlot.sync("sync"); let sync = uPlot.sync("sync");
let axis = { function formatDateTime(t) {
return (new Date(t * 1000)).toISOString().replace('T', '\n').replace('.000Z', '');
}
/// Builds the labels for a list of x-axis ticks.
/// The first tick, and every tick whose date part (first 10 characters of the formatted value)
/// differs from the previous tick, gets the full formatted value; all other ticks get only
/// the part after the date (from character 11 onwards).
function formatDateTimes(self, ticks) {
    return ticks.map((tick, i) => {
        const label = formatDateTime(tick);
        const same_date_as_previous =
            i != 0 && label.substring(0, 10) == formatDateTime(ticks[i - 1]).substring(0, 10);
        return same_date_as_previous ? label.substring(11) : label;
    });
}
/// Shortens a numeric value for display by scaling it and appending a suffix:
/// P, T, G, M, K for magnitudes of at least 1e15, 1e12, 1e9, 1e6, 1e3 respectively,
/// and "μ" (value multiplied by 1e6) for non-zero magnitudes below 0.001.
/// Any other value is returned unchanged (as a number, not a string).
function formatValue(v) {
    const magnitude = Math.abs(v);
    /// Ordered from the largest scale down, so the first match is the biggest applicable suffix.
    const scales = [
        [1000000000000000, 'P'],
        [1000000000000, 'T'],
        [1000000000, 'G'],
        [1000000, 'M'],
        [1000, 'K'],
    ];
    for (const [divisor, suffix] of scales) {
        if (magnitude >= divisor) {
            return (v / divisor) + suffix;
        }
    }
    if (magnitude > 0 && magnitude < 0.001) {
        return (v * 1000000) + "μ";
    }
    return v;
}
let axis_x = {
stroke: axes_color, stroke: axes_color,
grid: { width: 1 / devicePixelRatio, stroke: grid_color }, grid: { width: 1 / devicePixelRatio, stroke: grid_color },
ticks: { width: 1 / devicePixelRatio, stroke: grid_color } ticks: { width: 1 / devicePixelRatio, stroke: grid_color },
values: formatDateTimes,
space: 80,
incrs: [1, 5, 10, 15, 30,
60, 60 * 5, 60 * 10, 60 * 15, 60 * 30,
3600, 3600 * 2, 3600 * 3, 3600 * 4, 3600 * 6, 3600 * 12,
3600 * 24],
}; };
let axes = [axis, axis]; let axis_y = {
let series = [{ label: "x" }]; stroke: axes_color,
grid: { width: 1 / devicePixelRatio, stroke: grid_color },
ticks: { width: 1 / devicePixelRatio, stroke: grid_color },
values: (self, ticks) => ticks.map(formatValue)
};
let axes = [axis_x, axis_y];
let series = [{ label: "time", value: (self, t) => formatDateTime(t) }];
let data = [reply.data[reply.meta[0].name]]; let data = [reply.data[reply.meta[0].name]];
// Treat every column as series // Treat every column as series
@ -1254,9 +1302,10 @@ async function draw(idx, chart, url_params, query) {
const opts = { const opts = {
width: chart.clientWidth, width: chart.clientWidth,
height: chart.clientHeight, height: chart.clientHeight,
scales: { x: { time: false } }, /// Because we want to split and format time on our own.
axes, axes,
series, series,
padding: [ null, null, null, (Math.round(max_value * 100) / 100).toString().length * 6 - 10 ], padding: [ null, null, null, 3 ],
plugins: [ legendAsTooltipPlugin() ], plugins: [ legendAsTooltipPlugin() ],
cursor: { cursor: {
sync: { sync: {

View File

@ -39,6 +39,8 @@ disable = '''
no-else-return, no-else-return,
global-statement, global-statement,
f-string-without-interpolation, f-string-without-interpolation,
consider-using-with,
use-maxsplit-arg,
''' '''
[tool.pylint.SIMILARITIES] [tool.pylint.SIMILARITIES]

View File

@ -1,2 +1,2 @@
clickhouse_add_executable(aggregate_function_state_deserialization_fuzzer aggregate_function_state_deserialization_fuzzer.cpp ${SRCS}) clickhouse_add_executable(aggregate_function_state_deserialization_fuzzer aggregate_function_state_deserialization_fuzzer.cpp ${SRCS})
target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions) target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions clickhouse_functions)

View File

@ -12,38 +12,36 @@
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/registerAggregateFunctions.h> #include <AggregateFunctions/registerAggregateFunctions.h>
#include <base/scope_guard.h> #include <base/scope_guard.h>
using namespace DB;
ContextMutablePtr context;
extern "C" int LLVMFuzzerInitialize(int *, char ***)
{
if (context)
return true;
SharedContextHolder shared_context = Context::createShared();
context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
MainThreadStatus::getInstance();
registerAggregateFunctions();
return 0;
}
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{ {
try try
{ {
using namespace DB;
static SharedContextHolder shared_context;
static ContextMutablePtr context;
auto initialize = [&]() mutable
{
if (context)
return true;
shared_context = Context::createShared();
context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
context->setApplicationType(Context::ApplicationType::LOCAL);
MainThreadStatus::getInstance();
registerAggregateFunctions();
return true;
};
static bool initialized = initialize();
(void) initialized;
total_memory_tracker.resetCounters(); total_memory_tracker.resetCounters();
total_memory_tracker.setHardLimit(1_GiB); total_memory_tracker.setHardLimit(1_GiB);
CurrentThread::get().memory_tracker.resetCounters(); CurrentThread::get().memory_tracker.resetCounters();

View File

@ -24,7 +24,7 @@ void InterpolateNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_st
{ {
buffer << std::string(indent, ' ') << "INTERPOLATE id: " << format_state.getNodeId(this); buffer << std::string(indent, ' ') << "INTERPOLATE id: " << format_state.getNodeId(this);
buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION\n"; buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION " << expression_name << " \n";
getExpression()->dumpTreeImpl(buffer, format_state, indent + 4); getExpression()->dumpTreeImpl(buffer, format_state, indent + 4);
buffer << '\n' << std::string(indent + 2, ' ') << "INTERPOLATE_EXPRESSION\n"; buffer << '\n' << std::string(indent + 2, ' ') << "INTERPOLATE_EXPRESSION\n";

View File

@ -50,6 +50,8 @@ public:
return QueryTreeNodeType::INTERPOLATE; return QueryTreeNodeType::INTERPOLATE;
} }
const std::string & getExpressionName() const { return expression_name; }
void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
protected: protected:

View File

@ -64,6 +64,8 @@
#include <Analyzer/Resolve/TableExpressionsAliasVisitor.h> #include <Analyzer/Resolve/TableExpressionsAliasVisitor.h>
#include <Analyzer/Resolve/ReplaceColumnsVisitor.h> #include <Analyzer/Resolve/ReplaceColumnsVisitor.h>
#include <Planner/PlannerActionsVisitor.h>
#include <Core/Settings.h> #include <Core/Settings.h>
namespace ProfileEvents namespace ProfileEvents
@ -4122,11 +4124,7 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo
{ {
auto & interpolate_node_typed = interpolate_node->as<InterpolateNode &>(); auto & interpolate_node_typed = interpolate_node->as<InterpolateNode &>();
auto * column_to_interpolate = interpolate_node_typed.getExpression()->as<IdentifierNode>(); auto column_to_interpolate_name = interpolate_node_typed.getExpressionName();
if (!column_to_interpolate)
throw Exception(ErrorCodes::LOGICAL_ERROR, "INTERPOLATE can work only for indentifiers, but {} is found",
interpolate_node_typed.getExpression()->formatASTForErrorMessage());
auto column_to_interpolate_name = column_to_interpolate->getIdentifier().getFullName();
resolveExpressionNode(interpolate_node_typed.getExpression(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); resolveExpressionNode(interpolate_node_typed.getExpression(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
@ -4135,14 +4133,11 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo
auto & interpolation_to_resolve = interpolate_node_typed.getInterpolateExpression(); auto & interpolation_to_resolve = interpolate_node_typed.getInterpolateExpression();
IdentifierResolveScope interpolate_scope(interpolation_to_resolve, &scope /*parent_scope*/); IdentifierResolveScope interpolate_scope(interpolation_to_resolve, &scope /*parent_scope*/);
auto fake_column_node = std::make_shared<ColumnNode>(NameAndTypePair(column_to_interpolate_name, interpolate_node_typed.getExpression()->getResultType()), interpolate_node_typed.getExpression()); auto fake_column_node = std::make_shared<ColumnNode>(NameAndTypePair(column_to_interpolate_name, interpolate_node_typed.getExpression()->getResultType()), interpolate_node);
if (is_column_constant) if (is_column_constant)
interpolate_scope.expression_argument_name_to_node.emplace(column_to_interpolate_name, fake_column_node); interpolate_scope.expression_argument_name_to_node.emplace(column_to_interpolate_name, fake_column_node);
resolveExpressionNode(interpolation_to_resolve, interpolate_scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); resolveExpressionNode(interpolation_to_resolve, interpolate_scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
if (is_column_constant)
interpolation_to_resolve = interpolation_to_resolve->cloneAndReplace(fake_column_node, interpolate_node_typed.getExpression());
} }
} }

View File

@ -43,6 +43,12 @@ size_t getCompoundTypeDepth(const IDataType & type)
const auto & tuple_elements = assert_cast<const DataTypeTuple &>(*current_type).getElements(); const auto & tuple_elements = assert_cast<const DataTypeTuple &>(*current_type).getElements();
if (!tuple_elements.empty()) if (!tuple_elements.empty())
current_type = tuple_elements.at(0).get(); current_type = tuple_elements.at(0).get();
else
{
/// Special case: tuple with no element - tuple(). In this case, what's the compound type depth?
/// I'm not certain about the theoretical answer, but from experiment, 1 is the most reasonable choice.
return 1;
}
++result; ++result;
} }

View File

@ -323,7 +323,7 @@ bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic<size_t>
return false; return false;
bool result = false; bool result = false;
std::string path = zookeeper_path +"/stage"; std::string path = zookeeper_path + "/stage";
auto holder = with_retries.createRetriesControlHolder("createRootNodes"); auto holder = with_retries.createRetriesControlHolder("createRootNodes");
holder.retries_ctl.retryLoop( holder.retries_ctl.retryLoop(

View File

@ -61,8 +61,6 @@ private:
void createRootNodes(); void createRootNodes();
void removeAllNodes(); void removeAllNodes();
class ReplicatedDatabasesMetadataSync;
/// get_zookeeper will provide a zookeeper client without any fault injection /// get_zookeeper will provide a zookeeper client without any fault injection
const zkutil::GetZooKeeper get_zookeeper; const zkutil::GetZooKeeper get_zookeeper;
const String root_zookeeper_path; const String root_zookeeper_path;

View File

@ -222,10 +222,19 @@ void RestorerFromBackup::setStage(const String & new_stage, const String & messa
if (restore_coordination) if (restore_coordination)
{ {
restore_coordination->setStage(new_stage, message); restore_coordination->setStage(new_stage, message);
if (new_stage == Stage::FINDING_TABLES_IN_BACKUP)
restore_coordination->waitForStage(new_stage, on_cluster_first_sync_timeout); /// The initiator of a RESTORE ON CLUSTER query waits for other hosts to complete their work (see waitForStage(Stage::COMPLETED) in BackupsWorker::doRestore),
else /// but other hosts shouldn't wait for each other's completion. (That's simply unnecessary and also
restore_coordination->waitForStage(new_stage); /// the initiator may start cleaning up (e.g. removing restore-coordination ZooKeeper nodes) once all other hosts are in Stage::COMPLETED.)
bool need_wait = (new_stage != Stage::COMPLETED);
if (need_wait)
{
if (new_stage == Stage::FINDING_TABLES_IN_BACKUP)
restore_coordination->waitForStage(new_stage, on_cluster_first_sync_timeout);
else
restore_coordination->waitForStage(new_stage);
}
} }
} }

View File

@ -296,16 +296,28 @@ ColumnWithTypeAndName ColumnFunction::reduce() const
function->getName(), toString(args), toString(captured)); function->getName(), toString(args), toString(captured));
ColumnsWithTypeAndName columns = captured_columns; ColumnsWithTypeAndName columns = captured_columns;
IFunction::ShortCircuitSettings settings;
/// Arguments of lazy executed function can also be lazy executed. /// Arguments of lazy executed function can also be lazy executed.
/// But we shouldn't execute arguments if this function is short circuit, if (is_short_circuit_argument)
/// because it will handle lazy executed arguments by itself.
if (is_short_circuit_argument && !function->isShortCircuit(settings, args))
{ {
for (auto & col : columns) IFunction::ShortCircuitSettings settings;
/// We shouldn't execute all arguments if this function is short circuit,
/// because it will handle lazy executed arguments by itself.
/// Execute only arguments with disabled lazy execution.
if (function->isShortCircuit(settings, args))
{ {
if (const ColumnFunction * arg = checkAndGetShortCircuitArgument(col.column)) for (size_t i : settings.arguments_with_disabled_lazy_execution)
col = arg->reduce(); {
if (const ColumnFunction * arg = checkAndGetShortCircuitArgument(columns[i].column))
columns[i] = arg->reduce();
}
}
else
{
for (auto & col : columns)
{
if (const ColumnFunction * arg = checkAndGetShortCircuitArgument(col.column))
col = arg->reduce();
}
} }
} }

View File

@ -218,20 +218,27 @@ AsyncLoader::~AsyncLoader()
{ {
// All `LoadTask` objects should be destructed before AsyncLoader destruction because they hold a reference. // All `LoadTask` objects should be destructed before AsyncLoader destruction because they hold a reference.
// To make sure we check for all pending jobs to be finished. // To make sure we check for all pending jobs to be finished.
std::unique_lock lock{mutex}; {
if (scheduled_jobs.empty() && finished_jobs.empty()) std::unique_lock lock{mutex};
return; if (!scheduled_jobs.empty() || !finished_jobs.empty())
{
std::vector<String> scheduled;
std::vector<String> finished;
scheduled.reserve(scheduled_jobs.size());
finished.reserve(finished_jobs.size());
for (const auto & [job, _] : scheduled_jobs)
scheduled.push_back(job->name);
for (const auto & job : finished_jobs)
finished.push_back(job->name);
LOG_ERROR(log, "Bug. Destruction with pending ({}) and finished ({}) load jobs.", fmt::join(scheduled, ", "), fmt::join(finished, ", "));
abort();
}
}
std::vector<String> scheduled; // When all jobs are done we could still have finalizing workers.
std::vector<String> finished; // These workers could call updateCurrentPriorityAndSpawn() that scans all pools.
scheduled.reserve(scheduled_jobs.size()); // We need to stop all of them before destructing any of them.
finished.reserve(finished_jobs.size()); stop();
for (const auto & [job, _] : scheduled_jobs)
scheduled.push_back(job->name);
for (const auto & job : finished_jobs)
finished.push_back(job->name);
LOG_ERROR(log, "Bug. Destruction with pending ({}) and finished ({}) load jobs.", fmt::join(scheduled, ", "), fmt::join(finished, ", "));
abort();
} }
void AsyncLoader::start() void AsyncLoader::start()

View File

@ -306,6 +306,8 @@
\ \
M(FilteringMarksWithPrimaryKey, "Number of threads currently doing filtering of mark ranges by the primary key") \ M(FilteringMarksWithPrimaryKey, "Number of threads currently doing filtering of mark ranges by the primary key") \
M(FilteringMarksWithSecondaryKeys, "Number of threads currently doing filtering of mark ranges by secondary keys") \ M(FilteringMarksWithSecondaryKeys, "Number of threads currently doing filtering of mark ranges by secondary keys") \
\
M(S3DiskNoKeyErrors, "The number of `NoSuchKey` errors that occur when reading data from S3 cloud storage through ClickHouse disks.") \
#ifdef APPLY_FOR_EXTERNAL_METRICS #ifdef APPLY_FOR_EXTERNAL_METRICS
#define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M)

View File

@ -19,7 +19,7 @@ Epoll::Epoll() : events_count(0)
{ {
epoll_fd = epoll_create1(0); epoll_fd = epoll_create1(0);
if (epoll_fd == -1) if (epoll_fd == -1)
throw DB::ErrnoException(DB::ErrorCodes::EPOLL_ERROR, "Cannot open epoll descriptor"); throw ErrnoException(ErrorCodes::EPOLL_ERROR, "Cannot open epoll descriptor");
} }
Epoll::Epoll(Epoll && other) noexcept : epoll_fd(other.epoll_fd), events_count(other.events_count.load()) Epoll::Epoll(Epoll && other) noexcept : epoll_fd(other.epoll_fd), events_count(other.events_count.load())
@ -47,7 +47,7 @@ void Epoll::add(int fd, void * ptr, uint32_t events)
++events_count; ++events_count;
if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &event) == -1) if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &event) == -1)
throw DB::ErrnoException(DB::ErrorCodes::EPOLL_ERROR, "Cannot add new descriptor to epoll"); throw ErrnoException(ErrorCodes::EPOLL_ERROR, "Cannot add new descriptor to epoll");
} }
void Epoll::remove(int fd) void Epoll::remove(int fd)
@ -55,7 +55,7 @@ void Epoll::remove(int fd)
--events_count; --events_count;
if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, nullptr) == -1) if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, nullptr) == -1)
throw DB::ErrnoException(DB::ErrorCodes::EPOLL_ERROR, "Cannot remove descriptor from epoll"); throw ErrnoException(ErrorCodes::EPOLL_ERROR, "Cannot remove descriptor from epoll");
} }
size_t Epoll::getManyReady(int max_events, epoll_event * events_out, int timeout) const size_t Epoll::getManyReady(int max_events, epoll_event * events_out, int timeout) const
@ -82,7 +82,7 @@ size_t Epoll::getManyReady(int max_events, epoll_event * events_out, int timeout
continue; continue;
} }
else else
throw DB::ErrnoException(DB::ErrorCodes::EPOLL_ERROR, "Error in epoll_wait"); throw ErrnoException(ErrorCodes::EPOLL_ERROR, "Error in epoll_wait");
} }
else else
break; break;

View File

@ -4,8 +4,6 @@
#include <Common/ExponentiallySmoothedCounter.h> #include <Common/ExponentiallySmoothedCounter.h>
#include <numbers>
namespace DB namespace DB
{ {
@ -14,9 +12,10 @@ namespace DB
class EventRateMeter class EventRateMeter
{ {
public: public:
explicit EventRateMeter(double now, double period_) explicit EventRateMeter(double now, double period_, size_t heating_ = 0)
: period(period_) : period(period_)
, half_decay_time(period * std::numbers::ln2) // for `ExponentiallySmoothedAverage::sumWeights()` to be equal to `1/period` , max_interval(period * 10)
, heating(heating_)
{ {
reset(now); reset(now);
} }
@ -29,16 +28,11 @@ public:
{ {
// Remove data for initial heating stage that can present at the beginning of a query. // Remove data for initial heating stage that can present at the beginning of a query.
// Otherwise it leads to wrong gradual increase of average value, turning algorithm into not very reactive. // Otherwise it leads to wrong gradual increase of average value, turning algorithm into not very reactive.
if (count != 0.0 && ++data_points < 5) if (count != 0.0 && data_points++ <= heating)
{ reset(events.time, data_points);
start = events.time;
events = ExponentiallySmoothedAverage();
}
if (now - period <= start) // precise counting mode duration.add(std::min(max_interval, now - duration.time), now, period);
events = ExponentiallySmoothedAverage(events.value + count, now); events.add(count, now, period);
else // exponential smoothing mode
events.add(count, now, half_decay_time);
} }
/// Compute average event rate throughout `[now - period, now]` period. /// Compute average event rate throughout `[now - period, now]` period.
@ -49,24 +43,26 @@ public:
add(now, 0); add(now, 0);
if (unlikely(now <= start)) if (unlikely(now <= start))
return 0; return 0;
if (now - period <= start) // precise counting mode
return events.value / (now - start); // We do not use .get() because sum of weights will anyway be canceled out (optimization)
else // exponential smoothing mode return events.value / duration.value;
return events.get(half_decay_time); // equals to `events.value / period`
} }
void reset(double now) void reset(double now, size_t data_points_ = 0)
{ {
start = now; start = now;
events = ExponentiallySmoothedAverage(); events = ExponentiallySmoothedAverage();
data_points = 0; duration = ExponentiallySmoothedAverage();
data_points = data_points_;
} }
private: private:
const double period; const double period;
const double half_decay_time; const double max_interval;
const size_t heating;
double start; // Instant in past without events before it; when measurement started or reset double start; // Instant in past without events before it; when measurement started or reset
ExponentiallySmoothedAverage events; // Estimated number of events in the last `period` ExponentiallySmoothedAverage duration; // Current duration of a period
ExponentiallySmoothedAverage events; // Estimated number of events in last `duration` seconds
size_t data_points = 0; size_t data_points = 0;
}; };

View File

@ -253,18 +253,18 @@ void HostResolver::updateImpl(Poco::Timestamp now, std::vector<Poco::Net::IPAddr
} }
} }
for (auto & rec : merged) for (auto & record : merged)
{ {
if (!rec.failed) if (!record.failed || !record.consecutive_fail_count)
continue; continue;
/// Exponential increased time for each consecutive fail /// Exponential increased time for each consecutive fail
auto banned_until = now - Poco::Timespan(history.totalMicroseconds() * (1ull << (rec.consecutive_fail_count - 1))); auto banned_until = now - Poco::Timespan(history.totalMicroseconds() * (1ull << (record.consecutive_fail_count - 1)));
if (rec.fail_time < banned_until) if (record.fail_time < banned_until)
{ {
rec.failed = false; record.failed = false;
CurrentMetrics::sub(metrics.banned_count); CurrentMetrics::sub(metrics.banned_count);
} }
} }
chassert(std::is_sorted(merged.begin(), merged.end())); chassert(std::is_sorted(merged.begin(), merged.end()));

View File

@ -86,7 +86,10 @@ inline std::string_view toDescription(OvercommitResult result)
bool shouldTrackAllocation(Float64 probability, void * ptr) bool shouldTrackAllocation(Float64 probability, void * ptr)
{ {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wimplicit-const-int-float-conversion"
return intHash64(uintptr_t(ptr)) < std::numeric_limits<uint64_t>::max() * probability; return intHash64(uintptr_t(ptr)) < std::numeric_limits<uint64_t>::max() * probability;
#pragma clang diagnostic pop
} }
} }

View File

@ -14,7 +14,10 @@ public:
, re_gen(key_template) , re_gen(key_template)
{ {
} }
DB::ObjectStorageKey generate(const String &, bool) const override { return DB::ObjectStorageKey::createAsAbsolute(re_gen.generate()); } DB::ObjectStorageKey generate(const String &, bool /* is_directory */, const std::optional<String> & /* key_prefix */) const override
{
return DB::ObjectStorageKey::createAsAbsolute(re_gen.generate());
}
private: private:
String key_template; String key_template;
@ -29,7 +32,7 @@ public:
: key_prefix(std::move(key_prefix_)) : key_prefix(std::move(key_prefix_))
{} {}
DB::ObjectStorageKey generate(const String &, bool) const override DB::ObjectStorageKey generate(const String &, bool /* is_directory */, const std::optional<String> & /* key_prefix */) const override
{ {
/// Path to store the new S3 object. /// Path to store the new S3 object.
@ -60,7 +63,8 @@ public:
: key_prefix(std::move(key_prefix_)) : key_prefix(std::move(key_prefix_))
{} {}
DB::ObjectStorageKey generate(const String & path, bool) const override DB::ObjectStorageKey
generate(const String & path, bool /* is_directory */, const std::optional<String> & /* key_prefix */) const override
{ {
return DB::ObjectStorageKey::createAsRelative(key_prefix, path); return DB::ObjectStorageKey::createAsRelative(key_prefix, path);
} }

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <memory> #include <memory>
#include <optional>
#include "ObjectStorageKey.h" #include "ObjectStorageKey.h"
namespace DB namespace DB
@ -11,7 +12,11 @@ class IObjectStorageKeysGenerator
public: public:
virtual ~IObjectStorageKeysGenerator() = default; virtual ~IObjectStorageKeysGenerator() = default;
virtual ObjectStorageKey generate(const String & path, bool is_directory) const = 0; /// Generates an object storage key based on a path in the virtual filesystem.
/// @param path - Path in the virtual filesystem.
/// @param is_directory - If the path in the virtual filesystem corresponds to a directory.
/// @param key_prefix - Optional key prefix for the generated object storage key. If provided, this prefix will be added to the beginning of the generated key.
virtual ObjectStorageKey generate(const String & path, bool is_directory, const std::optional<String> & key_prefix) const = 0;
}; };
using ObjectStorageKeysGeneratorPtr = std::shared_ptr<IObjectStorageKeysGenerator>; using ObjectStorageKeysGeneratorPtr = std::shared_ptr<IObjectStorageKeysGenerator>;

View File

@ -105,7 +105,7 @@ private:
bool write_progress_on_update = false; bool write_progress_on_update = false;
EventRateMeter cpu_usage_meter{static_cast<double>(clock_gettime_ns()), 2'000'000'000 /*ns*/}; // average cpu utilization last 2 second EventRateMeter cpu_usage_meter{static_cast<double>(clock_gettime_ns()), 2'000'000'000 /*ns*/, 4}; // average cpu utilization last 2 second, skip first 4 points
HostToTimesMap hosts_data; HostToTimesMap hosts_data;
/// In case of all of the above: /// In case of all of the above:
/// - clickhouse-local /// - clickhouse-local

View File

@ -3,6 +3,8 @@
#include <Common/ErrorCodes.h> #include <Common/ErrorCodes.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Common/Priority.h> #include <Common/Priority.h>
#include <Common/EventRateMeter.h>
#include <Common/Stopwatch.h>
#include <base/defines.h> #include <base/defines.h>
#include <base/types.h> #include <base/types.h>
@ -176,6 +178,14 @@ protected:
/// Postponed to be handled in scheduler thread, so it is intended to be called from outside. /// Postponed to be handled in scheduler thread, so it is intended to be called from outside.
void scheduleActivation(); void scheduleActivation();
/// Helper for introspection metrics: accounts for one dequeued request.
/// Bumps the dequeued request counter, adds `cost` to the total dequeued cost
/// and reports `cost` to the `throughput` meter together with the current time.
void incrementDequeued(ResourceCost cost)
{
    ++dequeued_requests;
    dequeued_cost += cost;

    /// Nanosecond clock reading scaled down by 1e9 before it is passed to the meter.
    const double now_seconds = static_cast<double>(clock_gettime_ns()) / 1e9;
    throughput.add(now_seconds, cost);
}
public: public:
EventQueue * const event_queue; EventQueue * const event_queue;
String basename; String basename;
@ -189,6 +199,10 @@ public:
std::atomic<ResourceCost> dequeued_cost{0}; std::atomic<ResourceCost> dequeued_cost{0};
std::atomic<ResourceCost> canceled_cost{0}; std::atomic<ResourceCost> canceled_cost{0};
std::atomic<UInt64> busy_periods{0}; std::atomic<UInt64> busy_periods{0};
/// Average dequeued_cost per second
/// WARNING: Should only be accessed from the scheduler thread, so that locking is not required
EventRateMeter throughput{static_cast<double>(clock_gettime_ns())/1e9, 2, 1};
}; };
using SchedulerNodePtr = std::shared_ptr<ISchedulerNode>; using SchedulerNodePtr = std::shared_ptr<ISchedulerNode>;

View File

@ -188,8 +188,7 @@ public:
if (request) if (request)
{ {
dequeued_requests++; incrementDequeued(request->cost);
dequeued_cost += request->cost;
return {request, heap_size > 0}; return {request, heap_size > 0};
} }
} }

View File

@ -59,8 +59,7 @@ public:
if (requests.empty()) if (requests.empty())
busy_periods++; busy_periods++;
queue_cost -= result->cost; queue_cost -= result->cost;
dequeued_requests++; incrementDequeued(result->cost);
dequeued_cost += result->cost;
return {result, !requests.empty()}; return {result, !requests.empty()};
} }

View File

@ -122,8 +122,7 @@ public:
if (request) if (request)
{ {
dequeued_requests++; incrementDequeued(request->cost);
dequeued_cost += request->cost;
return {request, !items.empty()}; return {request, !items.empty()};
} }
} }

View File

@ -81,8 +81,7 @@ public:
child_active = child_now_active; child_active = child_now_active;
if (!active()) if (!active())
busy_periods++; busy_periods++;
dequeued_requests++; incrementDequeued(request->cost);
dequeued_cost += request->cost;
return {request, active()}; return {request, active()};
} }

View File

@ -89,8 +89,7 @@ public:
child_active = child_now_active; child_active = child_now_active;
if (!active()) if (!active())
busy_periods++; busy_periods++;
dequeued_requests++; incrementDequeued(request->cost);
dequeued_cost += request->cost;
return {request, active()}; return {request, active()};
} }

View File

@ -162,8 +162,7 @@ public:
if (request == nullptr) // Possible in case of request cancel, just retry if (request == nullptr) // Possible in case of request cancel, just retry
continue; continue;
dequeued_requests++; incrementDequeued(request->cost);
dequeued_cost += request->cost;
return {request, current != nullptr}; return {request, current != nullptr};
} }
} }

View File

@ -2,6 +2,7 @@
#include <Common/TimerDescriptor.h> #include <Common/TimerDescriptor.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Common/Epoll.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <sys/timerfd.h> #include <sys/timerfd.h>
@ -75,10 +76,22 @@ void TimerDescriptor::drain() const
/// or since the last successful read(2), then the buffer given to read(2) returns an unsigned 8-byte integer (uint64_t) /// or since the last successful read(2), then the buffer given to read(2) returns an unsigned 8-byte integer (uint64_t)
/// containing the number of expirations that have occurred. /// containing the number of expirations that have occurred.
/// (The returned value is in host byte order—that is, the native byte order for integers on the host machine.) /// (The returned value is in host byte order—that is, the native byte order for integers on the host machine.)
/// Due to a bug in the Linux kernel, reading from a timerfd in non-blocking mode can still block.
/// Avoid this by polling the descriptor for readiness before reading.
Epoll epoll;
epoll.add(timer_fd);
epoll_event event;
event.data.fd = -1;
size_t ready_count = epoll.getManyReady(1, &event, 0);
if (!ready_count)
return;
uint64_t buf; uint64_t buf;
while (true) while (true)
{ {
ssize_t res = ::read(timer_fd, &buf, sizeof(buf)); ssize_t res = ::read(timer_fd, &buf, sizeof(buf));
if (res < 0) if (res < 0)
{ {
/// man timerfd_create: /// man timerfd_create:

View File

@ -0,0 +1,68 @@
#include <gtest/gtest.h>
#include <Common/EventRateMeter.h>
#include <cmath>
/// Feeds `ExponentiallySmoothedAverage` a constant value of `target * time_step` per step
/// for 30 half-decay times and checks that the reported average is within a relative
/// error of 1e-5 of `target`, for several half-decay times.
TEST(EventRateMeter, ExponentiallySmoothedAverage)
{
    const double target = 100.0;

    // The test is only correct for a timestep of 1 second because of
    // how the sum of weights is implemented inside `ExponentiallySmoothedAverage`
    const double time_step = 1.0;

    const double value_per_step = target * time_step;
    const double tolerance = 1e-5 * target;

    for (const double half_decay_time : {0.1, 1.0, 10.0, 100.0})
    {
        DB::ExponentiallySmoothedAverage average;
        const int total_steps = static_cast<int>(half_decay_time * 30 / time_step);

        int step = 1;
        while (step <= total_steps)
        {
            average.add(value_per_step, step * time_step, half_decay_time);
            ++step;
        }

        const double measured = average.get(half_decay_time);
        const double error = std::fabs(measured - target);
        ASSERT_LE(error, tolerance);
    }
}
/// For every combination of meter period and time step, adds `target * time_step`
/// at each multiple of the time step over 30 periods and checks that the rate
/// reported at the last timestamp is within a relative error of 1e-5 of `target`.
TEST(EventRateMeter, ConstantRate)
{
    const double target = 100.0;
    const double tolerance = 1e-5 * target;

    for (const double period : {0.1, 1.0, 10.0})
    {
        for (const double time_step : {0.001, 0.01, 0.1, 1.0})
        {
            DB::EventRateMeter meter(0.0, period);
            const int total_steps = static_cast<int>(period * 30 / time_step);
            const double amount_per_step = target * time_step;

            int step = 1;
            while (step <= total_steps)
            {
                meter.add(step * time_step, amount_per_step);
                ++step;
            }

            const double measured = meter.rate(total_steps * time_step);
            // Debugging aid:
            // std::cout << "T=" << period << " dt=" << time_step << " measured=" << measured << std::endl;
            const double error = std::fabs(measured - target);
            ASSERT_LE(error, tolerance);
        }
    }
}
/// For every combination of meter period and time step, adds `target * time_step`
/// at each multiple of the time step within the first period and checks after
/// every single addition that the reported rate is already within a relative
/// error of 1e-5 of `target`.
TEST(EventRateMeter, PreciseStart)
{
    const double target = 100.0;
    const double tolerance = 1e-5 * target;

    for (const double period : {0.1, 1.0, 10.0})
    {
        for (const double time_step : {0.001, 0.01, 0.1, 1.0})
        {
            DB::EventRateMeter meter(0.0, period);
            const int total_steps = static_cast<int>(period / time_step);
            const double amount_per_step = target * time_step;

            int step = 1;
            while (step <= total_steps)
            {
                meter.add(step * time_step, amount_per_step);

                const double measured = meter.rate(step * time_step);
                // Debugging aid:
                // std::cout << "T=" << period << " dt=" << time_step << " measured=" << measured << std::endl;
                const double error = std::fabs(measured - target);
                ASSERT_LE(error, tolerance);

                ++step;
            }
        }
    }
}

View File

@ -166,6 +166,7 @@ namespace DB
M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \ M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \
M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \
M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \
M(Bool, disable_insertion_and_mutation, false, "Disable all insert/alter/delete queries. This setting will be enabled if someone needs read-only nodes to prevent insertion and mutation affect reading performance.", 0)
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp

View File

@ -936,6 +936,7 @@ class IColumn;
M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \
M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \
M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \ M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \
M(Bool, allow_archive_path_syntax, true, "File/S3 engines/table function will parse paths with '::' as '<archive> :: <file>' if archive has correct extension", 0) \
\ \
M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \
M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \ M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \

View File

@ -57,265 +57,448 @@ String ClickHouseVersion::toString() const
/// Note: please check if the key already exists to prevent duplicate entries. /// Note: please check if the key already exists to prevent duplicate entries.
static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory::SettingsChanges>> settings_changes_history_initializer = static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory::SettingsChanges>> settings_changes_history_initializer =
{ {
{"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, {"24.12",
{"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, {
{"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, }
{"output_format_native_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in Native output format"}, },
{"input_format_native_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"}, {"24.11",
{"read_in_order_use_buffering", false, true, "Use buffering before merging while reading in order of primary key"}, {
{"enable_named_columns_in_function_tuple", false, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers."}, }
{"input_format_json_case_insensitive_column_matching", false, false, "Ignore case when matching JSON keys with CH columns."}, },
{"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."}, {"24.10",
{"dictionary_validate_primary_key_type", false, false, "Validate primary key type for dictionaries. By default id type for simple layouts will be implicitly converted to UInt64."}, {
{"collect_hash_table_stats_during_joins", false, true, "New setting."}, }
{"max_size_to_preallocate_for_joins", 0, 100'000'000, "New setting."}, },
{"input_format_orc_reader_time_zone_name", "GMT", "GMT", "The time zone name for ORC row reader, the default ORC row reader's time zone is GMT."}, {"24.9",
{"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."}, {
{"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"}, }
{"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"}, },
{"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"}, {"24.8",
{"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"}, {
{"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"}, {"merge_tree_min_bytes_per_task_for_remote_reading", 4194304, 2097152, "Value is unified with `filesystem_prefetch_min_bytes_for_single_read_task`"},
{"merge_tree_min_bytes_per_task_for_remote_reading", 4194304, 2097152, "Value is unified with `filesystem_prefetch_min_bytes_for_single_read_task`"}, {"allow_archive_path_syntax", true, true, "Added new setting to allow disabling archive path syntax."},
{"ignore_on_cluster_for_replicated_named_collections_queries", false, false, "Ignore ON CLUSTER clause for replicated named collections management queries."}, }
{"backup_restore_s3_retry_attempts", 1000,1000, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore."}, },
{"postgresql_connection_attempt_timeout", 2, 2, "Allow to control 'connect_timeout' parameter of PostgreSQL connection."}, {"24.7",
{"postgresql_connection_pool_retries", 2, 2, "Allow to control the number of retries in PostgreSQL connection pool."} {
}}, {"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."},
{"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"},
{"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"},
{"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, {"output_format_native_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in Native output format"},
{"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, {"input_format_native_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"},
{"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, {"read_in_order_use_buffering", false, true, "Use buffering before merging while reading in order of primary key"},
{"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, {"enable_named_columns_in_function_tuple", false, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers."},
{"allow_experimental_full_text_index", false, false, "Enable experimental full-text index"}, {"input_format_json_case_insensitive_column_matching", false, false, "Ignore case when matching JSON keys with CH columns."},
{"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, {"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."},
{"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, {"dictionary_validate_primary_key_type", false, false, "Validate primary key type for dictionaries. By default id type for simple layouts will be implicitly converted to UInt64."},
{"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, {"collect_hash_table_stats_during_joins", false, true, "New setting."},
{"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, {"max_size_to_preallocate_for_joins", 0, 100'000'000, "New setting."},
{"s3_max_part_number", 10000, 10000, "Maximum part number number for s3 upload part"}, {"input_format_orc_reader_time_zone_name", "GMT", "GMT", "The time zone name for ORC row reader, the default ORC row reader's time zone is GMT."}, {"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."},
{"s3_max_single_operation_copy_size", 32 * 1024 * 1024, 32 * 1024 * 1024, "Maximum size for a single copy operation in s3"}, {"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"},
{"input_format_parquet_max_block_size", 8192, DEFAULT_BLOCK_SIZE, "Increase block size for parquet reader."}, {"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"},
{"input_format_parquet_prefer_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader."}, {"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"},
{"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"}, {"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"},
{"allow_deprecated_snowflake_conversion_functions", true, false, "Disabled deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake."}, {"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"},
{"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."}, {"ignore_on_cluster_for_replicated_named_collections_queries", false, false, "Ignore ON CLUSTER clause for replicated named collections management queries."},
{"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."}, {"backup_restore_s3_retry_attempts", 1000,1000, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore."},
{"allow_statistics_optimize", false, false, "The setting was renamed. The previous name is `allow_statistic_optimize`."}, {"postgresql_connection_attempt_timeout", 2, 2, "Allow to control 'connect_timeout' parameter of PostgreSQL connection."},
{"allow_experimental_statistics", false, false, "The setting was renamed. The previous name is `allow_experimental_statistic`."}, {"postgresql_connection_pool_retries", 2, 2, "Allow to control the number of retries in PostgreSQL connection pool."}
{"enable_vertical_final", false, true, "Enable vertical final by default again after fixing bug"}, }
{"parallel_replicas_custom_key_range_lower", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards"}, },
{"parallel_replicas_custom_key_range_upper", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards. A value of 0 disables the upper limit"}, {"24.6",
{"output_format_pretty_display_footer_column_names", 0, 1, "Add a setting to display column names in the footer if there are many rows. Threshold value is controlled by output_format_pretty_display_footer_column_names_min_rows."}, {
{"output_format_pretty_display_footer_column_names_min_rows", 0, 50, "Add a setting to control the threshold value for setting output_format_pretty_display_footer_column_names_min_rows. Default 50."}, {"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"},
{"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"},
{"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."},
{"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"},
}}, {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"},
{"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"}, {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"},
{"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, {"allow_experimental_full_text_index", false, false, "Enable experimental full-text index"},
{"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"},
{"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"},
{"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"},
{"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"},
{"http_max_chunk_size", 0, 0, "Internal limitation"}, {"s3_max_part_number", 10000, 10000, "Maximum part number number for s3 upload part"},
{"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"s3_max_single_operation_copy_size", 32 * 1024 * 1024, 32 * 1024 * 1024, "Maximum size for a single copy operation in s3"},
{"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"input_format_parquet_max_block_size", 8192, DEFAULT_BLOCK_SIZE, "Increase block size for parquet reader."},
{"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, {"input_format_parquet_prefer_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader."},
{"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, {"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"},
{"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, {"allow_deprecated_snowflake_conversion_functions", true, false, "Disabled deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake."},
}}, {"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."},
{"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."},
{"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, {"allow_statistics_optimize", false, false, "The setting was renamed. The previous name is `allow_statistic_optimize`."},
{"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, {"allow_experimental_statistics", false, false, "The setting was renamed. The previous name is `allow_experimental_statistic`."},
{"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, {"enable_vertical_final", false, true, "Enable vertical final by default again after fixing bug"},
{"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, {"parallel_replicas_custom_key_range_lower", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards"},
{"input_format_json_ignore_unnecessary_fields", false, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields"}, {"parallel_replicas_custom_key_range_upper", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards. A value of 0 disables the upper limit"},
{"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, {"output_format_pretty_display_footer_column_names", 0, 1, "Add a setting to display column names in the footer if there are many rows. Threshold value is controlled by output_format_pretty_display_footer_column_names_min_rows."},
{"allow_experimental_database_replicated", false, true, "Database engine Replicated is now in Beta stage"}, {"output_format_pretty_display_footer_column_names_min_rows", 0, 50, "Add a setting to control the threshold value for setting output_format_pretty_display_footer_column_names_min_rows. Default 50."},
{"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"}, {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."},
{"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"}, {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."},
{"azure_allow_parallel_part_upload", "true", "true", "Use multiple threads for azure multipart upload."}, {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."},
{"max_recursive_cte_evaluation_depth", DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, "Maximum limit on recursive CTE evaluation depth"}, }
{"query_plan_convert_outer_join_to_inner_join", false, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values"}, },
}}, {"24.5",
{"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, {
{"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, {"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"},
{"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."},
{"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"},
{"page_cache_inject_eviction", false, false, "Added userspace page cache"}, {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."},
{"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"}, {"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."},
{"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."},
{"traverse_shadow_remote_data_paths", false, false, "Traverse shadow directory when query system.remote_data_paths."}, {"http_max_chunk_size", 0, 0, "Internal limitation"},
{"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication in dependent materialized view cannot work together with async inserts."}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."},
{"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"},
{"log_processors_profiles", false, true, "Enable by default"}, {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"},
{"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."}, {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"},
{"allow_suspicious_primary_key", true, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. SimpleAggregateFunction)"}, {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."},
{"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"}, {"allow_archive_path_syntax", false, true, "Added new setting to allow disabling archive path syntax."},
{"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"}, }
{"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"}, },
{"distributed_insert_skip_read_only_replicas", false, false, "If true, INSERT into Distributed will skip read-only replicas"}, {"24.4",
{"keeper_max_retries", 10, 10, "Max retries for general keeper operations"}, {
{"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"}, {"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"},
{"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"},
{"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten"}, {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"},
{"allow_experimental_analyzer", false, true, "Enable analyzer and planner by default."}, {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"},
{"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."}, {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"},
{"allow_get_client_http_header", false, false, "Introduced a new function."}, {"input_format_json_ignore_unnecessary_fields", false, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields"},
{"output_format_pretty_row_numbers", false, true, "It is better for usability."}, {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."},
{"output_format_pretty_max_value_width_apply_for_single_value", true, false, "Single values in Pretty formats won't be cut."}, {"allow_experimental_database_replicated", false, true, "Database engine Replicated is now in Beta stage"},
{"output_format_parquet_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, {"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"},
{"output_format_orc_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, {"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"},
{"output_format_arrow_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, {"azure_allow_parallel_part_upload", "true", "true", "Use multiple threads for azure multipart upload."},
{"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, {"max_recursive_cte_evaluation_depth", DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, "Maximum limit on recursive CTE evaluation depth"},
{"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, {"query_plan_convert_outer_join_to_inner_join", false, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values"},
{"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."}, }
{"geo_distance_returns_float64_on_float64_arguments", false, true, "Increase the default precision."}, },
{"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited."}, {"24.3",
{"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."}, {
{"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."}, {"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"},
{"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"},
{"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"},
{"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"},
{"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, {"page_cache_inject_eviction", false, false, "Added userspace page cache"},
{"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"},
{"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"},
}}, {"traverse_shadow_remote_data_paths", false, false, "Traverse shadow directory when query system.remote_data_paths."},
{"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication in dependent materialized view cannot work together with async inserts."},
{"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"},
{"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"}, {"log_processors_profiles", false, true, "Enable by default"},
{"output_format_pretty_single_large_number_tip_threshold", 0, 1'000'000, "Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0)"}, {"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."},
{"input_format_try_infer_exponent_floats", true, false, "Don't infer floats in exponential notation by default"}, {"allow_suspicious_primary_key", true, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. SimpleAggregateFunction)"},
{"query_plan_optimize_prewhere", true, true, "Allow to push down filter to PREWHERE expression for supported storages"}, {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"},
{"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."}, {"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"},
{"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"}, {"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"},
{"async_insert_use_adaptive_busy_timeout", false, true, "Use adaptive asynchronous insert timeout"}, {"distributed_insert_skip_read_only_replicas", false, false, "If true, INSERT into Distributed will skip read-only replicas"},
{"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"}, {"keeper_max_retries", 10, 10, "Max retries for general keeper operations"},
{"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"}, {"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"},
{"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"}, {"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"},
{"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}, {"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten"},
{"format_template_row_format", "", "", "Template row format string can be set directly in query"}, {"allow_experimental_analyzer", false, true, "Enable analyzer and planner by default."},
{"format_template_resultset_format", "", "", "Template result set format string can be set in query"}, {"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."},
{"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, {"allow_get_client_http_header", false, false, "Introduced a new function."},
{"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}, {"output_format_pretty_row_numbers", false, true, "It is better for usability."},
{"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."}, {"output_format_pretty_max_value_width_apply_for_single_value", true, false, "Single values in Pretty formats won't be cut."},
{"min_external_table_block_size_rows", DEFAULT_INSERT_BLOCK_SIZE, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to external table to specified size in rows, if blocks are not big enough"}, {"output_format_parquet_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."},
{"min_external_table_block_size_bytes", DEFAULT_INSERT_BLOCK_SIZE * 256, DEFAULT_INSERT_BLOCK_SIZE * 256, "Squash blocks passed to external table to specified size in bytes, if blocks are not big enough."}, {"output_format_orc_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."},
{"parallel_replicas_prefer_local_join", true, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN."}, {"output_format_arrow_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."},
{"optimize_time_filter_with_preimage", true, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')"}, {"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."},
{"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."}, {"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."},
{"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, {"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."},
{"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, {"geo_distance_returns_float64_on_float64_arguments", false, true, "Increase the default precision."},
{"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, {"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited."},
{"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, {"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."},
{"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, {"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."},
}}, {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."},
{"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."},
{"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."},
{"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."},
{"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."},
{"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."},
{"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, }
{"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, },
{"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, {"24.2",
{"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, {
{"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, {"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"},
{"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"},
{"enable_vertical_final", false, true, "Use vertical final by default"}, {"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"},
{"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, {"output_format_pretty_single_large_number_tip_threshold", 0, 1'000'000, "Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0)"},
{"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, {"input_format_try_infer_exponent_floats", true, false, "Don't infer floats in exponential notation by default"},
{"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, {"query_plan_optimize_prewhere", true, true, "Allow to push down filter to PREWHERE expression for supported storages"},
{"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, {"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."},
{"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, {"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"},
{"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, {"async_insert_use_adaptive_busy_timeout", false, true, "Use adaptive asynchronous insert timeout"},
{"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"},
{"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"},
{"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"},
{"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"},
{"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, {"format_template_row_format", "", "", "Template row format string can be set directly in query"},
{"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, {"format_template_resultset_format", "", "", "Template result set format string can be set in query"},
{"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, {"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"},
{"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"},
{"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, {"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."},
{"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, {"min_external_table_block_size_rows", DEFAULT_INSERT_BLOCK_SIZE, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to external table to specified size in rows, if blocks are not big enough"},
{"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, {"min_external_table_block_size_bytes", DEFAULT_INSERT_BLOCK_SIZE * 256, DEFAULT_INSERT_BLOCK_SIZE * 256, "Squash blocks passed to external table to specified size in bytes, if blocks are not big enough."},
{"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, {"parallel_replicas_prefer_local_join", true, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN."},
{"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, {"optimize_time_filter_with_preimage", true, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')"},
{"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."},
{"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"},
{"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"},
{"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"},
{"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."},
{"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."},
{"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, }
{"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, },
{"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, {"24.1",
{"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, {
{"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, {"print_pretty_type_names", false, true, "Better user experience."},
{"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"},
{"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"},
{"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"},
{"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"},
{"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"},
{"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"},
{"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"},
{"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"},
{"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"},
{"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"},
{"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, {"enable_vertical_final", false, true, "Use vertical final by default"},
{"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"},
{"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"},
{"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"},
{"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"},
{"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"},
{"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"},
{"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"},
{"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"},
{"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"},
{"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"},
{"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}
{"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, }
{"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, },
{"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, {"23.12",
{"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries"}}}, {
{"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, {"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."},
{"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
{"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"},
{"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}
{"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, }
{"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, },
{"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, {"23.11",
{"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, {
{"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, {"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}
{"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, }
{"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, },
{"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, {"23.9",
{"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, {
{"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, {"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"},
{"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"},
{"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"},
{"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"},
{"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"},
{"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"},
{"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}
{"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, }
{"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, },
{"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, {"23.8",
{"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"}, {
{"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, {"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}
{"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, }
{"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, },
{"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, {"23.7",
{"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, {
{"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, {"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}
{"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, }
{"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, },
{"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, {"23.6",
{"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, {
{"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, {"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
{"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, {"http_receive_timeout", 180, 30, "See http_send_timeout."}
{"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }
},
{"23.5",
{
{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."},
{"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."},
{"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"},
{"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}
}
},
{"23.4",
{
{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"},
{"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."},
{"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"},
{"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"},
{"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"},
{"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"},
{"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}
}
},
{"23.3",
{
{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"},
{"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"},
{"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"},
{"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"},
{"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"},
{"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"},
{"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}
}
},
{"23.2",
{
{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"},
{"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"},
{"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"},
{"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"},
{"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}
}
},
{"23.1",
{
{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"},
{"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"},
{"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"},
{"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"},
{"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"},
{"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries"}
}
},
{"22.12",
{
{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"},
{"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"},
{"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}
}
},
{"22.11",
{
{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}
}
},
{"22.9",
{
{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}
}
},
{"22.7",
{
{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"},
{"enable_positional_arguments", false, true, "Enable positional arguments feature by default"},
{"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}
}
},
{"22.6",
{
{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"},
{"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}
}
},
{"22.5",
{
{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"},
{"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}
}
},
{"22.4",
{
{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}
}
},
{"22.3",
{
{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}
}
},
{"21.12",
{
{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}
}
},
{"21.9",
{
{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"},
{"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}
}
},
{"21.7",
{
{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}
}
},
{"21.5",
{
{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}
}
},
{"21.3",
{
{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"},
{"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"},
{"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}
}
},
{"21.2",
{
{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}
}
},
{"21.1",
{
{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"},
{"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"},
{"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"},
{"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}
}
},
{"20.10",
{
{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}
}
},
{"20.7",
{
{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}
}
},
{"20.5",
{
{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"},
{"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}
}
},
{"20.4",
{
{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}
}
},
{"19.18",
{
{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}
}
},
{"19.14",
{
{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}
}
},
{"19.12",
{
{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}
}
},
{"19.5",
{
{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}
}
},
{"18.12.17",
{
{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}
}
},
}; };

View File

@ -271,9 +271,12 @@ namespace
if (d != 0.0 && !std::isnormal(d)) if (d != 0.0 && !std::isnormal(d))
throw Exception( throw Exception(
ErrorCodes::CANNOT_PARSE_NUMBER, "A setting's value in seconds must be a normal floating point number or zero. Got {}", d); ErrorCodes::CANNOT_PARSE_NUMBER, "A setting's value in seconds must be a normal floating point number or zero. Got {}", d);
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wimplicit-const-int-float-conversion"
if (d * 1000000 > std::numeric_limits<Poco::Timespan::TimeDiff>::max() || d * 1000000 < std::numeric_limits<Poco::Timespan::TimeDiff>::min()) if (d * 1000000 > std::numeric_limits<Poco::Timespan::TimeDiff>::max() || d * 1000000 < std::numeric_limits<Poco::Timespan::TimeDiff>::min())
throw Exception( throw Exception(
ErrorCodes::BAD_ARGUMENTS, "Cannot convert seconds to microseconds: the setting's value in seconds is too big: {}", d); ErrorCodes::BAD_ARGUMENTS, "Cannot convert seconds to microseconds: the setting's value in seconds is too big: {}", d);
#pragma clang diagnostic pop
return static_cast<Poco::Timespan::TimeDiff>(d * 1000000); return static_cast<Poco::Timespan::TimeDiff>(d * 1000000);
} }

View File

@ -103,7 +103,15 @@ static std::string getSortDescriptionDump(const SortDescription & description, c
WriteBufferFromOwnString buffer; WriteBufferFromOwnString buffer;
for (size_t i = 0; i < description.size(); ++i) for (size_t i = 0; i < description.size(); ++i)
buffer << header_types[i]->getName() << ' ' << description[i].direction << ' ' << description[i].nulls_direction; {
if (i != 0)
buffer << ", ";
buffer << "(type: " << header_types[i]->getName()
<< ", direction: " << description[i].direction
<< ", nulls_direction: " << description[i].nulls_direction
<< ")";
}
return buffer.str(); return buffer.str();
} }

View File

@ -1,2 +1,2 @@
clickhouse_add_executable (names_and_types_fuzzer names_and_types_fuzzer.cpp) clickhouse_add_executable (names_and_types_fuzzer names_and_types_fuzzer.cpp)
target_link_libraries (names_and_types_fuzzer PRIVATE dbms) target_link_libraries (names_and_types_fuzzer PRIVATE dbms clickhouse_functions)

View File

@ -1,2 +1,2 @@
clickhouse_add_executable(data_type_deserialization_fuzzer data_type_deserialization_fuzzer.cpp ${SRCS}) clickhouse_add_executable(data_type_deserialization_fuzzer data_type_deserialization_fuzzer.cpp ${SRCS})
target_link_libraries(data_type_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions) target_link_libraries(data_type_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions clickhouse_functions)

View File

@ -12,35 +12,30 @@
#include <AggregateFunctions/registerAggregateFunctions.h> #include <AggregateFunctions/registerAggregateFunctions.h>
using namespace DB;
ContextMutablePtr context;
extern "C" int LLVMFuzzerInitialize(int *, char ***)
{
if (context)
return true;
SharedContextHolder shared_context = Context::createShared();
context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
MainThreadStatus::getInstance();
registerAggregateFunctions();
return 0;
}
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{ {
try try
{ {
using namespace DB;
static SharedContextHolder shared_context;
static ContextMutablePtr context;
auto initialize = [&]() mutable
{
if (context)
return true;
shared_context = Context::createShared();
context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
context->setApplicationType(Context::ApplicationType::LOCAL);
MainThreadStatus::getInstance();
registerAggregateFunctions();
return true;
};
static bool initialized = initialize();
(void) initialized;
total_memory_tracker.resetCounters(); total_memory_tracker.resetCounters();
total_memory_tracker.setHardLimit(1_GiB); total_memory_tracker.setHardLimit(1_GiB);
CurrentThread::get().memory_tracker.resetCounters(); CurrentThread::get().memory_tracker.resetCounters();

View File

@ -44,7 +44,7 @@ namespace ErrorCodes
DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, ContextPtr context_) DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, ContextPtr context_)
: DatabaseOnDisk(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseLazy (" + name_ + ")", context_) : DatabaseOnDisk(name_, metadata_path_, std::filesystem::path("data") / escapeForFileName(name_) / "", "DatabaseLazy (" + name_ + ")", context_)
, expiration_time(expiration_time_) , expiration_time(expiration_time_)
{ {
} }

View File

@ -12,7 +12,7 @@ class DatabaseLazyIterator;
class Context; class Context;
/** Lazy engine of databases. /** Lazy engine of databases.
* Works like DatabaseOrdinary, but stores in memory only cache. * Works like DatabaseOrdinary, but stores in memory only the cache.
* Can be used only with *Log engines. * Can be used only with *Log engines.
*/ */
class DatabaseLazy final : public DatabaseOnDisk class DatabaseLazy final : public DatabaseOnDisk

View File

@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri
std::lock_guard lock(mutex); std::lock_guard lock(mutex);
if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end())
{ {
throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name);
} }
else else
{ {

View File

@ -16,6 +16,7 @@
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <Interpreters/InterpreterCreateQuery.h> #include <Interpreters/InterpreterCreateQuery.h>
#include <Interpreters/FunctionNameNormalizer.h> #include <Interpreters/FunctionNameNormalizer.h>
#include <Interpreters/NormalizeSelectWithUnionQueryVisitor.h>
#include <Parsers/ASTCreateQuery.h> #include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTSetQuery.h> #include <Parsers/ASTSetQuery.h>
#include <Parsers/ParserCreateQuery.h> #include <Parsers/ParserCreateQuery.h>
@ -250,6 +251,8 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables
convertMergeTreeToReplicatedIfNeeded(ast, qualified_name, file_name); convertMergeTreeToReplicatedIfNeeded(ast, qualified_name, file_name);
NormalizeSelectWithUnionQueryVisitor::Data data{local_context->getSettingsRef().union_default_mode};
NormalizeSelectWithUnionQueryVisitor{data}.visit(ast);
std::lock_guard lock{metadata.mutex}; std::lock_guard lock{metadata.mutex};
metadata.parsed_tables[qualified_name] = ParsedTableMetadata{full_path.string(), ast}; metadata.parsed_tables[qualified_name] = ParsedTableMetadata{full_path.string(), ast};
metadata.total_dictionaries += create_query->is_dictionary; metadata.total_dictionaries += create_query->is_dictionary;

View File

@ -1,3 +1,4 @@
#include <optional>
#include <Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h> #include <Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h>
#include "Common/Exception.h" #include "Common/Exception.h"
@ -117,7 +118,8 @@ AzureObjectStorage::AzureObjectStorage(
{ {
} }
ObjectStorageKey AzureObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const ObjectStorageKey
AzureObjectStorage::generateObjectKeyForPath(const std::string & /* path */, const std::optional<std::string> & /* key_prefix */) const
{ {
return ObjectStorageKey::createAsRelative(getRandomASCIIString(32)); return ObjectStorageKey::createAsRelative(getRandomASCIIString(32));
} }

View File

@ -101,7 +101,7 @@ public:
const std::string & config_prefix, const std::string & config_prefix,
ContextPtr context) override; ContextPtr context) override;
ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional<std::string> & key_prefix) const override;
bool isRemote() const override { return true; } bool isRemote() const override { return true; }

View File

@ -34,14 +34,16 @@ FileCache::Key CachedObjectStorage::getCacheKey(const std::string & path) const
return cache->createKeyForPath(path); return cache->createKeyForPath(path);
} }
ObjectStorageKey CachedObjectStorage::generateObjectKeyForPath(const std::string & path) const ObjectStorageKey
CachedObjectStorage::generateObjectKeyForPath(const std::string & path, const std::optional<std::string> & key_prefix) const
{ {
return object_storage->generateObjectKeyForPath(path); return object_storage->generateObjectKeyForPath(path, key_prefix);
} }
ObjectStorageKey CachedObjectStorage::generateObjectKeyPrefixForDirectoryPath(const std::string & path) const ObjectStorageKey
CachedObjectStorage::generateObjectKeyPrefixForDirectoryPath(const std::string & path, const std::optional<std::string> & key_prefix) const
{ {
return object_storage->generateObjectKeyPrefixForDirectoryPath(path); return object_storage->generateObjectKeyPrefixForDirectoryPath(path, key_prefix);
} }
ReadSettings CachedObjectStorage::patchSettings(const ReadSettings & read_settings) const ReadSettings CachedObjectStorage::patchSettings(const ReadSettings & read_settings) const

View File

@ -98,9 +98,10 @@ public:
const std::string & getCacheName() const override { return cache_config_name; } const std::string & getCacheName() const override { return cache_config_name; }
ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional<std::string> & key_prefix) const override;
ObjectStorageKey generateObjectKeyPrefixForDirectoryPath(const std::string & path) const override; ObjectStorageKey
generateObjectKeyPrefixForDirectoryPath(const std::string & path, const std::optional<std::string> & key_prefix) const override;
void setKeysGenerator(ObjectStorageKeysGeneratorPtr gen) override { object_storage->setKeysGenerator(gen); } void setKeysGenerator(ObjectStorageKeysGeneratorPtr gen) override { object_storage->setKeysGenerator(gen); }

View File

@ -1,5 +1,7 @@
#include "CommonPathPrefixKeyGenerator.h" #include <Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h>
#include <Disks/ObjectStorages/InMemoryPathMap.h>
#include <Common/SharedLockGuard.h>
#include <Common/getRandomASCIIString.h> #include <Common/getRandomASCIIString.h>
#include <deque> #include <deque>
@ -9,21 +11,22 @@
namespace DB namespace DB
{ {
CommonPathPrefixKeyGenerator::CommonPathPrefixKeyGenerator( CommonPathPrefixKeyGenerator::CommonPathPrefixKeyGenerator(String key_prefix_, std::weak_ptr<InMemoryPathMap> path_map_)
String key_prefix_, SharedMutex & shared_mutex_, std::weak_ptr<PathMap> path_map_) : storage_key_prefix(key_prefix_), path_map(std::move(path_map_))
: storage_key_prefix(key_prefix_), shared_mutex(shared_mutex_), path_map(std::move(path_map_))
{ {
} }
ObjectStorageKey CommonPathPrefixKeyGenerator::generate(const String & path, bool is_directory) const ObjectStorageKey
CommonPathPrefixKeyGenerator::generate(const String & path, bool is_directory, const std::optional<String> & key_prefix) const
{ {
const auto & [object_key_prefix, suffix_parts] = getLongestObjectKeyPrefix(path); const auto & [object_key_prefix, suffix_parts]
= getLongestObjectKeyPrefix(is_directory ? std::filesystem::path(path).parent_path().string() : path);
auto key = std::filesystem::path(object_key_prefix.empty() ? storage_key_prefix : object_key_prefix); auto key = std::filesystem::path(object_key_prefix);
/// The longest prefix is the same as path, meaning that the path is already mapped. /// The longest prefix is the same as path, meaning that the path is already mapped.
if (suffix_parts.empty()) if (suffix_parts.empty())
return ObjectStorageKey::createAsRelative(std::move(key)); return ObjectStorageKey::createAsRelative(key_prefix.has_value() ? *key_prefix : storage_key_prefix, std::move(key));
/// File and top-level directory paths are mapped as is. /// File and top-level directory paths are mapped as is.
if (!is_directory || object_key_prefix.empty()) if (!is_directory || object_key_prefix.empty())
@ -39,7 +42,7 @@ ObjectStorageKey CommonPathPrefixKeyGenerator::generate(const String & path, boo
key /= getRandomASCIIString(part_size); key /= getRandomASCIIString(part_size);
} }
return ObjectStorageKey::createAsRelative(key); return ObjectStorageKey::createAsRelative(key_prefix.has_value() ? *key_prefix : storage_key_prefix, key);
} }
std::tuple<std::string, std::vector<std::string>> CommonPathPrefixKeyGenerator::getLongestObjectKeyPrefix(const std::string & path) const std::tuple<std::string, std::vector<std::string>> CommonPathPrefixKeyGenerator::getLongestObjectKeyPrefix(const std::string & path) const
@ -47,14 +50,13 @@ std::tuple<std::string, std::vector<std::string>> CommonPathPrefixKeyGenerator::
std::filesystem::path p(path); std::filesystem::path p(path);
std::deque<std::string> dq; std::deque<std::string> dq;
std::shared_lock lock(shared_mutex); const auto ptr = path_map.lock();
SharedLockGuard lock(ptr->mutex);
auto ptr = path_map.lock();
while (p != p.root_path()) while (p != p.root_path())
{ {
auto it = ptr->find(p / ""); auto it = ptr->map.find(p);
if (it != ptr->end()) if (it != ptr->map.end())
{ {
std::vector<std::string> vec(std::make_move_iterator(dq.begin()), std::make_move_iterator(dq.end())); std::vector<std::string> vec(std::make_move_iterator(dq.begin()), std::make_move_iterator(dq.end()));
return std::make_tuple(it->second, std::move(vec)); return std::make_tuple(it->second, std::move(vec));

View File

@ -1,14 +1,15 @@
#pragma once #pragma once
#include <Common/ObjectStorageKeyGenerator.h> #include <Common/ObjectStorageKeyGenerator.h>
#include <Common/SharedMutex.h>
#include <filesystem> #include <filesystem>
#include <map> #include <map>
#include <optional>
namespace DB namespace DB
{ {
/// Deprecated. Used for backward compatibility with plain rewritable disks without a separate metadata layout.
/// Object storage key generator used specifically with the /// Object storage key generator used specifically with the
/// MetadataStorageFromPlainObjectStorage if multiple writes are allowed. /// MetadataStorageFromPlainObjectStorage if multiple writes are allowed.
@ -18,15 +19,16 @@ namespace DB
/// ///
/// The key generator ensures that the original directory hierarchy is /// The key generator ensures that the original directory hierarchy is
/// preserved, which is required for the MergeTree family. /// preserved, which is required for the MergeTree family.
struct InMemoryPathMap;
class CommonPathPrefixKeyGenerator : public IObjectStorageKeysGenerator class CommonPathPrefixKeyGenerator : public IObjectStorageKeysGenerator
{ {
public: public:
/// Local to remote path map. Leverages filesystem::path comparator for paths. /// Local to remote path map. Leverages filesystem::path comparator for paths.
using PathMap = std::map<std::filesystem::path, std::string>;
explicit CommonPathPrefixKeyGenerator(String key_prefix_, SharedMutex & shared_mutex_, std::weak_ptr<PathMap> path_map_); explicit CommonPathPrefixKeyGenerator(String key_prefix_, std::weak_ptr<InMemoryPathMap> path_map_);
ObjectStorageKey generate(const String & path, bool is_directory) const override; ObjectStorageKey generate(const String & path, bool is_directory, const std::optional<String> & key_prefix) const override;
private: private:
/// Longest key prefix and unresolved parts of the source path. /// Longest key prefix and unresolved parts of the source path.
@ -34,8 +36,7 @@ private:
const String storage_key_prefix; const String storage_key_prefix;
SharedMutex & shared_mutex; std::weak_ptr<InMemoryPathMap> path_map;
std::weak_ptr<PathMap> path_map;
}; };
} }

View File

@ -537,7 +537,7 @@ struct CopyFileObjectStorageOperation final : public IDiskObjectStorageOperation
for (const auto & object_from : source_blobs) for (const auto & object_from : source_blobs)
{ {
auto object_key = destination_object_storage.generateObjectKeyForPath(to_path); auto object_key = destination_object_storage.generateObjectKeyForPath(to_path, std::nullopt /* key_prefix */);
auto object_to = StoredObject(object_key.serialize()); auto object_to = StoredObject(object_key.serialize());
object_storage.copyObjectToAnotherObjectStorage(object_from, object_to,read_settings,write_settings, destination_object_storage); object_storage.copyObjectToAnotherObjectStorage(object_from, object_to,read_settings,write_settings, destination_object_storage);
@ -738,7 +738,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskObjectStorageTransaction::writeFile
const WriteSettings & settings, const WriteSettings & settings,
bool autocommit) bool autocommit)
{ {
auto object_key = object_storage.generateObjectKeyForPath(path); auto object_key = object_storage.generateObjectKeyForPath(path, std::nullopt /* key_prefix */);
std::optional<ObjectAttributes> object_attributes; std::optional<ObjectAttributes> object_attributes;
if (metadata_helper) if (metadata_helper)
@ -835,7 +835,7 @@ void DiskObjectStorageTransaction::writeFileUsingBlobWritingFunction(
const String & path, WriteMode mode, WriteBlobFunction && write_blob_function) const String & path, WriteMode mode, WriteBlobFunction && write_blob_function)
{ {
/// This function is a simplified and adapted version of DiskObjectStorageTransaction::writeFile(). /// This function is a simplified and adapted version of DiskObjectStorageTransaction::writeFile().
auto object_key = object_storage.generateObjectKeyForPath(path); auto object_key = object_storage.generateObjectKeyForPath(path, std::nullopt /* key_prefix */);
std::optional<ObjectAttributes> object_attributes; std::optional<ObjectAttributes> object_attributes;
if (metadata_helper) if (metadata_helper)

View File

@ -0,0 +1,51 @@
#include "FlatDirectoryStructureKeyGenerator.h"
#include <Disks/ObjectStorages/InMemoryPathMap.h>
#include "Common/ObjectStorageKey.h"
#include <Common/SharedLockGuard.h>
#include <Common/SharedMutex.h>
#include <Common/getRandomASCIIString.h>
#include <optional>
#include <shared_mutex>
#include <string>
namespace DB
{
/// Creates a key generator for the flat directory layout.
/// `storage_key_prefix_` is the prefix used by generate() when the caller does not pass an explicit key prefix.
/// `path_map_` is a weak (non-owning) reference to the local -> remote path map consulted by generate().
FlatDirectoryStructureKeyGenerator::FlatDirectoryStructureKeyGenerator(String storage_key_prefix_, std::weak_ptr<InMemoryPathMap> path_map_)
    /// Both arguments are taken by value as sinks, so move them into the members instead of copying.
    : storage_key_prefix(std::move(storage_key_prefix_)), path_map(std::move(path_map_))
{
}
/// Resolves a local path to an object storage key for the flat directory layout.
///
/// Resolution order:
///  1. If `path` itself is registered in the path map, its mapped remote path is returned as is.
///  2. Otherwise, if `parent_path()` of `path` is registered, the mapped remote path of the parent is used
///     (for a directory path ending with '/', `parent_path()` is the same path without the trailing slash).
///  3. Otherwise, a directory gets a fresh random 32-character name, and a file keeps its local parent directory.
/// For files, the file name is appended to the directory part chosen above.
///
/// The key is created relative to `key_prefix` when it is provided, and relative to the generator's
/// own storage key prefix otherwise.
ObjectStorageKey FlatDirectoryStructureKeyGenerator::generate(const String & path, bool is_directory, const std::optional<String> & key_prefix) const
{
    /// Directory paths are expected to be normalized, i.e. to end with a slash.
    chassert(!is_directory || path.ends_with('/'));

    const std::filesystem::path local_path(path);
    const auto parent_directory = local_path.parent_path();
    const String & effective_prefix = key_prefix.has_value() ? *key_prefix : storage_key_prefix;

    std::optional<std::filesystem::path> mapped_parent;
    {
        /// NOTE(review): the result of lock() is dereferenced without a null check; presumably the path map
        /// always outlives the generator - confirm against the owner of the map.
        const auto locked_map = path_map.lock();
        SharedLockGuard lock(locked_map->mutex);

        /// Exact match: the path is already mapped, nothing else to compute.
        if (const auto exact = locked_map->map.find(local_path); exact != locked_map->map.end())
            return ObjectStorageKey::createAsRelative(effective_prefix, exact->second);

        if (const auto parent = locked_map->map.find(parent_directory); parent != locked_map->map.end())
            mapped_parent = parent->second;
    }

    constexpr size_t random_name_length = 32;

    std::filesystem::path key;
    if (mapped_parent.has_value())
        key = *mapped_parent;
    else if (is_directory)
        key = std::filesystem::path(getRandomASCIIString(random_name_length));
    else
        key = parent_directory;

    if (!is_directory)
        key /= local_path.filename();

    return ObjectStorageKey::createAsRelative(effective_prefix, key);
}
}

View File

@ -0,0 +1,23 @@
#pragma once
#include <Common/ObjectStorageKeyGenerator.h>
#include <memory>
namespace DB
{
struct InMemoryPathMap;
/// Object storage key generator that resolves local paths through an in-memory
/// local -> remote path map (InMemoryPathMap) held by weak reference.
class FlatDirectoryStructureKeyGenerator : public IObjectStorageKeysGenerator
{
public:
/// `storage_key_prefix_` is the key prefix used when generate() is called without an explicit `key_prefix`.
/// `path_map_` is a non-owning reference to the path map; the generator does not extend its lifetime.
explicit FlatDirectoryStructureKeyGenerator(String storage_key_prefix_, std::weak_ptr<InMemoryPathMap> path_map_);
/// Generates the object storage key for the local `path`.
/// `is_directory` tells whether `path` denotes a directory.
/// `key_prefix`, when set, is used as the key prefix in place of the stored storage key prefix.
ObjectStorageKey generate(const String & path, bool is_directory, const std::optional<String> & key_prefix) const override;
private:
/// Default key prefix, fixed at construction.
const String storage_key_prefix;
/// Weak reference to the shared local -> remote path map.
std::weak_ptr<InMemoryPathMap> path_map;
};
}

View File

@ -4,8 +4,8 @@
#include <Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h> #include <Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h>
#include <Storages/ObjectStorage/HDFS/HDFSCommon.h> #include <Storages/ObjectStorage/HDFS/HDFSCommon.h>
#include <Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h>
#include <Disks/IO/ReadBufferFromRemoteFSGather.h> #include <Disks/IO/ReadBufferFromRemoteFSGather.h>
#include <Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h>
#include <Common/getRandomASCIIString.h> #include <Common/getRandomASCIIString.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
@ -53,7 +53,8 @@ std::string HDFSObjectStorage::extractObjectKeyFromURL(const StoredObject & obje
return path; return path;
} }
ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const ObjectStorageKey
HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */, const std::optional<std::string> & /* key_prefix */) const
{ {
initializeHDFSFS(); initializeHDFSFS();
/// what ever data_source_description.description value is, consider that key as relative key /// what ever data_source_description.description value is, consider that key as relative key

View File

@ -111,7 +111,7 @@ public:
const std::string & config_prefix, const std::string & config_prefix,
ContextPtr context) override; ContextPtr context) override;
ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional<std::string> & key_prefix) const override;
bool isRemote() const override { return true; } bool isRemote() const override { return true; }

View File

@ -232,10 +232,11 @@ public:
/// Generate blob name for passed absolute local path. /// Generate blob name for passed absolute local path.
/// Path can be generated either independently or based on `path`. /// Path can be generated either independently or based on `path`.
virtual ObjectStorageKey generateObjectKeyForPath(const std::string & path) const = 0; virtual ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional<std::string> & key_prefix) const = 0;
/// Object key prefix for local paths in the directory 'path'. /// Object key prefix for local paths in the directory 'path'.
virtual ObjectStorageKey generateObjectKeyPrefixForDirectoryPath(const std::string & /* path */) const virtual ObjectStorageKey
generateObjectKeyPrefixForDirectoryPath(const std::string & /* path */, const std::optional<std::string> & /* key_prefix */) const
{ {
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'generateObjectKeyPrefixForDirectoryPath' is not implemented"); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'generateObjectKeyPrefixForDirectoryPath' is not implemented");
} }

View File

@ -0,0 +1,37 @@
#pragma once
#include <filesystem>
#include <map>
#include <base/defines.h>
#include <Common/SharedMutex.h>
namespace DB
{
/// A map from local paths to remote paths, bundled with the shared mutex that is meant to protect it.
struct InMemoryPathMap
{
    /// Ordering for path keys: a path with fewer components sorts before a path with more components;
    /// paths with the same number of components are ordered by std::filesystem::path's operator<.
    struct PathComparator
    {
        bool operator()(const std::filesystem::path & path1, const std::filesystem::path & path2) const
        {
            const auto component_count1 = std::distance(path1.begin(), path1.end());
            const auto component_count2 = std::distance(path2.begin(), path2.end());
            return component_count1 == component_count2 ? path1 < path2 : component_count1 < component_count2;
        }
    };

    /// Local -> Remote path.
    using Map = std::map<std::filesystem::path, std::string, PathComparator>;

    /// Mutable so that the map can be locked through a const reference.
    mutable SharedMutex mutex;

#ifdef OS_LINUX
    Map TSA_GUARDED_BY(mutex) map;
#else
    /// std::shared_mutex may not be annotated with the 'capability' attribute in libcxx,
    /// so the thread safety annotation is omitted here.
    Map map;
#endif
};
}

View File

@ -1,15 +1,15 @@
#include <Disks/ObjectStorages/Local/LocalObjectStorage.h> #include <Disks/ObjectStorages/Local/LocalObjectStorage.h>
#include <Interpreters/Context.h> #include <filesystem>
#include <Common/filesystemHelpers.h> #include <Disks/IO/AsynchronousBoundedReadBuffer.h>
#include <Common/logger_useful.h>
#include <Disks/IO/ReadBufferFromRemoteFSGather.h> #include <Disks/IO/ReadBufferFromRemoteFSGather.h>
#include <Disks/IO/createReadBufferFromFileBase.h> #include <Disks/IO/createReadBufferFromFileBase.h>
#include <Disks/IO/AsynchronousBoundedReadBuffer.h>
#include <IO/WriteBufferFromFile.h> #include <IO/WriteBufferFromFile.h>
#include <IO/copyData.h> #include <IO/copyData.h>
#include <Interpreters/Context.h>
#include <Common/filesystemHelpers.h>
#include <Common/getRandomASCIIString.h> #include <Common/getRandomASCIIString.h>
#include <filesystem> #include <Common/logger_useful.h>
namespace fs = std::filesystem; namespace fs = std::filesystem;
@ -222,7 +222,8 @@ std::unique_ptr<IObjectStorage> LocalObjectStorage::cloneObjectStorage(
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "cloneObjectStorage() is not implemented for LocalObjectStorage"); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "cloneObjectStorage() is not implemented for LocalObjectStorage");
} }
ObjectStorageKey LocalObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const ObjectStorageKey
LocalObjectStorage::generateObjectKeyForPath(const std::string & /* path */, const std::optional<std::string> & /* key_prefix */) const
{ {
constexpr size_t key_name_total_size = 32; constexpr size_t key_name_total_size = 32;
return ObjectStorageKey::createAsRelative(key_prefix, getRandomASCIIString(key_name_total_size)); return ObjectStorageKey::createAsRelative(key_prefix, getRandomASCIIString(key_name_total_size));

View File

@ -81,7 +81,7 @@ public:
const std::string & config_prefix, const std::string & config_prefix,
ContextPtr context) override; ContextPtr context) override;
ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional<std::string> & key_prefix) const override;
bool isRemote() const override { return false; } bool isRemote() const override { return false; }

View File

@ -1,5 +1,6 @@
#include "MetadataStorageFromPlainObjectStorage.h" #include "MetadataStorageFromPlainObjectStorage.h"
#include <Disks/IDisk.h> #include <Disks/IDisk.h>
#include <Disks/ObjectStorages/InMemoryPathMap.h>
#include <Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h> #include <Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h>
#include <Disks/ObjectStorages/StaticDirectoryIterator.h> #include <Disks/ObjectStorages/StaticDirectoryIterator.h>
@ -7,6 +8,7 @@
#include <filesystem> #include <filesystem>
#include <tuple> #include <tuple>
#include <unordered_set>
namespace DB namespace DB
{ {
@ -41,7 +43,7 @@ bool MetadataStorageFromPlainObjectStorage::exists(const std::string & path) con
{ {
/// NOTE: exists() cannot be used here since it works only for existing /// NOTE: exists() cannot be used here since it works only for existing
/// key, and does not work for some intermediate path. /// key, and does not work for some intermediate path.
auto object_key = object_storage->generateObjectKeyForPath(path); auto object_key = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */);
return object_storage->existsOrHasAnyChild(object_key.serialize()); return object_storage->existsOrHasAnyChild(object_key.serialize());
} }
@ -53,7 +55,7 @@ bool MetadataStorageFromPlainObjectStorage::isFile(const std::string & path) con
bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path) const bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path) const
{ {
auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize(); auto key_prefix = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */).serialize();
auto directory = std::filesystem::path(std::move(key_prefix)) / ""; auto directory = std::filesystem::path(std::move(key_prefix)) / "";
return object_storage->existsOrHasAnyChild(directory); return object_storage->existsOrHasAnyChild(directory);
@ -61,7 +63,7 @@ bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path
uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) const uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) const
{ {
auto object_key = object_storage->generateObjectKeyForPath(path); auto object_key = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */);
auto metadata = object_storage->tryGetObjectMetadata(object_key.serialize()); auto metadata = object_storage->tryGetObjectMetadata(object_key.serialize());
if (metadata) if (metadata)
return metadata->size_bytes; return metadata->size_bytes;
@ -70,7 +72,7 @@ uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path)
std::vector<std::string> MetadataStorageFromPlainObjectStorage::listDirectory(const std::string & path) const std::vector<std::string> MetadataStorageFromPlainObjectStorage::listDirectory(const std::string & path) const
{ {
auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize(); auto key_prefix = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */).serialize();
RelativePathsWithMetadata files; RelativePathsWithMetadata files;
std::string abs_key = key_prefix; std::string abs_key = key_prefix;
@ -79,14 +81,27 @@ std::vector<std::string> MetadataStorageFromPlainObjectStorage::listDirectory(co
object_storage->listObjects(abs_key, files, 0); object_storage->listObjects(abs_key, files, 0);
return getDirectChildrenOnDisk(abs_key, files, path); std::unordered_set<std::string> result;
for (const auto & elem : files)
{
const auto & p = elem->relative_path;
chassert(p.find(abs_key) == 0);
const auto child_pos = abs_key.size();
/// string::npos is ok.
const auto slash_pos = p.find('/', child_pos);
if (slash_pos == std::string::npos)
result.emplace(p.substr(child_pos));
else
result.emplace(p.substr(child_pos, slash_pos - child_pos));
}
return std::vector<std::string>(std::make_move_iterator(result.begin()), std::make_move_iterator(result.end()));
} }
DirectoryIteratorPtr MetadataStorageFromPlainObjectStorage::iterateDirectory(const std::string & path) const DirectoryIteratorPtr MetadataStorageFromPlainObjectStorage::iterateDirectory(const std::string & path) const
{ {
/// Required for MergeTree /// Required for MergeTree
auto paths = listDirectory(path); auto paths = listDirectory(path);
// Prepend path, since iterateDirectory() includes path, unlike listDirectory() /// Prepend path, since iterateDirectory() includes path, unlike listDirectory()
std::for_each(paths.begin(), paths.end(), [&](auto & child) { child = fs::path(path) / child; }); std::for_each(paths.begin(), paths.end(), [&](auto & child) { child = fs::path(path) / child; });
std::vector<std::filesystem::path> fs_paths(paths.begin(), paths.end()); std::vector<std::filesystem::path> fs_paths(paths.begin(), paths.end());
return std::make_unique<StaticDirectoryIterator>(std::move(fs_paths)); return std::make_unique<StaticDirectoryIterator>(std::move(fs_paths));
@ -95,29 +110,10 @@ DirectoryIteratorPtr MetadataStorageFromPlainObjectStorage::iterateDirectory(con
StoredObjects MetadataStorageFromPlainObjectStorage::getStorageObjects(const std::string & path) const StoredObjects MetadataStorageFromPlainObjectStorage::getStorageObjects(const std::string & path) const
{ {
size_t object_size = getFileSize(path); size_t object_size = getFileSize(path);
auto object_key = object_storage->generateObjectKeyForPath(path); auto object_key = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */);
return {StoredObject(object_key.serialize(), path, object_size)}; return {StoredObject(object_key.serialize(), path, object_size)};
} }
std::vector<std::string> MetadataStorageFromPlainObjectStorage::getDirectChildrenOnDisk(
const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & /* local_path */) const
{
std::unordered_set<std::string> duplicates_filter;
for (const auto & elem : remote_paths)
{
const auto & path = elem->relative_path;
chassert(path.find(storage_key) == 0);
const auto child_pos = storage_key.size();
/// string::npos is ok.
const auto slash_pos = path.find('/', child_pos);
if (slash_pos == std::string::npos)
duplicates_filter.emplace(path.substr(child_pos));
else
duplicates_filter.emplace(path.substr(child_pos, slash_pos - child_pos));
}
return std::vector<std::string>(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end()));
}
const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getStorageForNonTransactionalReads() const const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getStorageForNonTransactionalReads() const
{ {
return metadata_storage; return metadata_storage;
@ -125,7 +121,7 @@ const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getSt
void MetadataStorageFromPlainObjectStorageTransaction::unlinkFile(const std::string & path) void MetadataStorageFromPlainObjectStorageTransaction::unlinkFile(const std::string & path)
{ {
auto object_key = metadata_storage.object_storage->generateObjectKeyForPath(path); auto object_key = metadata_storage.object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */);
auto object = StoredObject(object_key.serialize()); auto object = StoredObject(object_key.serialize());
metadata_storage.object_storage->removeObject(object); metadata_storage.object_storage->removeObject(object);
} }
@ -140,7 +136,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(const std
else else
{ {
addOperation(std::make_unique<MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation>( addOperation(std::make_unique<MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation>(
normalizeDirectoryPath(path), *metadata_storage.getPathMap(), object_storage)); normalizeDirectoryPath(path), *metadata_storage.getPathMap(), object_storage, metadata_storage.getMetadataKeyPrefix()));
} }
} }
@ -150,9 +146,11 @@ void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std
return; return;
auto normalized_path = normalizeDirectoryPath(path); auto normalized_path = normalizeDirectoryPath(path);
auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(normalized_path).serialize();
auto op = std::make_unique<MetadataStorageFromPlainObjectStorageCreateDirectoryOperation>( auto op = std::make_unique<MetadataStorageFromPlainObjectStorageCreateDirectoryOperation>(
std::move(normalized_path), std::move(key_prefix), *metadata_storage.getPathMap(), object_storage); std::move(normalized_path),
*metadata_storage.getPathMap(),
object_storage,
metadata_storage.getMetadataKeyPrefix());
addOperation(std::move(op)); addOperation(std::move(op));
} }
@ -167,7 +165,11 @@ void MetadataStorageFromPlainObjectStorageTransaction::moveDirectory(const std::
throwNotImplemented(); throwNotImplemented();
addOperation(std::make_unique<MetadataStorageFromPlainObjectStorageMoveDirectoryOperation>( addOperation(std::make_unique<MetadataStorageFromPlainObjectStorageMoveDirectoryOperation>(
normalizeDirectoryPath(path_from), normalizeDirectoryPath(path_to), *metadata_storage.getPathMap(), object_storage)); normalizeDirectoryPath(path_from),
normalizeDirectoryPath(path_to),
*metadata_storage.getPathMap(),
object_storage,
metadata_storage.getMetadataKeyPrefix()));
} }
void MetadataStorageFromPlainObjectStorageTransaction::addBlobToMetadata( void MetadataStorageFromPlainObjectStorageTransaction::addBlobToMetadata(

View File

@ -2,14 +2,18 @@
#include <Disks/IDisk.h> #include <Disks/IDisk.h>
#include <Disks/ObjectStorages/IMetadataStorage.h> #include <Disks/ObjectStorages/IMetadataStorage.h>
#include <Disks/ObjectStorages/InMemoryPathMap.h>
#include <Disks/ObjectStorages/MetadataOperationsHolder.h> #include <Disks/ObjectStorages/MetadataOperationsHolder.h>
#include <Disks/ObjectStorages/MetadataStorageTransactionState.h> #include <Disks/ObjectStorages/MetadataStorageTransactionState.h>
#include <map> #include <map>
#include <string>
#include <unordered_set>
namespace DB namespace DB
{ {
struct InMemoryPathMap;
struct UnlinkMetadataFileOperationOutcome; struct UnlinkMetadataFileOperationOutcome;
using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr<UnlinkMetadataFileOperationOutcome>; using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr<UnlinkMetadataFileOperationOutcome>;
@ -25,10 +29,6 @@ using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr<UnlinkMetadataFile
/// structure as on disk MergeTree, and does not require metadata from a local disk to restore. /// structure as on disk MergeTree, and does not require metadata from a local disk to restore.
class MetadataStorageFromPlainObjectStorage : public IMetadataStorage class MetadataStorageFromPlainObjectStorage : public IMetadataStorage
{ {
public:
/// Local path prefixes mapped to storage key prefixes.
using PathMap = std::map<std::filesystem::path, std::string>;
private: private:
friend class MetadataStorageFromPlainObjectStorageTransaction; friend class MetadataStorageFromPlainObjectStorageTransaction;
@ -78,10 +78,11 @@ public:
bool supportsStat() const override { return false; } bool supportsStat() const override { return false; }
protected: protected:
virtual std::shared_ptr<PathMap> getPathMap() const { throwNotImplemented(); } /// Get the object storage prefix for storing metadata files.
virtual std::string getMetadataKeyPrefix() const { return object_storage->getCommonKeyPrefix(); }
virtual std::vector<std::string> getDirectChildrenOnDisk( /// Returns a map of virtual filesystem paths to paths in the object storage.
const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const; virtual std::shared_ptr<InMemoryPathMap> getPathMap() const { throwNotImplemented(); }
}; };
class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataTransaction, private MetadataOperationsHolder class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataTransaction, private MetadataOperationsHolder

View File

@ -1,8 +1,10 @@
#include "MetadataStorageFromPlainObjectStorageOperations.h" #include "MetadataStorageFromPlainObjectStorageOperations.h"
#include <Disks/ObjectStorages/InMemoryPathMap.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Common/SharedLockGuard.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
namespace DB namespace DB
@ -20,29 +22,45 @@ namespace
constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path"; constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path";
/// Builds the key of the metadata object (PREFIX_PATH_FILE_NAME) for a directory:
/// the file name is placed under `metadata_key_prefix` / `object_key_prefix`.
ObjectStorageKey createMetadataObjectKey(const std::string & object_key_prefix, const std::string & metadata_key_prefix)
{
    const std::filesystem::path metadata_directory = std::filesystem::path(metadata_key_prefix) / object_key_prefix;
    return ObjectStorageKey::createAsRelative(metadata_directory.string(), PREFIX_PATH_FILE_NAME);
}
} }
MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFromPlainObjectStorageCreateDirectoryOperation( MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFromPlainObjectStorageCreateDirectoryOperation(
std::filesystem::path && path_, std::filesystem::path && path_, InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_)
std::string && key_prefix_, : path(std::move(path_))
MetadataStorageFromPlainObjectStorage::PathMap & path_map_, , path_map(path_map_)
ObjectStoragePtr object_storage_) , object_storage(object_storage_)
: path(std::move(path_)), key_prefix(key_prefix_), path_map(path_map_), object_storage(object_storage_) , metadata_key_prefix(metadata_key_prefix_)
, object_key_prefix(object_storage->generateObjectKeyPrefixForDirectoryPath(path, "" /* object_key_prefix */).serialize())
{ {
chassert(path.string().ends_with('/'));
} }
void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std::unique_lock<SharedMutex> &) void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std::unique_lock<SharedMutex> &)
{ {
if (path_map.contains(path)) /// parent_path() removes the trailing '/'
return; const auto base_path = path.parent_path();
{
SharedLockGuard lock(path_map.mutex);
if (path_map.map.contains(base_path))
return;
}
LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageCreateDirectoryOperation"), "Creating metadata for directory '{}'", path); auto metadata_object_key = createMetadataObjectKey(object_key_prefix, metadata_key_prefix);
auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); LOG_TRACE(
getLogger("MetadataStorageFromPlainObjectStorageCreateDirectoryOperation"),
"Creating metadata for directory '{}' with remote path='{}'",
path,
metadata_object_key.serialize());
auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME); auto metadata_object = StoredObject(/*remote_path*/ metadata_object_key.serialize(), /*local_path*/ path / PREFIX_PATH_FILE_NAME);
auto buf = object_storage->writeObject( auto buf = object_storage->writeObject(
object, metadata_object,
WriteMode::Rewrite, WriteMode::Rewrite,
/* object_attributes */ std::nullopt, /* object_attributes */ std::nullopt,
/* buf_size */ DBMS_DEFAULT_BUFFER_SIZE, /* buf_size */ DBMS_DEFAULT_BUFFER_SIZE,
@ -50,8 +68,12 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std:
write_created = true; write_created = true;
[[maybe_unused]] auto result = path_map.emplace(path, std::move(key_prefix)); {
chassert(result.second); std::lock_guard lock(path_map.mutex);
auto & map = path_map.map;
[[maybe_unused]] auto result = map.emplace(base_path, object_key_prefix);
chassert(result.second);
}
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::add(metric, 1); CurrentMetrics::add(metric, 1);
@ -66,58 +88,81 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std:
void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::unique_lock<SharedMutex> &) void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::unique_lock<SharedMutex> &)
{ {
auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); auto metadata_object_key = createMetadataObjectKey(object_key_prefix, metadata_key_prefix);
if (write_finalized) if (write_finalized)
{ {
path_map.erase(path); const auto base_path = path.parent_path();
{
std::lock_guard lock(path_map.mutex);
path_map.map.erase(base_path);
}
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::sub(metric, 1); CurrentMetrics::sub(metric, 1);
object_storage->removeObject(StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME)); object_storage->removeObject(StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME));
} }
else if (write_created) else if (write_created)
object_storage->removeObjectIfExists(StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME)); object_storage->removeObjectIfExists(StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME));
} }
MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFromPlainObjectStorageMoveDirectoryOperation( MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFromPlainObjectStorageMoveDirectoryOperation(
std::filesystem::path && path_from_, std::filesystem::path && path_from_,
std::filesystem::path && path_to_, std::filesystem::path && path_to_,
MetadataStorageFromPlainObjectStorage::PathMap & path_map_, InMemoryPathMap & path_map_,
ObjectStoragePtr object_storage_) ObjectStoragePtr object_storage_,
: path_from(std::move(path_from_)), path_to(std::move(path_to_)), path_map(path_map_), object_storage(object_storage_) const std::string & metadata_key_prefix_)
: path_from(std::move(path_from_))
, path_to(std::move(path_to_))
, path_map(path_map_)
, object_storage(object_storage_)
, metadata_key_prefix(metadata_key_prefix_)
{ {
chassert(path_from.string().ends_with('/'));
chassert(path_to.string().ends_with('/'));
} }
std::unique_ptr<WriteBufferFromFileBase> MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::createWriteBuf( std::unique_ptr<WriteBufferFromFileBase> MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::createWriteBuf(
const std::filesystem::path & expected_path, const std::filesystem::path & new_path, bool validate_content) const std::filesystem::path & expected_path, const std::filesystem::path & new_path, bool validate_content)
{ {
auto expected_it = path_map.find(expected_path); std::filesystem::path remote_path;
if (expected_it == path_map.end()) {
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata object for the expected (source) path '{}' does not exist", expected_path); SharedLockGuard lock(path_map.mutex);
auto & map = path_map.map;
/// parent_path() removes the trailing '/'.
auto expected_it = map.find(expected_path.parent_path());
if (expected_it == map.end())
throw Exception(
ErrorCodes::FILE_DOESNT_EXIST, "Metadata object for the expected (source) path '{}' does not exist", expected_path);
if (path_map.contains(new_path)) if (map.contains(new_path.parent_path()))
throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Metadata object for the new (destination) path '{}' already exists", new_path); throw Exception(
ErrorCodes::FILE_ALREADY_EXISTS, "Metadata object for the new (destination) path '{}' already exists", new_path);
auto object_key = ObjectStorageKey::createAsRelative(expected_it->second, PREFIX_PATH_FILE_NAME); remote_path = expected_it->second;
}
auto object = StoredObject(object_key.serialize(), expected_path / PREFIX_PATH_FILE_NAME); auto metadata_object_key = createMetadataObjectKey(remote_path, metadata_key_prefix);
auto metadata_object
= StoredObject(/*remote_path*/ metadata_object_key.serialize(), /*local_path*/ expected_path / PREFIX_PATH_FILE_NAME);
if (validate_content) if (validate_content)
{ {
std::string data; std::string data;
auto read_buf = object_storage->readObject(object); auto read_buf = object_storage->readObject(metadata_object);
readStringUntilEOF(data, *read_buf); readStringUntilEOF(data, *read_buf);
if (data != path_from) if (data != path_from)
throw Exception( throw Exception(
ErrorCodes::INCORRECT_DATA, ErrorCodes::INCORRECT_DATA,
"Incorrect data for object key {}, expected {}, got {}", "Incorrect data for object key {}, expected {}, got {}",
object_key.serialize(), metadata_object_key.serialize(),
expected_path, expected_path,
data); data);
} }
auto write_buf = object_storage->writeObject( auto write_buf = object_storage->writeObject(
object, metadata_object,
WriteMode::Rewrite, WriteMode::Rewrite,
/* object_attributes */ std::nullopt, /* object_attributes */ std::nullopt,
/*buf_size*/ DBMS_DEFAULT_BUFFER_SIZE, /*buf_size*/ DBMS_DEFAULT_BUFFER_SIZE,
@ -136,8 +181,16 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::execute(std::u
writeString(path_to.string(), *write_buf); writeString(path_to.string(), *write_buf);
write_buf->finalize(); write_buf->finalize();
[[maybe_unused]] auto result = path_map.emplace(path_to, path_map.extract(path_from).mapped()); /// parent_path() removes the trailing '/'.
chassert(result.second); auto base_path_to = path_to.parent_path();
auto base_path_from = path_from.parent_path();
{
std::lock_guard lock(path_map.mutex);
auto & map = path_map.map;
[[maybe_unused]] auto result = map.emplace(base_path_to, map.extract(base_path_from).mapped());
chassert(result.second);
}
write_finalized = true; write_finalized = true;
} }
@ -145,7 +198,11 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::execute(std::u
void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo(std::unique_lock<SharedMutex> &) void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo(std::unique_lock<SharedMutex> &)
{ {
if (write_finalized) if (write_finalized)
path_map.emplace(path_from, path_map.extract(path_to).mapped()); {
std::lock_guard lock(path_map.mutex);
auto & map = path_map.map;
map.emplace(path_from.parent_path(), map.extract(path_to.parent_path()).mapped());
}
if (write_created) if (write_created)
{ {
@ -156,25 +213,37 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo(std::uniq
} }
MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation( MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation(
std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_) std::filesystem::path && path_, InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_)
: path(std::move(path_)), path_map(path_map_), object_storage(object_storage_) : path(std::move(path_)), path_map(path_map_), object_storage(object_storage_), metadata_key_prefix(metadata_key_prefix_)
{ {
chassert(path.string().ends_with('/'));
} }
void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std::unique_lock<SharedMutex> & /* metadata_lock */) void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std::unique_lock<SharedMutex> & /* metadata_lock */)
{ {
auto path_it = path_map.find(path); /// parent_path() removes the trailing '/'
if (path_it == path_map.end()) const auto base_path = path.parent_path();
return; {
SharedLockGuard lock(path_map.mutex);
auto & map = path_map.map;
auto path_it = map.find(base_path);
if (path_it == map.end())
return;
key_prefix = path_it->second;
}
LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation"), "Removing directory '{}'", path); LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation"), "Removing directory '{}'", path);
key_prefix = path_it->second; auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix);
auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); auto metadata_object = StoredObject(/*remote_path*/ metadata_object_key.serialize(), /*local_path*/ path / PREFIX_PATH_FILE_NAME);
auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME); object_storage->removeObject(metadata_object);
object_storage->removeObject(object);
{
std::lock_guard lock(path_map.mutex);
auto & map = path_map.map;
map.erase(base_path);
}
path_map.erase(path_it);
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::sub(metric, 1); CurrentMetrics::sub(metric, 1);
@ -189,10 +258,10 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo(std::un
if (!removed) if (!removed)
return; return;
auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix);
auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME); auto metadata_object = StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME);
auto buf = object_storage->writeObject( auto buf = object_storage->writeObject(
object, metadata_object,
WriteMode::Rewrite, WriteMode::Rewrite,
/* object_attributes */ std::nullopt, /* object_attributes */ std::nullopt,
/* buf_size */ DBMS_DEFAULT_BUFFER_SIZE, /* buf_size */ DBMS_DEFAULT_BUFFER_SIZE,
@ -200,7 +269,11 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo(std::un
writeString(path.string(), *buf); writeString(path.string(), *buf);
buf->finalize(); buf->finalize();
path_map.emplace(path, std::move(key_prefix)); {
std::lock_guard lock(path_map.mutex);
auto & map = path_map.map;
map.emplace(path.parent_path(), std::move(key_prefix));
}
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::add(metric, 1); CurrentMetrics::add(metric, 1);
} }

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <Disks/ObjectStorages/IMetadataOperation.h> #include <Disks/ObjectStorages/IMetadataOperation.h>
#include <Disks/ObjectStorages/InMemoryPathMap.h>
#include <Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h> #include <Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h>
#include <filesystem> #include <filesystem>
@ -13,20 +14,21 @@ class MetadataStorageFromPlainObjectStorageCreateDirectoryOperation final : publ
{ {
private: private:
std::filesystem::path path; std::filesystem::path path;
std::string key_prefix; InMemoryPathMap & path_map;
MetadataStorageFromPlainObjectStorage::PathMap & path_map;
ObjectStoragePtr object_storage; ObjectStoragePtr object_storage;
const std::string metadata_key_prefix;
const std::string object_key_prefix;
bool write_created = false; bool write_created = false;
bool write_finalized = false; bool write_finalized = false;
public: public:
// Assuming that paths are normalized.
MetadataStorageFromPlainObjectStorageCreateDirectoryOperation( MetadataStorageFromPlainObjectStorageCreateDirectoryOperation(
/// path_ must end with a trailing '/'.
std::filesystem::path && path_, std::filesystem::path && path_,
std::string && key_prefix_, InMemoryPathMap & path_map_,
MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_,
ObjectStoragePtr object_storage_); const std::string & metadata_key_prefix_);
void execute(std::unique_lock<SharedMutex> & metadata_lock) override; void execute(std::unique_lock<SharedMutex> & metadata_lock) override;
void undo(std::unique_lock<SharedMutex> & metadata_lock) override; void undo(std::unique_lock<SharedMutex> & metadata_lock) override;
@ -37,8 +39,9 @@ class MetadataStorageFromPlainObjectStorageMoveDirectoryOperation final : public
private: private:
std::filesystem::path path_from; std::filesystem::path path_from;
std::filesystem::path path_to; std::filesystem::path path_to;
MetadataStorageFromPlainObjectStorage::PathMap & path_map; InMemoryPathMap & path_map;
ObjectStoragePtr object_storage; ObjectStoragePtr object_storage;
const std::string metadata_key_prefix;
bool write_created = false; bool write_created = false;
bool write_finalized = false; bool write_finalized = false;
@ -48,10 +51,12 @@ private:
public: public:
MetadataStorageFromPlainObjectStorageMoveDirectoryOperation( MetadataStorageFromPlainObjectStorageMoveDirectoryOperation(
/// Both path_from_ and path_to_ must end with a trailing '/'.
std::filesystem::path && path_from_, std::filesystem::path && path_from_,
std::filesystem::path && path_to_, std::filesystem::path && path_to_,
MetadataStorageFromPlainObjectStorage::PathMap & path_map_, InMemoryPathMap & path_map_,
ObjectStoragePtr object_storage_); ObjectStoragePtr object_storage_,
const std::string & metadata_key_prefix_);
void execute(std::unique_lock<SharedMutex> & metadata_lock) override; void execute(std::unique_lock<SharedMutex> & metadata_lock) override;
@ -63,15 +68,20 @@ class MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation final : publ
private: private:
std::filesystem::path path; std::filesystem::path path;
MetadataStorageFromPlainObjectStorage::PathMap & path_map; InMemoryPathMap & path_map;
ObjectStoragePtr object_storage; ObjectStoragePtr object_storage;
const std::string metadata_key_prefix;
std::string key_prefix; std::string key_prefix;
bool removed = false; bool removed = false;
public: public:
MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation( MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation(
std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_); /// path_ must end with a trailing '/'.
std::filesystem::path && path_,
InMemoryPathMap & path_map_,
ObjectStoragePtr object_storage_,
const std::string & metadata_key_prefix_);
void execute(std::unique_lock<SharedMutex> & metadata_lock) override; void execute(std::unique_lock<SharedMutex> & metadata_lock) override;
void undo(std::unique_lock<SharedMutex> & metadata_lock) override; void undo(std::unique_lock<SharedMutex> & metadata_lock) override;

View File

@ -1,9 +1,14 @@
#include <Disks/ObjectStorages/FlatDirectoryStructureKeyGenerator.h>
#include <Disks/ObjectStorages/InMemoryPathMap.h>
#include <Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h> #include <Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h>
#include <Disks/ObjectStorages/ObjectStorageIterator.h> #include <Disks/ObjectStorages/ObjectStorageIterator.h>
#include <unordered_set>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/SharedThreadPools.h>
#include <IO/S3Common.h> #include <IO/S3Common.h>
#include <IO/SharedThreadPools.h>
#include "Common/SharedLockGuard.h"
#include "Common/SharedMutex.h"
#include <Common/ErrorCodes.h> #include <Common/ErrorCodes.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include "CommonPathPrefixKeyGenerator.h" #include "CommonPathPrefixKeyGenerator.h"
@ -21,14 +26,28 @@ namespace
{ {
constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path"; constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path";
constexpr auto METADATA_PATH_TOKEN = "__meta/";
MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::string & root, ObjectStoragePtr object_storage) /// Use a separate layout for metadata if:
/// 1. The disk endpoint does not contain any objects yet (empty), OR
/// 2. The metadata is already stored behind a separate endpoint.
/// Otherwise, store metadata along with regular data for backward compatibility.
std::string getMetadataKeyPrefix(ObjectStoragePtr object_storage)
{ {
MetadataStorageFromPlainObjectStorage::PathMap result; const auto common_key_prefix = std::filesystem::path(object_storage->getCommonKeyPrefix());
const auto metadata_key_prefix = std::filesystem::path(common_key_prefix) / METADATA_PATH_TOKEN;
return !object_storage->existsOrHasAnyChild(metadata_key_prefix / "") && object_storage->existsOrHasAnyChild(common_key_prefix / "")
? common_key_prefix
: metadata_key_prefix;
}
std::shared_ptr<InMemoryPathMap> loadPathPrefixMap(const std::string & metadata_key_prefix, ObjectStoragePtr object_storage)
{
auto result = std::make_shared<InMemoryPathMap>();
using Map = InMemoryPathMap::Map;
ThreadPool & pool = getIOThreadPool().get(); ThreadPool & pool = getIOThreadPool().get();
ThreadPoolCallbackRunnerLocal<void> runner(pool, "PlainRWMetaLoad"); ThreadPoolCallbackRunnerLocal<void> runner(pool, "PlainRWMetaLoad");
std::mutex mutex;
LoggerPtr log = getLogger("MetadataStorageFromPlainObjectStorage"); LoggerPtr log = getLogger("MetadataStorageFromPlainObjectStorage");
@ -39,102 +58,107 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri
LOG_DEBUG(log, "Loading metadata"); LOG_DEBUG(log, "Loading metadata");
size_t num_files = 0; size_t num_files = 0;
for (auto iterator = object_storage->iterate(root, 0); iterator->isValid(); iterator->next()) for (auto iterator = object_storage->iterate(metadata_key_prefix, 0); iterator->isValid(); iterator->next())
{ {
++num_files; ++num_files;
auto file = iterator->current(); auto file = iterator->current();
String path = file->getPath(); String path = file->getPath();
auto remote_path = std::filesystem::path(path); auto remote_metadata_path = std::filesystem::path(path);
if (remote_path.filename() != PREFIX_PATH_FILE_NAME) if (remote_metadata_path.filename() != PREFIX_PATH_FILE_NAME)
continue; continue;
runner([remote_path, path, &object_storage, &result, &mutex, &log, &settings] runner(
{ [remote_metadata_path, path, &object_storage, &result, &log, &settings, &metadata_key_prefix]
setThreadName("PlainRWMetaLoad");
StoredObject object{path};
String local_path;
try
{ {
auto read_buf = object_storage->readObject(object, settings); setThreadName("PlainRWMetaLoad");
readStringUntilEOF(local_path, *read_buf);
} StoredObject object{path};
String local_path;
try
{
auto read_buf = object_storage->readObject(object, settings);
readStringUntilEOF(local_path, *read_buf);
}
#if USE_AWS_S3 #if USE_AWS_S3
catch (const S3Exception & e) catch (const S3Exception & e)
{ {
/// It is ok if a directory was removed just now. /// It is ok if a directory was removed just now.
/// We support attaching a filesystem that is concurrently modified by someone else. /// We support attaching a filesystem that is concurrently modified by someone else.
if (e.getS3ErrorCode() == Aws::S3::S3Errors::NO_SUCH_KEY) if (e.getS3ErrorCode() == Aws::S3::S3Errors::NO_SUCH_KEY)
return; return;
throw; throw;
} }
#endif #endif
catch (...) catch (...)
{ {
throw; throw;
} }
chassert(remote_path.has_parent_path()); chassert(remote_metadata_path.has_parent_path());
std::pair<MetadataStorageFromPlainObjectStorage::PathMap::iterator, bool> res; chassert(remote_metadata_path.string().starts_with(metadata_key_prefix));
{ auto suffix = remote_metadata_path.string().substr(metadata_key_prefix.size());
std::lock_guard lock(mutex); auto remote_path = std::filesystem::path(std::move(suffix));
res = result.emplace(local_path, remote_path.parent_path()); std::pair<Map::iterator, bool> res;
} {
std::lock_guard lock(result->mutex);
res = result->map.emplace(std::filesystem::path(local_path).parent_path(), remote_path.parent_path());
}
/// This can happen if table replication is enabled, then the same local path is written /// This can happen if table replication is enabled, then the same local path is written
/// in `prefix.path` of each replica. /// in `prefix.path` of each replica.
/// TODO: should replicated tables (e.g., RMT) be explicitly disallowed? /// TODO: should replicated tables (e.g., RMT) be explicitly disallowed?
if (!res.second) if (!res.second)
LOG_WARNING( LOG_WARNING(
log, log,
"The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'", "The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'",
local_path, local_path,
res.first->second, res.first->second,
remote_path.parent_path().string()); remote_path.parent_path().string());
}); });
} }
runner.waitForAllToFinishAndRethrowFirstError(); runner.waitForAllToFinishAndRethrowFirstError();
LOG_DEBUG(log, "Loaded metadata for {} files, found {} directories", num_files, result.size()); {
SharedLockGuard lock(result->mutex);
LOG_DEBUG(log, "Loaded metadata for {} files, found {} directories", num_files, result->map.size());
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::add(metric, result.size()); CurrentMetrics::add(metric, result->map.size());
}
return result; return result;
} }
std::vector<std::string> getDirectChildrenOnRewritableDisk( void getDirectChildrenOnDiskImpl(
const std::string & storage_key, const std::string & storage_key,
const RelativePathsWithMetadata & remote_paths, const RelativePathsWithMetadata & remote_paths,
const std::string & local_path, const std::string & local_path,
const MetadataStorageFromPlainObjectStorage::PathMap & local_path_prefixes, const InMemoryPathMap & path_map,
SharedMutex & shared_mutex) std::unordered_set<std::string> & result)
{ {
using PathMap = MetadataStorageFromPlainObjectStorage::PathMap; /// Directories are retrieved from the in-memory path map.
std::unordered_set<std::string> duplicates_filter;
/// Map remote paths into local subdirectories.
std::unordered_map<PathMap::mapped_type, PathMap::key_type> remote_to_local_subdir;
{ {
std::shared_lock lock(shared_mutex); SharedLockGuard lock(path_map.mutex);
auto end_it = local_path_prefixes.end(); const auto & local_path_prefixes = path_map.map;
const auto end_it = local_path_prefixes.end();
for (auto it = local_path_prefixes.lower_bound(local_path); it != end_it; ++it) for (auto it = local_path_prefixes.lower_bound(local_path); it != end_it; ++it)
{ {
const auto & [k, v] = std::make_tuple(it->first.string(), it->second); const auto & [k, _] = std::make_tuple(it->first.string(), it->second);
if (!k.starts_with(local_path)) if (!k.starts_with(local_path))
break; break;
auto slash_num = count(k.begin() + local_path.size(), k.end(), '/'); auto slash_num = count(k.begin() + local_path.size(), k.end(), '/');
if (slash_num != 1) /// The local_path_prefixes comparator ensures that the paths with the smallest number of
continue; /// hops from the local_path are iterated first. The paths do not end with '/', hence
/// break the loop if the number of slashes is greater than 0.
if (slash_num != 0)
break;
chassert(k.back() == '/'); result.emplace(std::string(k.begin() + local_path.size(), k.end()) + "/");
remote_to_local_subdir.emplace(v, std::string(k.begin() + local_path.size(), k.end() - 1));
} }
} }
/// Files.
auto skip_list = std::set<std::string>{PREFIX_PATH_FILE_NAME}; auto skip_list = std::set<std::string>{PREFIX_PATH_FILE_NAME};
for (const auto & elem : remote_paths) for (const auto & elem : remote_paths)
{ {
@ -149,22 +173,9 @@ std::vector<std::string> getDirectChildrenOnRewritableDisk(
/// File names. /// File names.
auto filename = path.substr(child_pos); auto filename = path.substr(child_pos);
if (!skip_list.contains(filename)) if (!skip_list.contains(filename))
duplicates_filter.emplace(std::move(filename)); result.emplace(std::move(filename));
}
else
{
/// Subdirectories.
auto it = remote_to_local_subdir.find(path.substr(0, slash_pos));
/// Mapped subdirectories.
if (it != remote_to_local_subdir.end())
duplicates_filter.emplace(it->second);
/// The remote subdirectory name is the same as the local subdirectory.
else
duplicates_filter.emplace(path.substr(child_pos, slash_pos - child_pos));
} }
} }
return std::vector<std::string>(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end()));
} }
} }
@ -172,7 +183,8 @@ std::vector<std::string> getDirectChildrenOnRewritableDisk(
MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewritableObjectStorage( MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewritableObjectStorage(
ObjectStoragePtr object_storage_, String storage_path_prefix_) ObjectStoragePtr object_storage_, String storage_path_prefix_)
: MetadataStorageFromPlainObjectStorage(object_storage_, storage_path_prefix_) : MetadataStorageFromPlainObjectStorage(object_storage_, storage_path_prefix_)
, path_map(std::make_shared<PathMap>(loadPathPrefixMap(object_storage->getCommonKeyPrefix(), object_storage))) , metadata_key_prefix(DB::getMetadataKeyPrefix(object_storage))
, path_map(loadPathPrefixMap(metadata_key_prefix, object_storage))
{ {
if (object_storage->isWriteOnce()) if (object_storage->isWriteOnce())
throw Exception( throw Exception(
@ -180,20 +192,85 @@ MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewrita
"MetadataStorageFromPlainRewritableObjectStorage is not compatible with write-once storage '{}'", "MetadataStorageFromPlainRewritableObjectStorage is not compatible with write-once storage '{}'",
object_storage->getName()); object_storage->getName());
auto keys_gen = std::make_shared<CommonPathPrefixKeyGenerator>(object_storage->getCommonKeyPrefix(), metadata_mutex, path_map); if (useSeparateLayoutForMetadata())
object_storage->setKeysGenerator(keys_gen); {
/// Use flat directory structure if the metadata is stored separately from the table data.
auto keys_gen = std::make_shared<FlatDirectoryStructureKeyGenerator>(object_storage->getCommonKeyPrefix(), path_map);
object_storage->setKeysGenerator(keys_gen);
}
else
{
auto keys_gen = std::make_shared<CommonPathPrefixKeyGenerator>(object_storage->getCommonKeyPrefix(), path_map);
object_storage->setKeysGenerator(keys_gen);
}
} }
MetadataStorageFromPlainRewritableObjectStorage::~MetadataStorageFromPlainRewritableObjectStorage() MetadataStorageFromPlainRewritableObjectStorage::~MetadataStorageFromPlainRewritableObjectStorage()
{ {
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::sub(metric, path_map->size()); CurrentMetrics::sub(metric, path_map->map.size());
} }
std::vector<std::string> MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk( bool MetadataStorageFromPlainRewritableObjectStorage::exists(const std::string & path) const
const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const
{ {
return getDirectChildrenOnRewritableDisk(storage_key, remote_paths, local_path, *getPathMap(), metadata_mutex); if (MetadataStorageFromPlainObjectStorage::exists(path))
return true;
if (useSeparateLayoutForMetadata())
{
auto key_prefix = object_storage->generateObjectKeyForPath(path, getMetadataKeyPrefix()).serialize();
return object_storage->existsOrHasAnyChild(key_prefix);
}
return false;
} }
bool MetadataStorageFromPlainRewritableObjectStorage::isDirectory(const std::string & path) const
{
if (useSeparateLayoutForMetadata())
{
auto directory = std::filesystem::path(object_storage->generateObjectKeyForPath(path, getMetadataKeyPrefix()).serialize()) / "";
return object_storage->existsOrHasAnyChild(directory);
}
else
return MetadataStorageFromPlainObjectStorage::isDirectory(path);
}
std::vector<std::string> MetadataStorageFromPlainRewritableObjectStorage::listDirectory(const std::string & path) const
{
auto key_prefix = object_storage->generateObjectKeyForPath(path, "" /* key_prefix */).serialize();
RelativePathsWithMetadata files;
auto abs_key = std::filesystem::path(object_storage->getCommonKeyPrefix()) / key_prefix / "";
object_storage->listObjects(abs_key, files, 0);
std::unordered_set<std::string> directories;
getDirectChildrenOnDisk(abs_key, files, std::filesystem::path(path) / "", directories);
/// List empty directories that are identified by the `prefix.path` metadata files. This is required to, e.g., remove
/// metadata along with regular files.
if (useSeparateLayoutForMetadata())
{
auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / key_prefix / "";
RelativePathsWithMetadata metadata_files;
object_storage->listObjects(metadata_key, metadata_files, 0);
getDirectChildrenOnDisk(metadata_key, metadata_files, std::filesystem::path(path) / "", directories);
}
return std::vector<std::string>(std::make_move_iterator(directories.begin()), std::make_move_iterator(directories.end()));
}
void MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk(
const std::string & storage_key,
const RelativePathsWithMetadata & remote_paths,
const std::string & local_path,
std::unordered_set<std::string> & result) const
{
getDirectChildrenOnDiskImpl(storage_key, remote_paths, local_path, *getPathMap(), result);
}
bool MetadataStorageFromPlainRewritableObjectStorage::useSeparateLayoutForMetadata() const
{
return getMetadataKeyPrefix() != object_storage->getCommonKeyPrefix();
}
} }

View File

@ -3,6 +3,7 @@
#include <Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h> #include <Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h>
#include <memory> #include <memory>
#include <unordered_set>
namespace DB namespace DB
@ -11,18 +12,29 @@ namespace DB
class MetadataStorageFromPlainRewritableObjectStorage final : public MetadataStorageFromPlainObjectStorage class MetadataStorageFromPlainRewritableObjectStorage final : public MetadataStorageFromPlainObjectStorage
{ {
private: private:
std::shared_ptr<PathMap> path_map; const std::string metadata_key_prefix;
std::shared_ptr<InMemoryPathMap> path_map;
public: public:
MetadataStorageFromPlainRewritableObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_); MetadataStorageFromPlainRewritableObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_);
~MetadataStorageFromPlainRewritableObjectStorage() override; ~MetadataStorageFromPlainRewritableObjectStorage() override;
MetadataStorageType getType() const override { return MetadataStorageType::PlainRewritable; } MetadataStorageType getType() const override { return MetadataStorageType::PlainRewritable; }
bool exists(const std::string & path) const override;
bool isDirectory(const std::string & path) const override;
std::vector<std::string> listDirectory(const std::string & path) const override;
protected: protected:
std::shared_ptr<PathMap> getPathMap() const override { return path_map; } std::string getMetadataKeyPrefix() const override { return metadata_key_prefix; }
std::vector<std::string> getDirectChildrenOnDisk( std::shared_ptr<InMemoryPathMap> getPathMap() const override { return path_map; }
const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const override; void getDirectChildrenOnDisk(
const std::string & storage_key,
const RelativePathsWithMetadata & remote_paths,
const std::string & local_path,
std::unordered_set<std::string> & result) const;
private:
bool useSeparateLayoutForMetadata() const;
}; };
} }

View File

@ -26,7 +26,7 @@ public:
bool isPlain() const override { return true; } bool isPlain() const override { return true; }
ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional<std::string> & /* key_prefix */) const override
{ {
return ObjectStorageKey::createAsRelative(BaseObjectStorage::getCommonKeyPrefix(), path); return ObjectStorageKey::createAsRelative(BaseObjectStorage::getCommonKeyPrefix(), path);
} }

View File

@ -1,5 +1,7 @@
#pragma once #pragma once
#include <optional>
#include <string>
#include <Disks/ObjectStorages/IObjectStorage.h> #include <Disks/ObjectStorages/IObjectStorage.h>
#include <Common/ObjectStorageKeyGenerator.h> #include <Common/ObjectStorageKeyGenerator.h>
#include "CommonPathPrefixKeyGenerator.h" #include "CommonPathPrefixKeyGenerator.h"
@ -33,9 +35,10 @@ public:
bool isPlain() const override { return true; } bool isPlain() const override { return true; }
ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional<std::string> & key_prefix) const override;
ObjectStorageKey generateObjectKeyPrefixForDirectoryPath(const std::string & path) const override; ObjectStorageKey
generateObjectKeyPrefixForDirectoryPath(const std::string & path, const std::optional<std::string> & key_prefix) const override;
void setKeysGenerator(ObjectStorageKeysGeneratorPtr gen) override { key_generator = gen; } void setKeysGenerator(ObjectStorageKeysGeneratorPtr gen) override { key_generator = gen; }
@ -46,20 +49,22 @@ private:
template <typename BaseObjectStorage> template <typename BaseObjectStorage>
ObjectStorageKey PlainRewritableObjectStorage<BaseObjectStorage>::generateObjectKeyForPath(const std::string & path) const ObjectStorageKey PlainRewritableObjectStorage<BaseObjectStorage>::generateObjectKeyForPath(
const std::string & path, const std::optional<std::string> & key_prefix) const
{ {
if (!key_generator) if (!key_generator)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set"); throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set");
return key_generator->generate(path, /* is_directory */ false); return key_generator->generate(path, /* is_directory */ false, key_prefix);
} }
template <typename BaseObjectStorage> template <typename BaseObjectStorage>
ObjectStorageKey PlainRewritableObjectStorage<BaseObjectStorage>::generateObjectKeyPrefixForDirectoryPath(const std::string & path) const ObjectStorageKey PlainRewritableObjectStorage<BaseObjectStorage>::generateObjectKeyPrefixForDirectoryPath(
const std::string & path, const std::optional<std::string> & key_prefix) const
{ {
if (!key_generator) if (!key_generator)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set"); throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set");
return key_generator->generate(path, /* is_directory */ true); return key_generator->generate(path, /* is_directory */ true, key_prefix);
} }
} }

View File

@ -79,7 +79,7 @@ bool checkBatchRemove(S3ObjectStorage & storage)
/// We are using generateObjectKeyForPath() which returns random object key. /// We are using generateObjectKeyForPath() which returns random object key.
/// That generated key is placed in a right directory where we should have write access. /// That generated key is placed in a right directory where we should have write access.
const String path = fmt::format("clickhouse_remove_objects_capability_{}", getServerUUID()); const String path = fmt::format("clickhouse_remove_objects_capability_{}", getServerUUID());
const auto key = storage.generateObjectKeyForPath(path); const auto key = storage.generateObjectKeyForPath(path, {} /* key_prefix */);
StoredObject object(key.serialize(), path); StoredObject object(key.serialize(), path);
try try
{ {

View File

@ -624,12 +624,12 @@ std::unique_ptr<IObjectStorage> S3ObjectStorage::cloneObjectStorage(
std::move(new_client), std::move(new_s3_settings), new_uri, s3_capabilities, key_generator, disk_name); std::move(new_client), std::move(new_s3_settings), new_uri, s3_capabilities, key_generator, disk_name);
} }
ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & path) const ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & path, const std::optional<std::string> & key_prefix) const
{ {
if (!key_generator) if (!key_generator)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set"); throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set");
return key_generator->generate(path, /* is_directory */ false); return key_generator->generate(path, /* is_directory */ false, key_prefix);
} }
std::shared_ptr<const S3::Client> S3ObjectStorage::getS3StorageClient() std::shared_ptr<const S3::Client> S3ObjectStorage::getS3StorageClient()

View File

@ -164,7 +164,7 @@ public:
bool supportParallelWrite() const override { return true; } bool supportParallelWrite() const override { return true; }
ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional<std::string> & key_prefix) const override;
bool isReadOnly() const override { return s3_settings.get()->read_only; } bool isReadOnly() const override { return s3_settings.get()->read_only; }

View File

@ -82,7 +82,7 @@ public:
const std::string & config_prefix, const std::string & config_prefix,
ContextPtr context) override; ContextPtr context) override;
ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional<std::string> & /* key_prefix */) const override
{ {
return ObjectStorageKey::createAsRelative(path); return ObjectStorageKey::createAsRelative(path);
} }

View File

@ -1,2 +1,2 @@
clickhouse_add_executable(format_fuzzer format_fuzzer.cpp ${SRCS}) clickhouse_add_executable(format_fuzzer format_fuzzer.cpp ${SRCS})
target_link_libraries(format_fuzzer PRIVATE dbms clickhouse_aggregate_functions) target_link_libraries(format_fuzzer PRIVATE dbms clickhouse_aggregate_functions clickhouse_functions)

View File

@ -20,37 +20,32 @@
#include <AggregateFunctions/registerAggregateFunctions.h> #include <AggregateFunctions/registerAggregateFunctions.h>
using namespace DB;
ContextMutablePtr context;
extern "C" int LLVMFuzzerInitialize(int *, char ***)
{
if (context)
return true;
SharedContextHolder shared_context = Context::createShared();
context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
MainThreadStatus::getInstance();
registerAggregateFunctions();
registerFormats();
return 0;
}
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{ {
try try
{ {
using namespace DB;
static SharedContextHolder shared_context;
static ContextMutablePtr context;
auto initialize = [&]() mutable
{
if (context)
return true;
shared_context = Context::createShared();
context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
context->setApplicationType(Context::ApplicationType::LOCAL);
MainThreadStatus::getInstance();
registerAggregateFunctions();
registerFormats();
return true;
};
static bool initialized = initialize();
(void) initialized;
total_memory_tracker.resetCounters(); total_memory_tracker.resetCounters();
total_memory_tracker.setHardLimit(1_GiB); total_memory_tracker.setHardLimit(1_GiB);
CurrentThread::get().memory_tracker.resetCounters(); CurrentThread::get().memory_tracker.resetCounters();

Some files were not shown because too many files have changed in this diff Show More