mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Merge branch 'master' into client-cancel
This commit is contained in:
commit
aa8cefa847
32
.github/workflows/backport_branches.yml
vendored
32
.github/workflows/backport_branches.yml
vendored
@ -9,6 +9,18 @@ on: # yamllint disable-line rule:truthy
|
||||
branches:
|
||||
- 'backport/**'
|
||||
jobs:
|
||||
PythonUnitTests:
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
- name: Python unit tests
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 -m unittest discover -s . -p '*_test.py'
|
||||
DockerHubPushAarch64:
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
steps:
|
||||
@ -143,8 +155,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -184,8 +196,8 @@ jobs:
|
||||
- name: Upload build URLs to artifacts
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -229,8 +241,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -274,8 +286,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -319,8 +331,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
|
68
.github/workflows/master.yml
vendored
68
.github/workflows/master.yml
vendored
@ -219,8 +219,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -260,8 +260,8 @@ jobs:
|
||||
- name: Upload build URLs to artifacts
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -305,8 +305,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -350,8 +350,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -395,8 +395,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -440,8 +440,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -485,8 +485,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -530,8 +530,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -575,8 +575,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -620,8 +620,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -668,8 +668,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -713,8 +713,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -758,8 +758,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -803,8 +803,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -848,8 +848,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -893,8 +893,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -938,8 +938,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
|
2
.github/workflows/nightly.yml
vendored
2
.github/workflows/nightly.yml
vendored
@ -112,7 +112,7 @@ jobs:
|
||||
run: |
|
||||
curl --form token="${COVERITY_TOKEN}" \
|
||||
--form email='security+coverity@clickhouse.com' \
|
||||
--form file="@$TEMP_PATH/$BUILD_NAME/clickhouse-scan.tgz" \
|
||||
--form file="@$TEMP_PATH/$BUILD_NAME/coverity-scan.tgz" \
|
||||
--form version="${GITHUB_REF#refs/heads/}-${GITHUB_SHA::6}" \
|
||||
--form description="Nighly Scan: $(date +'%Y-%m-%dT%H:%M:%S')" \
|
||||
https://scan.coverity.com/builds?project=ClickHouse%2FClickHouse
|
||||
|
68
.github/workflows/pull_request.yml
vendored
68
.github/workflows/pull_request.yml
vendored
@ -272,8 +272,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -317,8 +317,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -362,8 +362,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -404,8 +404,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -446,8 +446,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -491,8 +491,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -536,8 +536,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -581,8 +581,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -626,8 +626,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -671,8 +671,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -719,8 +719,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -764,8 +764,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -809,8 +809,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -854,8 +854,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -899,8 +899,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -944,8 +944,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -989,8 +989,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
|
4
.github/workflows/release.yml
vendored
4
.github/workflows/release.yml
vendored
@ -52,8 +52,8 @@ jobs:
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type auto
|
||||
python3 docker_server.py --release-type auto --no-ubuntu \
|
||||
python3 docker_server.py --release-type auto --version "${{ github.ref }}"
|
||||
python3 docker_server.py --release-type auto --version "${{ github.ref }}" --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
|
28
.github/workflows/release_branches.yml
vendored
28
.github/workflows/release_branches.yml
vendored
@ -146,8 +146,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -187,8 +187,8 @@ jobs:
|
||||
- name: Upload build URLs to artifacts
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -232,8 +232,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -277,8 +277,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -322,8 +322,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -367,8 +367,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
@ -412,8 +412,8 @@ jobs:
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
|
@ -222,6 +222,12 @@ else ()
|
||||
set(NO_WHOLE_ARCHIVE --no-whole-archive)
|
||||
endif ()
|
||||
|
||||
option(ENABLE_CURL_BUILD "Enable curl, azure, sentry build on by default except MacOS." ON)
|
||||
if (OS_DARWIN)
|
||||
# Disable the curl, azure, senry build on MacOS
|
||||
set (ENABLE_CURL_BUILD OFF)
|
||||
endif ()
|
||||
|
||||
# Ignored if `lld` is used
|
||||
option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.")
|
||||
|
||||
|
@ -828,7 +828,6 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
|
||||
|
||||
/// Setup signal handlers.
|
||||
/// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime.
|
||||
|
||||
addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, &handled_signals);
|
||||
addSignalHandler({SIGHUP}, closeLogsSignalHandler, &handled_signals);
|
||||
addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, &handled_signals);
|
||||
|
10
contrib/CMakeLists.txt
vendored
10
contrib/CMakeLists.txt
vendored
@ -119,9 +119,13 @@ add_contrib (fastops-cmake fastops)
|
||||
add_contrib (libuv-cmake libuv)
|
||||
add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv
|
||||
add_contrib (cassandra-cmake cassandra) # requires: libuv
|
||||
add_contrib (curl-cmake curl)
|
||||
add_contrib (azure-cmake azure)
|
||||
add_contrib (sentry-native-cmake sentry-native) # requires: curl
|
||||
|
||||
if (ENABLE_CURL_BUILD)
|
||||
add_contrib (curl-cmake curl)
|
||||
add_contrib (azure-cmake azure)
|
||||
add_contrib (sentry-native-cmake sentry-native) # requires: curl
|
||||
endif()
|
||||
|
||||
add_contrib (fmtlib-cmake fmtlib)
|
||||
add_contrib (krb5-cmake krb5)
|
||||
add_contrib (cyrus-sasl-cmake cyrus-sasl) # for krb5
|
||||
|
2
contrib/curl
vendored
2
contrib/curl
vendored
@ -1 +1 @@
|
||||
Subproject commit 3b8bbbbd1609c638a3d3d0acb148a33dedb67be3
|
||||
Subproject commit 801bd5138ce31aa0d906fa4e2eabfc599d74e793
|
@ -32,7 +32,6 @@ set (SRCS
|
||||
"${LIBRARY_DIR}/lib/transfer.c"
|
||||
"${LIBRARY_DIR}/lib/strcase.c"
|
||||
"${LIBRARY_DIR}/lib/easy.c"
|
||||
"${LIBRARY_DIR}/lib/security.c"
|
||||
"${LIBRARY_DIR}/lib/curl_fnmatch.c"
|
||||
"${LIBRARY_DIR}/lib/fileinfo.c"
|
||||
"${LIBRARY_DIR}/lib/wildcard.c"
|
||||
@ -115,6 +114,12 @@ set (SRCS
|
||||
"${LIBRARY_DIR}/lib/curl_get_line.c"
|
||||
"${LIBRARY_DIR}/lib/altsvc.c"
|
||||
"${LIBRARY_DIR}/lib/socketpair.c"
|
||||
"${LIBRARY_DIR}/lib/bufref.c"
|
||||
"${LIBRARY_DIR}/lib/dynbuf.c"
|
||||
"${LIBRARY_DIR}/lib/hsts.c"
|
||||
"${LIBRARY_DIR}/lib/http_aws_sigv4.c"
|
||||
"${LIBRARY_DIR}/lib/mqtt.c"
|
||||
"${LIBRARY_DIR}/lib/rename.c"
|
||||
"${LIBRARY_DIR}/lib/vauth/vauth.c"
|
||||
"${LIBRARY_DIR}/lib/vauth/cleartext.c"
|
||||
"${LIBRARY_DIR}/lib/vauth/cram.c"
|
||||
@ -131,8 +136,6 @@ set (SRCS
|
||||
"${LIBRARY_DIR}/lib/vtls/gtls.c"
|
||||
"${LIBRARY_DIR}/lib/vtls/vtls.c"
|
||||
"${LIBRARY_DIR}/lib/vtls/nss.c"
|
||||
"${LIBRARY_DIR}/lib/vtls/polarssl.c"
|
||||
"${LIBRARY_DIR}/lib/vtls/polarssl_threadlock.c"
|
||||
"${LIBRARY_DIR}/lib/vtls/wolfssl.c"
|
||||
"${LIBRARY_DIR}/lib/vtls/schannel.c"
|
||||
"${LIBRARY_DIR}/lib/vtls/schannel_verify.c"
|
||||
@ -141,6 +144,7 @@ set (SRCS
|
||||
"${LIBRARY_DIR}/lib/vtls/mbedtls.c"
|
||||
"${LIBRARY_DIR}/lib/vtls/mesalink.c"
|
||||
"${LIBRARY_DIR}/lib/vtls/bearssl.c"
|
||||
"${LIBRARY_DIR}/lib/vtls/keylog.c"
|
||||
"${LIBRARY_DIR}/lib/vquic/ngtcp2.c"
|
||||
"${LIBRARY_DIR}/lib/vquic/quiche.c"
|
||||
"${LIBRARY_DIR}/lib/vssh/libssh2.c"
|
||||
|
@ -96,7 +96,7 @@ else
|
||||
clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
|
||||
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
|
||||
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
|
||||
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits"
|
||||
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0"
|
||||
fi
|
||||
|
||||
clickhouse-client --query "SHOW TABLES FROM test"
|
||||
|
@ -139,7 +139,7 @@ pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhous
|
||||
# directly
|
||||
# - even though ci auto-compress some files (but not *.tsv) it does this only
|
||||
# for files >64MB, we want this files to be compressed explicitly
|
||||
for table in query_log zookeeper_log trace_log
|
||||
for table in query_log zookeeper_log trace_log transactions_info_log
|
||||
do
|
||||
clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz &
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
|
@ -10,7 +10,7 @@ import logging
|
||||
import time
|
||||
|
||||
|
||||
def get_options(i):
|
||||
def get_options(i, backward_compatibility_check):
|
||||
options = []
|
||||
client_options = []
|
||||
if 0 < i:
|
||||
@ -19,7 +19,7 @@ def get_options(i):
|
||||
if i % 3 == 1:
|
||||
options.append("--db-engine=Ordinary")
|
||||
|
||||
if i % 3 == 2:
|
||||
if i % 3 == 2 and not backward_compatibility_check:
|
||||
options.append('''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i))
|
||||
client_options.append('allow_experimental_database_replicated=1')
|
||||
|
||||
@ -57,7 +57,7 @@ def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_t
|
||||
pipes = []
|
||||
for i in range(0, len(output_paths)):
|
||||
f = open(output_paths[i], 'w')
|
||||
full_command = "{} {} {} {} {}".format(cmd, get_options(i), global_time_limit_option, skip_tests_option, backward_compatibility_check_option)
|
||||
full_command = "{} {} {} {} {}".format(cmd, get_options(i, backward_compatibility_check), global_time_limit_option, skip_tests_option, backward_compatibility_check_option)
|
||||
logging.info("Run func tests '%s'", full_command)
|
||||
p = Popen(full_command, shell=True, stdout=f, stderr=f)
|
||||
pipes.append(p)
|
||||
|
@ -158,6 +158,7 @@ toc_title: Adopters
|
||||
| <a href="https://www.staffcop.ru/" class="favicon">Staffcop</a> | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) |
|
||||
| <a href="https://www.suning.com/" class="favicon">Suning</a> | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) |
|
||||
| <a href="https://superwall.me/" class="favicon">Superwall</a> | Monetization Tooling | Main product | — | — | [Word of mouth, Jan 2022](https://github.com/ClickHouse/ClickHouse/pull/33573) |
|
||||
| <a href="https://swetrix.com" class="favicon">Swetrix</a> | Analytics | Main Product | — | — | [Source code](https://github.com/swetrix/swetrix-api) |
|
||||
| <a href="https://www.teralytics.net/" class="favicon">Teralytics</a> | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) |
|
||||
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
|
||||
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
|
||||
|
@ -519,6 +519,33 @@ Possible values:
|
||||
|
||||
Default value: `1`.
|
||||
|
||||
## allow_settings_after_format_in_insert {#allow_settings_after_format_in_insert}
|
||||
|
||||
Control whether `SETTINGS` after `FORMAT` in `INSERT` queries is allowed or not. It is not recommended to use this, since this may interpret part of `SETTINGS` as values.
|
||||
|
||||
Example:
|
||||
|
||||
```sql
|
||||
INSERT INTO FUNCTION null('foo String') SETTINGS max_threads=1 VALUES ('bar');
|
||||
```
|
||||
|
||||
But the following query will work only with `allow_settings_after_format_in_insert`:
|
||||
|
||||
```sql
|
||||
SET allow_settings_after_format_in_insert=1;
|
||||
INSERT INTO FUNCTION null('foo String') VALUES ('bar') SETTINGS max_threads=1;
|
||||
```
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Disallow.
|
||||
- 1 — Allow.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
!!! note "Warning"
|
||||
Use this setting only for backward compatibility if your use cases depend on old syntax.
|
||||
|
||||
## input_format_skip_unknown_fields {#settings-input-format-skip-unknown-fields}
|
||||
|
||||
Enables or disables skipping insertion of extra data.
|
||||
|
@ -54,6 +54,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
||||
("multiquery,n", "allow multiple queries in the same file")
|
||||
("obfuscate", "obfuscate instead of formatting")
|
||||
("backslash", "add a backslash at the end of each line of the formatted query")
|
||||
("allow_settings_after_format_in_insert", "Allow SETTINGS after FORMAT, but note, that this is not always safe")
|
||||
("seed", po::value<std::string>(), "seed (arbitrary string) that determines the result of obfuscation")
|
||||
;
|
||||
|
||||
@ -83,6 +84,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
||||
bool multiple = options.count("multiquery");
|
||||
bool obfuscate = options.count("obfuscate");
|
||||
bool backslash = options.count("backslash");
|
||||
bool allow_settings_after_format_in_insert = options.count("allow_settings_after_format_in_insert");
|
||||
|
||||
if (quiet && (hilite || oneline || obfuscate))
|
||||
{
|
||||
@ -154,7 +156,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
||||
const char * pos = query.data();
|
||||
const char * end = pos + query.size();
|
||||
|
||||
ParserQuery parser(end);
|
||||
ParserQuery parser(end, allow_settings_after_format_in_insert);
|
||||
do
|
||||
{
|
||||
ASTPtr res = parseQueryAndMovePosition(
|
||||
|
@ -1639,6 +1639,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
server.start();
|
||||
LOG_INFO(log, "Listening for {}", server.getDescription());
|
||||
}
|
||||
|
||||
global_context->setServerCompletelyStarted();
|
||||
LOG_INFO(log, "Ready for connections.");
|
||||
}
|
||||
|
||||
|
@ -102,6 +102,7 @@ enum class AccessType
|
||||
\
|
||||
M(KILL_QUERY, "", GLOBAL, ALL) /* allows to kill a query started by another user
|
||||
(anyone can kill his own queries) */\
|
||||
M(KILL_TRANSACTION, "", GLOBAL, ALL) \
|
||||
\
|
||||
M(MOVE_PARTITION_BETWEEN_SHARDS, "", GLOBAL, ALL) /* required to be able to move a part/partition to a table
|
||||
identified by its ZooKeeper path */\
|
||||
|
@ -42,6 +42,14 @@ void ArchiveBackup::openImpl(OpenMode open_mode_)
|
||||
/// mutex is already locked
|
||||
if (open_mode_ == OpenMode::WRITE)
|
||||
{
|
||||
/// Create a directory to contain the archive.
|
||||
auto dir_path = fs::path(path).parent_path();
|
||||
if (disk)
|
||||
disk->createDirectories(dir_path);
|
||||
else
|
||||
std::filesystem::create_directories(dir_path);
|
||||
|
||||
/// Start writing the archive.
|
||||
if (disk)
|
||||
writer = createArchiveWriter(path, disk->writeFile(path));
|
||||
else
|
||||
@ -65,7 +73,7 @@ void ArchiveBackup::openImpl(OpenMode open_mode_)
|
||||
}
|
||||
}
|
||||
|
||||
void ArchiveBackup::closeImpl(bool writing_finalized_)
|
||||
void ArchiveBackup::closeImpl(const Strings &, bool writing_finalized_)
|
||||
{
|
||||
/// mutex is already locked
|
||||
if (writer && writer->isWritingFile())
|
||||
|
@ -35,7 +35,7 @@ public:
|
||||
private:
|
||||
bool backupExists() const override;
|
||||
void openImpl(OpenMode open_mode_) override;
|
||||
void closeImpl(bool writing_finalized_) override;
|
||||
void closeImpl(const Strings & written_files_, bool writing_finalized_) override;
|
||||
bool supportsWritingInMultipleThreads() const override { return false; }
|
||||
std::unique_ptr<ReadBuffer> readFileImpl(const String & file_name) const override;
|
||||
std::unique_ptr<WriteBuffer> writeFileImpl(const String & file_name) override;
|
||||
|
@ -107,6 +107,7 @@ void BackupImpl::open(OpenMode open_mode_)
|
||||
timestamp = std::time(nullptr);
|
||||
uuid = UUIDHelpers::generateV4();
|
||||
writing_finalized = false;
|
||||
written_files.clear();
|
||||
}
|
||||
|
||||
if (open_mode_ == OpenMode::READ)
|
||||
@ -145,7 +146,7 @@ void BackupImpl::close()
|
||||
if (open_mode == OpenMode::NONE)
|
||||
return;
|
||||
|
||||
closeImpl(writing_finalized);
|
||||
closeImpl(written_files, writing_finalized);
|
||||
|
||||
uuid = UUIDHelpers::Nil;
|
||||
timestamp = 0;
|
||||
@ -202,9 +203,12 @@ void BackupImpl::writeBackupMetadata()
|
||||
config->setString(prefix + "checksum", getHexUIntLowercase(info.checksum));
|
||||
if (info.base_size)
|
||||
{
|
||||
config->setUInt(prefix + "base_size", info.base_size);
|
||||
if (info.base_checksum != info.checksum)
|
||||
config->setBool(prefix + "use_base", true);
|
||||
if (info.base_size != info.size)
|
||||
{
|
||||
config->setUInt(prefix + "base_size", info.base_size);
|
||||
config->setString(prefix + "base_checksum", getHexUIntLowercase(info.base_checksum));
|
||||
}
|
||||
}
|
||||
}
|
||||
++index;
|
||||
@ -213,6 +217,7 @@ void BackupImpl::writeBackupMetadata()
|
||||
std::ostringstream stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
config->save(stream);
|
||||
String str = stream.str();
|
||||
written_files.push_back(".backup");
|
||||
auto out = writeFileImpl(".backup");
|
||||
out->write(str.data(), str.size());
|
||||
}
|
||||
@ -253,13 +258,14 @@ void BackupImpl::readBackupMetadata()
|
||||
if (info.size)
|
||||
{
|
||||
info.checksum = unhexChecksum(config->getString(prefix + "checksum"));
|
||||
info.base_size = config->getUInt(prefix + "base_size", 0);
|
||||
bool use_base = config->getBool(prefix + "use_base", false);
|
||||
info.base_size = config->getUInt(prefix + "base_size", use_base ? info.size : 0);
|
||||
if (info.base_size)
|
||||
{
|
||||
if (config->has(prefix + "base_checksum"))
|
||||
info.base_checksum = unhexChecksum(config->getString(prefix + "base_checksum"));
|
||||
else
|
||||
if (info.base_size == info.size)
|
||||
info.base_checksum = info.checksum;
|
||||
else
|
||||
info.base_checksum = unhexChecksum(config->getString(prefix + "base_checksum"));
|
||||
}
|
||||
}
|
||||
file_infos.emplace(name, info);
|
||||
@ -345,11 +351,6 @@ BackupEntryPtr BackupImpl::readFile(const String & file_name) const
|
||||
return std::make_unique<BackupEntryFromMemory>(nullptr, 0, UInt128{0, 0});
|
||||
}
|
||||
|
||||
auto read_callback = [backup = std::static_pointer_cast<const BackupImpl>(shared_from_this()), file_name]()
|
||||
{
|
||||
return backup->readFileImpl(file_name);
|
||||
};
|
||||
|
||||
if (!info.base_size)
|
||||
{
|
||||
/// Data goes completely from this backup, the base backup isn't used.
|
||||
@ -526,6 +527,7 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
|
||||
}
|
||||
|
||||
/// Copy the entry's data after `copy_pos`.
|
||||
written_files.push_back(file_name);
|
||||
auto out = writeFileImpl(file_name);
|
||||
copyData(*read_buffer, *out);
|
||||
|
||||
|
@ -47,7 +47,7 @@ protected:
|
||||
virtual void openImpl(OpenMode open_mode_) = 0;
|
||||
OpenMode getOpenModeNoLock() const { return open_mode; }
|
||||
|
||||
virtual void closeImpl(bool writing_finalized_) = 0;
|
||||
virtual void closeImpl(const Strings & written_files_, bool writing_finalized_) = 0;
|
||||
|
||||
/// Read a file from the backup.
|
||||
/// Low level: the function doesn't check base backup or checksums.
|
||||
@ -86,6 +86,7 @@ private:
|
||||
std::optional<UUID> base_backup_uuid;
|
||||
std::map<String, FileInfo> file_infos; /// Should be ordered alphabetically, see listFiles().
|
||||
std::unordered_map<UInt128, String> file_checksums;
|
||||
Strings written_files;
|
||||
bool writing_finalized = false;
|
||||
};
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Backups/BackupInfo.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ExpressionElementParsers.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
@ -23,7 +24,11 @@ String BackupInfo::toString() const
|
||||
auto list = std::make_shared<ASTExpressionList>();
|
||||
func->arguments = list;
|
||||
func->children.push_back(list);
|
||||
list->children.reserve(args.size());
|
||||
list->children.reserve(args.size() + !id_arg.empty());
|
||||
|
||||
if (!id_arg.empty())
|
||||
list->children.push_back(std::make_shared<ASTIdentifier>(id_arg));
|
||||
|
||||
for (const auto & arg : args)
|
||||
list->children.push_back(std::make_shared<ASTLiteral>(arg));
|
||||
|
||||
@ -53,9 +58,22 @@ BackupInfo BackupInfo::fromAST(const IAST & ast)
|
||||
const auto * list = func->arguments->as<const ASTExpressionList>();
|
||||
if (!list)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected list, got {}", serializeAST(*func->arguments));
|
||||
res.args.reserve(list->children.size());
|
||||
for (const auto & elem : list->children)
|
||||
|
||||
size_t index = 0;
|
||||
if (!list->children.empty())
|
||||
{
|
||||
const auto * id = list->children[0]->as<const ASTIdentifier>();
|
||||
if (id)
|
||||
{
|
||||
res.id_arg = id->name();
|
||||
++index;
|
||||
}
|
||||
}
|
||||
|
||||
res.args.reserve(list->children.size() - index);
|
||||
for (; index < list->children.size(); ++index)
|
||||
{
|
||||
const auto & elem = list->children[index];
|
||||
const auto * lit = elem->as<const ASTLiteral>();
|
||||
if (!lit)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected literal, got {}", serializeAST(*elem));
|
||||
|
@ -11,6 +11,7 @@ class IAST;
|
||||
struct BackupInfo
|
||||
{
|
||||
String backup_engine_name;
|
||||
String id_arg;
|
||||
std::vector<Field> args;
|
||||
|
||||
String toString() const;
|
||||
|
@ -1,16 +1,9 @@
|
||||
#include <Backups/DirectoryBackup.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <Disks/DiskLocal.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
DirectoryBackup::DirectoryBackup(
|
||||
const String & backup_name_,
|
||||
@ -19,23 +12,16 @@ DirectoryBackup::DirectoryBackup(
|
||||
const ContextPtr & context_,
|
||||
const std::optional<BackupInfo> & base_backup_info_)
|
||||
: BackupImpl(backup_name_, context_, base_backup_info_)
|
||||
, disk(disk_), path(path_)
|
||||
, disk(disk_)
|
||||
{
|
||||
/// Path to backup must end with '/'
|
||||
if (!path.ends_with("/"))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Backup {}: Path to backup must end with '/', but {} doesn't.", getName(), quoteString(path));
|
||||
dir_path = fs::path(path).parent_path(); /// get path without terminating slash
|
||||
/// Remove terminating slash.
|
||||
path = (std::filesystem::path(path_) / "").parent_path();
|
||||
|
||||
/// If `disk` is not specified, we create an internal instance of `DiskLocal` here.
|
||||
if (!disk)
|
||||
{
|
||||
auto fspath = fs::path{dir_path};
|
||||
if (!fspath.has_filename())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Backup {}: Path to a backup must be a directory path.", getName(), quoteString(path));
|
||||
path = fspath.filename() / "";
|
||||
dir_path = fs::path(path).parent_path(); /// get path without terminating slash
|
||||
String disk_path = fspath.remove_filename();
|
||||
disk = std::make_shared<DiskLocal>(disk_path, disk_path, 0);
|
||||
disk = std::make_shared<DiskLocal>(path, path, 0);
|
||||
path = ".";
|
||||
}
|
||||
}
|
||||
|
||||
@ -47,34 +33,38 @@ DirectoryBackup::~DirectoryBackup()
|
||||
|
||||
bool DirectoryBackup::backupExists() const
|
||||
{
|
||||
return disk->isDirectory(dir_path);
|
||||
return disk->isDirectory(path);
|
||||
}
|
||||
|
||||
void DirectoryBackup::openImpl(OpenMode open_mode_)
|
||||
{
|
||||
if (open_mode_ == OpenMode::WRITE)
|
||||
disk->createDirectories(dir_path);
|
||||
disk->createDirectories(path);
|
||||
}
|
||||
|
||||
void DirectoryBackup::closeImpl(bool writing_finalized_)
|
||||
void DirectoryBackup::closeImpl(const Strings & written_files_, bool writing_finalized_)
|
||||
{
|
||||
if ((getOpenModeNoLock() == OpenMode::WRITE) && !writing_finalized_ && disk->isDirectory(dir_path))
|
||||
if ((getOpenModeNoLock() == OpenMode::WRITE) && !writing_finalized_ && !written_files_.empty())
|
||||
{
|
||||
/// Creating of the backup wasn't finished correctly,
|
||||
/// so the backup cannot be used and it's better to remove its files.
|
||||
disk->removeRecursive(dir_path);
|
||||
const auto & files_to_delete = written_files_;
|
||||
for (const String & file_name : files_to_delete)
|
||||
disk->removeFileIfExists(path / file_name);
|
||||
if (disk->isDirectory(path) && disk->isDirectoryEmpty(path))
|
||||
disk->removeDirectory(path);
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBuffer> DirectoryBackup::readFileImpl(const String & file_name) const
|
||||
{
|
||||
String file_path = path + file_name;
|
||||
auto file_path = path / file_name;
|
||||
return disk->readFile(file_path);
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBuffer> DirectoryBackup::writeFileImpl(const String & file_name)
|
||||
{
|
||||
String file_path = path + file_name;
|
||||
auto file_path = path / file_name;
|
||||
disk->createDirectories(fs::path(file_path).parent_path());
|
||||
return disk->writeFile(file_path);
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/BackupImpl.h>
|
||||
#include <filesystem>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -25,13 +26,12 @@ public:
|
||||
private:
|
||||
bool backupExists() const override;
|
||||
void openImpl(OpenMode open_mode_) override;
|
||||
void closeImpl(bool writing_finalized_) override;
|
||||
void closeImpl(const Strings & written_files_, bool writing_finalized_) override;
|
||||
std::unique_ptr<ReadBuffer> readFileImpl(const String & file_name) const override;
|
||||
std::unique_ptr<WriteBuffer> writeFileImpl(const String & file_name) override;
|
||||
|
||||
DiskPtr disk;
|
||||
String path;
|
||||
String dir_path; /// `path` without terminating slash
|
||||
std::filesystem::path path;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <Backups/DirectoryBackup.h>
|
||||
#include <Backups/ArchiveBackup.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <IO/Archives/hasRegisteredArchiveFileExtension.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
@ -13,8 +14,9 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int INVALID_CONFIG_PARAMETER;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
@ -22,83 +24,70 @@ namespace
|
||||
{
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
[[noreturn]] void throwDiskIsAllowed(const String & disk_name)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk {} is not allowed for backups", disk_name);
|
||||
}
|
||||
|
||||
[[noreturn]] void throwPathNotAllowed(const fs::path & path)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} is not allowed for backups", quoteString(String{path}));
|
||||
}
|
||||
|
||||
void checkAllowedPathInConfigIsValid(const String & key, const fs::path & value)
|
||||
{
|
||||
if (value.empty() || value.is_relative())
|
||||
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Configuration parameter {} has a wrong value {}", key, String{value});
|
||||
}
|
||||
|
||||
/// Checks that a disk name and a path specified as parameters of Disk() are valid.
|
||||
void checkDiskNameAndPath(const String & disk_name, fs::path & path, const Poco::Util::AbstractConfiguration & config)
|
||||
/// Checks that a disk name specified as parameters of Disk() is valid.
|
||||
void checkDiskName(const String & disk_name, const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
String key = "backups.allowed_disk";
|
||||
bool disk_name_found = false;
|
||||
if (!config.has(key))
|
||||
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "The \"backups.allowed_disk\" configuration parameter is not set, cannot use Disk() backup engine");
|
||||
|
||||
size_t counter = 0;
|
||||
while (config.has(key))
|
||||
while (config.getString(key) != disk_name)
|
||||
{
|
||||
if (config.getString(key) == disk_name)
|
||||
{
|
||||
disk_name_found = true;
|
||||
break;
|
||||
}
|
||||
key = "backups.allowed_disk[" + std::to_string(++counter) + "]";
|
||||
if (!config.has(key))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk {} is not allowed for backups, see the \"backups.allowed_disk\" configuration parameter", disk_name);
|
||||
}
|
||||
|
||||
if (!disk_name_found)
|
||||
throwDiskIsAllowed(disk_name);
|
||||
|
||||
path = path.lexically_normal();
|
||||
if (!path.is_relative() || path.empty() || (*path.begin() == ".."))
|
||||
throwPathNotAllowed(path);
|
||||
}
|
||||
|
||||
/// Checks that a path specified as a parameter of File() is valid.
|
||||
void checkPath(fs::path & path, const Poco::Util::AbstractConfiguration & config)
|
||||
/// Checks that a path specified as parameters of Disk() is valid.
|
||||
void checkPath(const String & disk_name, const DiskPtr & disk, fs::path & path)
|
||||
{
|
||||
String key = "backups.allowed_path";
|
||||
path = path.lexically_normal();
|
||||
if (!path.is_relative() && (disk->getType() == DiskType::Local))
|
||||
path = path.lexically_proximate(disk->getPath());
|
||||
|
||||
bool path_ok = path.empty() || (path.is_relative() && (*path.begin() != ".."));
|
||||
if (!path_ok)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} to backup must be inside the specified disk {}", quoteString(path.c_str()), disk_name);
|
||||
}
|
||||
|
||||
/// Checks that a path specified as parameters of File() is valid.
|
||||
void checkPath(fs::path & path, const Poco::Util::AbstractConfiguration & config, const fs::path & data_dir) {
|
||||
path = path.lexically_normal();
|
||||
if (path.empty())
|
||||
throwPathNotAllowed(path);
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to backup must not be empty");
|
||||
|
||||
String key = "backups.allowed_path";
|
||||
if (!config.has(key))
|
||||
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER,
|
||||
"The \"backups.allowed_path\" configuration parameter is not set, cannot use File() backup engine");
|
||||
|
||||
if (path.is_relative())
|
||||
{
|
||||
if (*path.begin() == "..")
|
||||
throwPathNotAllowed(path);
|
||||
auto first_allowed_path = fs::path(config.getString(key));
|
||||
if (first_allowed_path.is_relative())
|
||||
first_allowed_path = data_dir / first_allowed_path;
|
||||
|
||||
auto base = fs::path(config.getString(key, ""));
|
||||
checkAllowedPathInConfigIsValid(key, base);
|
||||
path = base / path;
|
||||
return;
|
||||
path = first_allowed_path / path;
|
||||
}
|
||||
|
||||
bool path_found_in_config = false;
|
||||
size_t counter = 0;
|
||||
while (config.has(key))
|
||||
while (true)
|
||||
{
|
||||
auto base = fs::path(config.getString(key));
|
||||
checkAllowedPathInConfigIsValid(key, base);
|
||||
auto rel = path.lexically_relative(base);
|
||||
if (!rel.empty() && (*rel.begin() != ".."))
|
||||
{
|
||||
path_found_in_config = true;
|
||||
auto allowed_path = fs::path(config.getString(key));
|
||||
if (allowed_path.is_relative())
|
||||
allowed_path = data_dir / allowed_path;
|
||||
auto rel = path.lexically_proximate(allowed_path);
|
||||
bool path_ok = rel.empty() || (rel.is_relative() && (*rel.begin() != ".."));
|
||||
if (path_ok)
|
||||
break;
|
||||
}
|
||||
key = "backups.allowed_path[" + std::to_string(++counter) + "]";
|
||||
if (!config.has(key))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Path {} is not allowed for backups, see the \"backups.allowed_path\" configuration parameter",
|
||||
quoteString(path.c_str()));
|
||||
}
|
||||
|
||||
if (!path_found_in_config)
|
||||
throwPathNotAllowed(path);
|
||||
}
|
||||
}
|
||||
|
||||
@ -109,6 +98,15 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
|
||||
{
|
||||
String backup_name = params.backup_info.toString();
|
||||
const String & engine_name = params.backup_info.backup_engine_name;
|
||||
|
||||
if (!params.backup_info.id_arg.empty())
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"Backup engine '{}' requires first argument to be a string",
|
||||
engine_name);
|
||||
}
|
||||
|
||||
const auto & args = params.backup_info.args;
|
||||
|
||||
DiskPtr disk;
|
||||
@ -123,7 +121,9 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
|
||||
}
|
||||
|
||||
path = args[0].safeGet<String>();
|
||||
checkPath(path, params.context->getConfigRef());
|
||||
const auto & config = params.context->getConfigRef();
|
||||
const auto & data_dir = params.context->getPath();
|
||||
checkPath(path, config, data_dir);
|
||||
}
|
||||
else if (engine_name == "Disk")
|
||||
{
|
||||
@ -135,30 +135,28 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
|
||||
}
|
||||
|
||||
String disk_name = args[0].safeGet<String>();
|
||||
const auto & config = params.context->getConfigRef();
|
||||
checkDiskName(disk_name, config);
|
||||
path = args[1].safeGet<String>();
|
||||
checkDiskNameAndPath(disk_name, path, params.context->getConfigRef());
|
||||
disk = params.context->getDisk(disk_name);
|
||||
checkPath(disk_name, disk, path);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected backup engine '{}'", engine_name);
|
||||
|
||||
std::unique_ptr<IBackup> backup;
|
||||
|
||||
if (!path.has_filename() && !path.empty())
|
||||
{
|
||||
if (!params.password.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Password is not applicable, backup cannot be encrypted");
|
||||
backup = std::make_unique<DirectoryBackup>(backup_name, disk, path, params.context, params.base_backup_info);
|
||||
}
|
||||
else if (hasRegisteredArchiveFileExtension(path))
|
||||
if (hasRegisteredArchiveFileExtension(path))
|
||||
{
|
||||
auto archive_backup = std::make_unique<ArchiveBackup>(backup_name, disk, path, params.context, params.base_backup_info);
|
||||
archive_backup->setCompression(params.compression_method, params.compression_level);
|
||||
archive_backup->setPassword(params.password);
|
||||
backup = std::move(archive_backup);
|
||||
return archive_backup;
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to backup must be either a directory or a path to an archive");
|
||||
|
||||
return backup;
|
||||
{
|
||||
if (!params.password.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Password is not applicable, backup cannot be encrypted");
|
||||
return std::make_unique<DirectoryBackup>(backup_name, disk, path, params.context, params.base_backup_info);
|
||||
}
|
||||
};
|
||||
|
||||
factory.registerBackupEngine("File", creator_fn);
|
||||
|
@ -275,7 +275,7 @@ void ClientBase::setupSignalHandler()
|
||||
|
||||
ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const
|
||||
{
|
||||
ParserQuery parser(end);
|
||||
ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert);
|
||||
ASTPtr res;
|
||||
|
||||
const auto & settings = global_context->getSettingsRef();
|
||||
@ -1129,7 +1129,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
|
||||
sendDataFromPipe(
|
||||
storage->read(
|
||||
sample.getNames(),
|
||||
storage->getStorageSnapshot(metadata),
|
||||
storage->getStorageSnapshot(metadata, global_context),
|
||||
query_info,
|
||||
global_context,
|
||||
{},
|
||||
|
@ -91,6 +91,7 @@ public:
|
||||
struct QueryScope
|
||||
{
|
||||
explicit QueryScope(ContextMutablePtr query_context);
|
||||
explicit QueryScope(ContextPtr query_context);
|
||||
~QueryScope();
|
||||
|
||||
void logPeakMemoryUsage();
|
||||
|
@ -617,6 +617,8 @@
|
||||
M(646, CANNOT_BACKUP_DATABASE) \
|
||||
M(647, CANNOT_BACKUP_TABLE) \
|
||||
M(648, WRONG_DDL_RENAMING_SETTINGS) \
|
||||
M(649, INVALID_TRANSACTION) \
|
||||
M(650, SERIALIZATION_ERROR) \
|
||||
\
|
||||
M(999, KEEPER_EXCEPTION) \
|
||||
M(1000, POCO_EXCEPTION) \
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Common/randomSeed.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/hex.h>
|
||||
#include <Common/FileCacheSettings.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/ReadSettings.h>
|
||||
@ -31,13 +32,11 @@ namespace
|
||||
|
||||
IFileCache::IFileCache(
|
||||
const String & cache_base_path_,
|
||||
size_t max_size_,
|
||||
size_t max_element_size_,
|
||||
size_t max_file_segment_size_)
|
||||
const FileCacheSettings & cache_settings_)
|
||||
: cache_base_path(cache_base_path_)
|
||||
, max_size(max_size_)
|
||||
, max_element_size(max_element_size_)
|
||||
, max_file_segment_size(max_file_segment_size_)
|
||||
, max_size(cache_settings_.max_size)
|
||||
, max_element_size(cache_settings_.max_elements)
|
||||
, max_file_segment_size(cache_settings_.max_file_segment_size)
|
||||
{
|
||||
}
|
||||
|
||||
@ -58,7 +57,7 @@ String IFileCache::getPathInLocalCache(const Key & key)
|
||||
return fs::path(cache_base_path) / key_str.substr(0, 3) / key_str;
|
||||
}
|
||||
|
||||
bool IFileCache::shouldBypassCache()
|
||||
bool IFileCache::isReadOnly()
|
||||
{
|
||||
return !CurrentThread::isInitialized()
|
||||
|| !CurrentThread::get().getQueryContext()
|
||||
@ -71,8 +70,8 @@ void IFileCache::assertInitialized() const
|
||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cache not initialized");
|
||||
}
|
||||
|
||||
LRUFileCache::LRUFileCache(const String & cache_base_path_, size_t max_size_, size_t max_element_size_, size_t max_file_segment_size_)
|
||||
: IFileCache(cache_base_path_, max_size_, max_element_size_, max_file_segment_size_)
|
||||
LRUFileCache::LRUFileCache(const String & cache_base_path_, const FileCacheSettings & cache_settings_)
|
||||
: IFileCache(cache_base_path_, cache_settings_)
|
||||
, log(&Poco::Logger::get("LRUFileCache"))
|
||||
{
|
||||
}
|
||||
@ -205,8 +204,8 @@ FileSegments LRUFileCache::getImpl(
|
||||
return result;
|
||||
}
|
||||
|
||||
FileSegments LRUFileCache::splitRangeIntoEmptyCells(
|
||||
const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock)
|
||||
FileSegments LRUFileCache::splitRangeIntoCells(
|
||||
const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard<std::mutex> & cache_lock)
|
||||
{
|
||||
assert(size > 0);
|
||||
|
||||
@ -222,9 +221,10 @@ FileSegments LRUFileCache::splitRangeIntoEmptyCells(
|
||||
current_cell_size = std::min(remaining_size, max_file_segment_size);
|
||||
remaining_size -= current_cell_size;
|
||||
|
||||
auto * cell = addCell(key, current_pos, current_cell_size, FileSegment::State::EMPTY, cache_lock);
|
||||
auto * cell = addCell(key, current_pos, current_cell_size, state, cache_lock);
|
||||
if (cell)
|
||||
file_segments.push_back(cell->file_segment);
|
||||
assert(cell);
|
||||
|
||||
current_pos += current_cell_size;
|
||||
}
|
||||
@ -250,7 +250,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t
|
||||
|
||||
if (file_segments.empty())
|
||||
{
|
||||
file_segments = splitRangeIntoEmptyCells(key, offset, size, cache_lock);
|
||||
file_segments = splitRangeIntoCells(key, offset, size, FileSegment::State::EMPTY, cache_lock);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -295,7 +295,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t
|
||||
assert(current_pos < segment_range.left);
|
||||
|
||||
auto hole_size = segment_range.left - current_pos;
|
||||
file_segments.splice(it, splitRangeIntoEmptyCells(key, current_pos, hole_size, cache_lock));
|
||||
file_segments.splice(it, splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
|
||||
|
||||
current_pos = segment_range.right + 1;
|
||||
++it;
|
||||
@ -309,7 +309,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t
|
||||
/// segmentN
|
||||
|
||||
auto hole_size = range.right - current_pos + 1;
|
||||
file_segments.splice(file_segments.end(), splitRangeIntoEmptyCells(key, current_pos, hole_size, cache_lock));
|
||||
file_segments.splice(file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
|
||||
}
|
||||
}
|
||||
|
||||
@ -354,6 +354,21 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell(
|
||||
return &(it->second);
|
||||
}
|
||||
|
||||
FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset, size_t size)
|
||||
{
|
||||
std::lock_guard cache_lock(mutex);
|
||||
|
||||
auto * cell = getCell(key, offset, cache_lock);
|
||||
if (cell)
|
||||
throw Exception(
|
||||
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
|
||||
"Cache cell already exists for key `{}` and offset {}",
|
||||
keyToStr(key), offset);
|
||||
|
||||
auto file_segments = splitRangeIntoCells(key, offset, size, FileSegment::State::DOWNLOADING, cache_lock);
|
||||
return FileSegmentsHolder(std::move(file_segments));
|
||||
}
|
||||
|
||||
bool LRUFileCache::tryReserve(
|
||||
const Key & key_, size_t offset_, size_t size, std::lock_guard<std::mutex> & cache_lock)
|
||||
{
|
||||
@ -372,7 +387,8 @@ bool LRUFileCache::tryReserve(
|
||||
|
||||
auto is_overflow = [&]
|
||||
{
|
||||
return (current_size + size - removed_size > max_size)
|
||||
/// max_size == 0 means unlimited cache size, max_element_size means unlimited number of cache elements.
|
||||
return (max_size != 0 && current_size + size - removed_size > max_size)
|
||||
|| (max_element_size != 0 && queue_size > max_element_size);
|
||||
};
|
||||
|
||||
@ -484,6 +500,30 @@ void LRUFileCache::remove(const Key & key)
|
||||
fs::remove(key_path);
|
||||
}
|
||||
|
||||
void LRUFileCache::tryRemoveAll()
|
||||
{
|
||||
/// Try remove all cached files by cache_base_path.
|
||||
/// Only releasable file segments are evicted.
|
||||
|
||||
std::lock_guard cache_lock(mutex);
|
||||
|
||||
for (auto it = queue.begin(); it != queue.end();)
|
||||
{
|
||||
auto & [key, offset] = *it++;
|
||||
|
||||
auto * cell = getCell(key, offset, cache_lock);
|
||||
if (cell->releasable())
|
||||
{
|
||||
auto file_segment = cell->file_segment;
|
||||
if (file_segment)
|
||||
{
|
||||
std::lock_guard<std::mutex> segment_lock(file_segment->mutex);
|
||||
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void LRUFileCache::remove(
|
||||
Key key, size_t offset,
|
||||
std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & /* segment_lock */)
|
||||
@ -668,6 +708,38 @@ bool LRUFileCache::isLastFileSegmentHolder(
|
||||
return cell->file_segment.use_count() == 2;
|
||||
}
|
||||
|
||||
FileSegments LRUFileCache::getSnapshot() const
|
||||
{
|
||||
std::lock_guard cache_lock(mutex);
|
||||
|
||||
FileSegments file_segments;
|
||||
|
||||
for (const auto & [key, cells_by_offset] : files)
|
||||
{
|
||||
for (const auto & [offset, cell] : cells_by_offset)
|
||||
file_segments.push_back(FileSegment::getSnapshot(cell.file_segment, cache_lock));
|
||||
}
|
||||
|
||||
return file_segments;
|
||||
}
|
||||
|
||||
std::vector<String> LRUFileCache::tryGetCachePaths(const Key & key)
|
||||
{
|
||||
std::lock_guard cache_lock(mutex);
|
||||
|
||||
std::vector<String> cache_paths;
|
||||
|
||||
const auto & cells_by_offset = files[key];
|
||||
|
||||
for (const auto & [offset, cell] : cells_by_offset)
|
||||
{
|
||||
if (cell.file_segment->state() == FileSegment::State::DOWNLOADED)
|
||||
cache_paths.push_back(getPathInLocalCache(key, offset));
|
||||
}
|
||||
|
||||
return cache_paths;
|
||||
}
|
||||
|
||||
LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_)
|
||||
: file_segment(file_segment_)
|
||||
{
|
||||
@ -685,12 +757,13 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU
|
||||
break;
|
||||
}
|
||||
case FileSegment::State::EMPTY:
|
||||
case FileSegment::State::DOWNLOADING:
|
||||
{
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
|
||||
"Can create cell with either DOWNLOADED or EMPTY state, got: {}",
|
||||
"Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state, got: {}",
|
||||
FileSegment::stateToString(file_segment->download_state));
|
||||
}
|
||||
}
|
||||
|
@ -33,9 +33,7 @@ public:
|
||||
|
||||
IFileCache(
|
||||
const String & cache_base_path_,
|
||||
size_t max_size_,
|
||||
size_t max_element_size_,
|
||||
size_t max_file_segment_size_);
|
||||
const FileCacheSettings & cache_settings_);
|
||||
|
||||
virtual ~IFileCache() = default;
|
||||
|
||||
@ -44,7 +42,9 @@ public:
|
||||
|
||||
virtual void remove(const Key & key) = 0;
|
||||
|
||||
static bool shouldBypassCache();
|
||||
virtual void tryRemoveAll() = 0;
|
||||
|
||||
static bool isReadOnly();
|
||||
|
||||
/// Cache capacity in bytes.
|
||||
size_t capacity() const { return max_size; }
|
||||
@ -55,6 +55,10 @@ public:
|
||||
|
||||
String getPathInLocalCache(const Key & key);
|
||||
|
||||
const String & getBasePath() const { return cache_base_path; }
|
||||
|
||||
virtual std::vector<String> tryGetCachePaths(const Key & key) = 0;
|
||||
|
||||
/**
|
||||
* Given an `offset` and `size` representing [offset, offset + size) bytes interval,
|
||||
* return list of cached non-overlapping non-empty
|
||||
@ -68,6 +72,10 @@ public:
|
||||
*/
|
||||
virtual FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) = 0;
|
||||
|
||||
virtual FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) = 0;
|
||||
|
||||
virtual FileSegments getSnapshot() const = 0;
|
||||
|
||||
/// For debug.
|
||||
virtual String dumpStructure(const Key & key) = 0;
|
||||
|
||||
@ -112,16 +120,22 @@ class LRUFileCache final : public IFileCache
|
||||
public:
|
||||
LRUFileCache(
|
||||
const String & cache_base_path_,
|
||||
size_t max_size_,
|
||||
size_t max_element_size_ = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS,
|
||||
size_t max_file_segment_size_ = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE);
|
||||
const FileCacheSettings & cache_settings_);
|
||||
|
||||
FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) override;
|
||||
|
||||
FileSegments getSnapshot() const override;
|
||||
|
||||
FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) override;
|
||||
|
||||
void initialize() override;
|
||||
|
||||
void remove(const Key & key) override;
|
||||
|
||||
void tryRemoveAll() override;
|
||||
|
||||
std::vector<String> tryGetCachePaths(const Key & key) override;
|
||||
|
||||
private:
|
||||
using FileKeyAndOffset = std::pair<Key, size_t>;
|
||||
using LRUQueue = std::list<FileKeyAndOffset>;
|
||||
@ -194,8 +208,8 @@ private:
|
||||
|
||||
void loadCacheInfoIntoMemory();
|
||||
|
||||
FileSegments splitRangeIntoEmptyCells(
|
||||
const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);
|
||||
FileSegments splitRangeIntoCells(
|
||||
const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
String dumpStructureImpl(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
|
@ -15,28 +15,53 @@ FileCacheFactory & FileCacheFactory::instance()
|
||||
return ret;
|
||||
}
|
||||
|
||||
FileCachePtr FileCacheFactory::getImpl(const std::string & cache_base_path, std::lock_guard<std::mutex> &)
|
||||
FileCacheFactory::CacheByBasePath FileCacheFactory::getAll()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return caches;
|
||||
}
|
||||
|
||||
const FileCacheSettings & FileCacheFactory::getSettings(const std::string & cache_base_path)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
auto * cache_data = getImpl(cache_base_path, lock);
|
||||
if (cache_data)
|
||||
return cache_data->settings;
|
||||
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path);
|
||||
}
|
||||
|
||||
FileCacheFactory::CacheData * FileCacheFactory::getImpl(const std::string & cache_base_path, std::lock_guard<std::mutex> &)
|
||||
{
|
||||
auto it = caches.find(cache_base_path);
|
||||
if (it == caches.end())
|
||||
return nullptr;
|
||||
return it->second;
|
||||
return &it->second;
|
||||
}
|
||||
|
||||
FileCachePtr FileCacheFactory::get(const std::string & cache_base_path)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
auto * cache_data = getImpl(cache_base_path, lock);
|
||||
if (cache_data)
|
||||
return cache_data->cache;
|
||||
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path);
|
||||
}
|
||||
|
||||
FileCachePtr FileCacheFactory::getOrCreate(
|
||||
const std::string & cache_base_path, size_t max_size, size_t max_elements_size, size_t max_file_segment_size)
|
||||
const std::string & cache_base_path, const FileCacheSettings & file_cache_settings)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto cache = getImpl(cache_base_path, lock);
|
||||
if (cache)
|
||||
{
|
||||
if (cache->capacity() != max_size)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cache with path `{}` already exists, but has different max size", cache_base_path);
|
||||
return cache;
|
||||
}
|
||||
|
||||
cache = std::make_shared<LRUFileCache>(cache_base_path, max_size, max_elements_size, max_file_segment_size);
|
||||
caches.emplace(cache_base_path, cache);
|
||||
auto * cache_data = getImpl(cache_base_path, lock);
|
||||
if (cache_data)
|
||||
return cache_data->cache;
|
||||
|
||||
auto cache = std::make_shared<LRUFileCache>(cache_base_path, file_cache_settings);
|
||||
caches.emplace(cache_base_path, CacheData(cache, file_cache_settings));
|
||||
return cache;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/FileCache_fwd.h>
|
||||
#include <Common/FileCacheSettings.h>
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <unordered_map>
|
||||
@ -14,16 +15,32 @@ namespace DB
|
||||
*/
|
||||
class FileCacheFactory final : private boost::noncopyable
|
||||
{
|
||||
struct CacheData
|
||||
{
|
||||
FileCachePtr cache;
|
||||
FileCacheSettings settings;
|
||||
|
||||
CacheData(FileCachePtr cache_, const FileCacheSettings & settings_) : cache(cache_), settings(settings_) {}
|
||||
};
|
||||
|
||||
using CacheByBasePath = std::unordered_map<std::string, CacheData>;
|
||||
|
||||
public:
|
||||
static FileCacheFactory & instance();
|
||||
|
||||
FileCachePtr getOrCreate(const std::string & cache_base_path, size_t max_size, size_t max_elements_size, size_t max_file_segment_size);
|
||||
FileCachePtr getOrCreate(const std::string & cache_base_path, const FileCacheSettings & file_cache_settings);
|
||||
|
||||
FileCachePtr get(const std::string & cache_base_path);
|
||||
|
||||
CacheByBasePath getAll();
|
||||
|
||||
const FileCacheSettings & getSettings(const std::string & cache_base_path);
|
||||
|
||||
private:
|
||||
FileCachePtr getImpl(const std::string & cache_base_path, std::lock_guard<std::mutex> &);
|
||||
CacheData * getImpl(const std::string & cache_base_path, std::lock_guard<std::mutex> &);
|
||||
|
||||
std::mutex mutex;
|
||||
std::unordered_map<std::string, FileCachePtr> caches;
|
||||
CacheByBasePath caches;
|
||||
};
|
||||
|
||||
}
|
||||
|
16
src/Common/FileCacheSettings.cpp
Normal file
16
src/Common/FileCacheSettings.cpp
Normal file
@ -0,0 +1,16 @@
|
||||
#include "FileCacheSettings.h"
|
||||
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
|
||||
{
|
||||
max_size = config.getUInt64(config_prefix + ".data_cache_max_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_CACHE_SIZE);
|
||||
max_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS);
|
||||
max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE);
|
||||
cache_on_write_operations = config.getUInt64(config_prefix + ".cache_on_write_operations", false);
|
||||
}
|
||||
|
||||
}
|
20
src/Common/FileCacheSettings.h
Normal file
20
src/Common/FileCacheSettings.h
Normal file
@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/FileCache_fwd.h>
|
||||
|
||||
namespace Poco { namespace Util { class AbstractConfiguration; } }
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct FileCacheSettings
|
||||
{
|
||||
size_t max_size = 0;
|
||||
size_t max_elements = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS;
|
||||
size_t max_file_segment_size = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE;
|
||||
bool cache_on_write_operations = false;
|
||||
|
||||
void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
|
||||
};
|
||||
|
||||
}
|
@ -4,10 +4,13 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_CACHE_SIZE = 1024 * 1024 * 1024;
|
||||
static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024;
|
||||
static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024;
|
||||
|
||||
class IFileCache;
|
||||
using FileCachePtr = std::shared_ptr<IFileCache>;
|
||||
|
||||
struct FileCacheSettings;
|
||||
|
||||
}
|
||||
|
@ -31,10 +31,34 @@ FileSegment::FileSegment(
|
||||
, log(&Poco::Logger::get("FileSegment"))
|
||||
#endif
|
||||
{
|
||||
if (download_state == State::DOWNLOADED)
|
||||
reserved_size = downloaded_size = size_;
|
||||
else if (download_state != State::EMPTY)
|
||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Can create cell with either DOWNLOADED or EMPTY state");
|
||||
/// On creation, file segment state can be EMPTY, DOWNLOADED, DOWNLOADING.
|
||||
switch (download_state)
|
||||
{
|
||||
/// EMPTY is used when file segment is not in cache and
|
||||
/// someone will _potentially_ want to download it (after calling getOrSetDownloader()).
|
||||
case (State::EMPTY):
|
||||
{
|
||||
break;
|
||||
}
|
||||
/// DOWNLOADED is used either on initial cache metadata load into memory on server startup
|
||||
/// or on reduceSizeToDownloaded() -- when file segment object is updated.
|
||||
case (State::DOWNLOADED):
|
||||
{
|
||||
reserved_size = downloaded_size = size_;
|
||||
break;
|
||||
}
|
||||
/// DOWNLOADING is used only for write-through caching (e.g. getOrSetDownloader() is not
|
||||
/// needed, downloader is set on file segment creation).
|
||||
case (State::DOWNLOADING):
|
||||
{
|
||||
downloader_id = getCallerId();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FileSegment::State FileSegment::state() const
|
||||
@ -49,6 +73,12 @@ size_t FileSegment::getDownloadOffset() const
|
||||
return range().left + getDownloadedSize(segment_lock);
|
||||
}
|
||||
|
||||
size_t FileSegment::getDownloadedSize() const
|
||||
{
|
||||
std::lock_guard segment_lock(mutex);
|
||||
return getDownloadedSize(segment_lock);
|
||||
}
|
||||
|
||||
size_t FileSegment::getDownloadedSize(std::lock_guard<std::mutex> & /* segment_lock */) const
|
||||
{
|
||||
if (download_state == State::DOWNLOADED)
|
||||
@ -60,24 +90,15 @@ size_t FileSegment::getDownloadedSize(std::lock_guard<std::mutex> & /* segment_l
|
||||
|
||||
String FileSegment::getCallerId()
|
||||
{
|
||||
return getCallerIdImpl(false);
|
||||
return getCallerIdImpl();
|
||||
}
|
||||
|
||||
String FileSegment::getCallerIdImpl(bool allow_non_strict_checking)
|
||||
String FileSegment::getCallerIdImpl()
|
||||
{
|
||||
if (IFileCache::shouldBypassCache())
|
||||
{
|
||||
/// getCallerId() can be called from completeImpl(), which can be called from complete().
|
||||
/// complete() is called from destructor of CachedReadBufferFromRemoteFS when there is no query id anymore.
|
||||
/// Allow non strict checking in this case. This works correctly as if getCallerIdImpl() is called from destructor,
|
||||
/// then we know that caller is not a downloader, because downloader is reset each nextImpl() call either
|
||||
/// manually or via SCOPE_EXIT.
|
||||
|
||||
if (allow_non_strict_checking)
|
||||
return "None";
|
||||
|
||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cannot use cache without query id");
|
||||
}
|
||||
if (!CurrentThread::isInitialized()
|
||||
|| !CurrentThread::get().getQueryContext()
|
||||
|| CurrentThread::getQueryId().size == 0)
|
||||
return "None:" + toString(getThreadId());
|
||||
|
||||
return CurrentThread::getQueryId().toString() + ":" + toString(getThreadId());
|
||||
}
|
||||
@ -136,7 +157,6 @@ String FileSegment::getDownloader() const
|
||||
bool FileSegment::isDownloader() const
|
||||
{
|
||||
std::lock_guard segment_lock(mutex);
|
||||
LOG_TEST(log, "Checking for current downloader. Caller: {}, downloader: {}, current state: {}", getCallerId(), downloader_id, stateToString(download_state));
|
||||
return getCallerId() == downloader_id;
|
||||
}
|
||||
|
||||
@ -221,15 +241,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset_)
|
||||
{
|
||||
std::lock_guard segment_lock(mutex);
|
||||
|
||||
auto info = getInfoForLogImpl(segment_lock);
|
||||
e.addMessage("while writing into cache, info: " + info);
|
||||
wrapWithCacheInfo(e, "while writing into cache", segment_lock);
|
||||
|
||||
LOG_ERROR(log, "Failed to write to cache. File segment info: {}", info);
|
||||
|
||||
download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION;
|
||||
|
||||
cache_writer->finalize();
|
||||
cache_writer.reset();
|
||||
setDownloadFailed(segment_lock);
|
||||
|
||||
cv.notify_all();
|
||||
|
||||
@ -239,6 +253,77 @@ void FileSegment::write(const char * from, size_t size, size_t offset_)
|
||||
assert(getDownloadOffset() == offset_ + size);
|
||||
}
|
||||
|
||||
void FileSegment::writeInMemory(const char * from, size_t size)
|
||||
{
|
||||
if (!size)
|
||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Attempt to write zero size cache file");
|
||||
|
||||
if (availableSize() < size)
|
||||
throw Exception(
|
||||
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
|
||||
"Not enough space is reserved. Available: {}, expected: {}", availableSize(), size);
|
||||
|
||||
std::lock_guard segment_lock(mutex);
|
||||
|
||||
if (cache_writer)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache writer already initialized");
|
||||
|
||||
auto download_path = cache->getPathInLocalCache(key(), offset());
|
||||
cache_writer = std::make_unique<WriteBufferFromFile>(download_path, size + 1);
|
||||
|
||||
try
|
||||
{
|
||||
cache_writer->write(from, size);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
wrapWithCacheInfo(e, "while writing into cache", segment_lock);
|
||||
|
||||
setDownloadFailed(segment_lock);
|
||||
|
||||
cv.notify_all();
|
||||
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
size_t FileSegment::finalizeWrite()
|
||||
{
|
||||
std::lock_guard segment_lock(mutex);
|
||||
|
||||
if (!cache_writer)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache writer not initialized");
|
||||
|
||||
size_t size = cache_writer->offset();
|
||||
|
||||
if (size == 0)
|
||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing size is not allowed");
|
||||
|
||||
try
|
||||
{
|
||||
cache_writer->next();
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
wrapWithCacheInfo(e, "while writing into cache", segment_lock);
|
||||
|
||||
setDownloadFailed(segment_lock);
|
||||
|
||||
cv.notify_all();
|
||||
|
||||
throw;
|
||||
}
|
||||
|
||||
downloaded_size += size;
|
||||
|
||||
if (downloaded_size != range().size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected downloaded size to equal file segment size ({} == {})", downloaded_size, range().size());
|
||||
|
||||
setDownloaded(segment_lock);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
FileSegment::State FileSegment::wait()
|
||||
{
|
||||
std::unique_lock segment_lock(mutex);
|
||||
@ -303,6 +388,20 @@ void FileSegment::setDownloaded(std::lock_guard<std::mutex> & /* segment_lock */
|
||||
{
|
||||
download_state = State::DOWNLOADED;
|
||||
is_downloaded = true;
|
||||
downloader_id.clear();
|
||||
|
||||
if (cache_writer)
|
||||
{
|
||||
cache_writer->finalize();
|
||||
cache_writer.reset();
|
||||
remote_file_reader.reset();
|
||||
}
|
||||
}
|
||||
|
||||
void FileSegment::setDownloadFailed(std::lock_guard<std::mutex> & /* segment_lock */)
|
||||
{
|
||||
download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION;
|
||||
downloader_id.clear();
|
||||
|
||||
if (cache_writer)
|
||||
{
|
||||
@ -360,7 +459,7 @@ void FileSegment::complete(State state)
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true))
|
||||
if (!downloader_id.empty() && downloader_id == getCallerIdImpl())
|
||||
downloader_id.clear();
|
||||
|
||||
cv.notify_all();
|
||||
@ -385,7 +484,7 @@ void FileSegment::complete(std::lock_guard<std::mutex> & cache_lock)
|
||||
/// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the
|
||||
/// downloader or the only owner of the segment.
|
||||
|
||||
bool can_update_segment_state = downloader_id == getCallerIdImpl(true)
|
||||
bool can_update_segment_state = downloader_id == getCallerIdImpl()
|
||||
|| cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);
|
||||
|
||||
if (can_update_segment_state)
|
||||
@ -394,11 +493,11 @@ void FileSegment::complete(std::lock_guard<std::mutex> & cache_lock)
|
||||
|
||||
try
|
||||
{
|
||||
completeImpl(cache_lock, segment_lock, /* allow_non_strict_checking */true);
|
||||
completeImpl(cache_lock, segment_lock);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true))
|
||||
if (!downloader_id.empty() && downloader_id == getCallerIdImpl())
|
||||
downloader_id.clear();
|
||||
|
||||
cv.notify_all();
|
||||
@ -408,7 +507,7 @@ void FileSegment::complete(std::lock_guard<std::mutex> & cache_lock)
|
||||
cv.notify_all();
|
||||
}
|
||||
|
||||
void FileSegment::completeImpl(std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & segment_lock, bool allow_non_strict_checking)
|
||||
void FileSegment::completeImpl(std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & segment_lock)
|
||||
{
|
||||
bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);
|
||||
|
||||
@ -444,7 +543,7 @@ void FileSegment::completeImpl(std::lock_guard<std::mutex> & cache_lock, std::lo
|
||||
}
|
||||
}
|
||||
|
||||
if (!downloader_id.empty() && (downloader_id == getCallerIdImpl(allow_non_strict_checking) || is_last_holder))
|
||||
if (!downloader_id.empty() && (downloader_id == getCallerIdImpl() || is_last_holder))
|
||||
{
|
||||
LOG_TEST(log, "Clearing downloader id: {}, current state: {}", downloader_id, stateToString(download_state));
|
||||
downloader_id.clear();
|
||||
@ -471,6 +570,11 @@ String FileSegment::getInfoForLogImpl(std::lock_guard<std::mutex> & segment_lock
|
||||
return info.str();
|
||||
}
|
||||
|
||||
void FileSegment::wrapWithCacheInfo(Exception & e, const String & message, std::lock_guard<std::mutex> & segment_lock) const
|
||||
{
|
||||
e.addMessage(fmt::format("{}, current cache state: {}", message, getInfoForLogImpl(segment_lock)));
|
||||
}
|
||||
|
||||
String FileSegment::stateToString(FileSegment::State state)
|
||||
{
|
||||
switch (state)
|
||||
@ -504,6 +608,23 @@ void FileSegment::assertCorrectnessImpl(std::lock_guard<std::mutex> & /* segment
|
||||
assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0);
|
||||
}
|
||||
|
||||
FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard<std::mutex> & /* cache_lock */)
|
||||
{
|
||||
auto snapshot = std::make_shared<FileSegment>(
|
||||
file_segment->offset(),
|
||||
file_segment->range().size(),
|
||||
file_segment->key(),
|
||||
nullptr,
|
||||
State::EMPTY);
|
||||
|
||||
snapshot->hits_count = file_segment->getHitsCount();
|
||||
snapshot->ref_count = file_segment.use_count();
|
||||
snapshot->downloaded_size = file_segment->getDownloadedSize();
|
||||
snapshot->download_state = file_segment->state();
|
||||
|
||||
return snapshot;
|
||||
}
|
||||
|
||||
FileSegmentsHolder::~FileSegmentsHolder()
|
||||
{
|
||||
/// In CacheableReadBufferFromRemoteFS file segment's downloader removes file segments from
|
||||
|
@ -97,6 +97,15 @@ public:
|
||||
|
||||
void write(const char * from, size_t size, size_t offset_);
|
||||
|
||||
/**
|
||||
* writeInMemory and finalizeWrite are used together to write a single file with delay.
|
||||
* Both can be called only once, one after another. Used for writing cache via threadpool
|
||||
* on wrote operations. TODO: this solution is temporary, until adding a separate cache layer.
|
||||
*/
|
||||
void writeInMemory(const char * from, size_t size);
|
||||
|
||||
size_t finalizeWrite();
|
||||
|
||||
RemoteFileReaderPtr getRemoteFileReader();
|
||||
|
||||
void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_);
|
||||
@ -117,14 +126,24 @@ public:
|
||||
|
||||
size_t getDownloadOffset() const;
|
||||
|
||||
size_t getDownloadedSize() const;
|
||||
|
||||
void completeBatchAndResetDownloader();
|
||||
|
||||
void complete(State state);
|
||||
|
||||
String getInfoForLog() const;
|
||||
|
||||
size_t getHitsCount() const { return hits_count; }
|
||||
|
||||
size_t getRefCount() const { return ref_count; }
|
||||
|
||||
void incrementHitsCount() { ++hits_count; }
|
||||
|
||||
void assertCorrectness() const;
|
||||
|
||||
static FileSegmentPtr getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
private:
|
||||
size_t availableSize() const { return reserved_size - downloaded_size; }
|
||||
|
||||
@ -133,6 +152,9 @@ private:
|
||||
void assertCorrectnessImpl(std::lock_guard<std::mutex> & segment_lock) const;
|
||||
|
||||
void setDownloaded(std::lock_guard<std::mutex> & segment_lock);
|
||||
void setDownloadFailed(std::lock_guard<std::mutex> & segment_lock);
|
||||
|
||||
void wrapWithCacheInfo(Exception & e, const String & message, std::lock_guard<std::mutex> & segment_lock) const;
|
||||
|
||||
bool lastFileSegmentHolder() const;
|
||||
|
||||
@ -144,9 +166,9 @@ private:
|
||||
|
||||
void completeImpl(
|
||||
std::lock_guard<std::mutex> & cache_lock,
|
||||
std::lock_guard<std::mutex> & segment_lock, bool allow_non_strict_checking = false);
|
||||
std::lock_guard<std::mutex> & segment_lock);
|
||||
|
||||
static String getCallerIdImpl(bool allow_non_strict_checking = false);
|
||||
static String getCallerIdImpl();
|
||||
|
||||
void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);
|
||||
|
||||
@ -180,6 +202,8 @@ private:
|
||||
bool detached = false;
|
||||
|
||||
std::atomic<bool> is_downloaded{false};
|
||||
std::atomic<size_t> hits_count = 0; /// cache hits.
|
||||
std::atomic<size_t> ref_count = 0; /// Used for getting snapshot state
|
||||
};
|
||||
|
||||
struct FileSegmentsHolder : private boost::noncopyable
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <Interpreters/TraceLog.h>
|
||||
#include <Interpreters/ProcessorsProfileLog.h>
|
||||
#include <Interpreters/ZooKeeperLog.h>
|
||||
#include <Interpreters/TransactionsInfoLog.h>
|
||||
|
||||
#include <Common/MemoryTrackerBlockerInThread.h>
|
||||
#include <Common/SystemLogBase.h>
|
||||
|
@ -23,6 +23,7 @@
|
||||
M(QueryViewsLogElement) \
|
||||
M(SessionLogElement) \
|
||||
M(TraceLogElement) \
|
||||
M(TransactionsInfoLogElement) \
|
||||
M(ZooKeeperLogElement) \
|
||||
M(ProcessorProfileLogElement) \
|
||||
M(TextLogElement)
|
||||
|
@ -216,6 +216,11 @@ public:
|
||||
return query_context.lock();
|
||||
}
|
||||
|
||||
auto getGlobalContext() const
|
||||
{
|
||||
return global_context.lock();
|
||||
}
|
||||
|
||||
void disableProfiling()
|
||||
{
|
||||
assert(!query_profiler_real && !query_profiler_cpu);
|
||||
|
43
src/Common/TransactionID.cpp
Normal file
43
src/Common/TransactionID.cpp
Normal file
@ -0,0 +1,43 @@
|
||||
#include <Common/TransactionID.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
TIDHash TransactionID::getHash() const
|
||||
{
|
||||
SipHash hash;
|
||||
hash.update(start_csn);
|
||||
hash.update(local_tid);
|
||||
hash.update(host_id);
|
||||
return hash.get64();
|
||||
}
|
||||
|
||||
|
||||
void TransactionID::write(const TransactionID & tid, WriteBuffer & buf)
|
||||
{
|
||||
writeChar('(', buf);
|
||||
writeText(tid.start_csn, buf);
|
||||
writeCString(", ", buf);
|
||||
writeText(tid.local_tid, buf);
|
||||
writeCString(", ", buf);
|
||||
writeText(tid.host_id, buf);
|
||||
writeChar(')', buf);
|
||||
}
|
||||
|
||||
TransactionID TransactionID::read(ReadBuffer & buf)
|
||||
{
|
||||
TransactionID tid = Tx::EmptyTID;
|
||||
assertChar('(', buf);
|
||||
readText(tid.start_csn, buf);
|
||||
assertString(", ", buf);
|
||||
readText(tid.local_tid, buf);
|
||||
assertString(", ", buf);
|
||||
readText(tid.host_id, buf);
|
||||
assertChar(')', buf);
|
||||
return tid;
|
||||
}
|
||||
|
||||
}
|
115
src/Common/TransactionID.h
Normal file
115
src/Common/TransactionID.h
Normal file
@ -0,0 +1,115 @@
|
||||
#pragma once
|
||||
#include <Core/Types.h>
|
||||
#include <Core/UUID.h>
|
||||
#include <fmt/format.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class IDataType;
|
||||
using DataTypePtr = std::shared_ptr<const IDataType>;
|
||||
class MergeTreeTransaction;
|
||||
|
||||
/// This macro is useful for places where a pointer to current transaction should be passed,
|
||||
/// but transactions are not supported yet (e.g. when calling MergeTreeData's methods from StorageReplicatedMergeTree)
|
||||
/// or transaction object is not needed and not passed intentionally.
|
||||
#ifndef NO_TRANSACTION_PTR
|
||||
#define NO_TRANSACTION_PTR std::shared_ptr<MergeTreeTransaction>(nullptr)
|
||||
#define NO_TRANSACTION_RAW static_cast<MergeTreeTransaction *>(nullptr)
|
||||
#endif
|
||||
|
||||
/// Commit Sequence Number
|
||||
using CSN = UInt64;
|
||||
/// Local part of TransactionID
|
||||
using LocalTID = UInt64;
|
||||
/// Hash of TransactionID that fits into 64-bit atomic
|
||||
using TIDHash = UInt64;
|
||||
|
||||
namespace Tx
|
||||
{
|
||||
/// For transactions that are probably not committed (yet)
|
||||
const CSN UnknownCSN = 0;
|
||||
/// For changes were made without creating a transaction
|
||||
const CSN PrehistoricCSN = 1;
|
||||
/// Special reserved values
|
||||
const CSN CommittingCSN = 2;
|
||||
const CSN EverythingVisibleCSN = 3;
|
||||
const CSN MaxReservedCSN = 32;
|
||||
|
||||
/// So far, that changes will never become visible
|
||||
const CSN RolledBackCSN = std::numeric_limits<CSN>::max();
|
||||
|
||||
const LocalTID PrehistoricLocalTID = 1;
|
||||
const LocalTID DummyLocalTID = 2;
|
||||
const LocalTID MaxReservedLocalTID = 32;
|
||||
}
|
||||
|
||||
struct TransactionID
|
||||
{
|
||||
/// Global sequential number, the newest commit timestamp the we saw when this transaction began
|
||||
CSN start_csn = 0;
|
||||
/// Local sequential that is unique for each transaction started by this host within specific start_csn
|
||||
LocalTID local_tid = 0;
|
||||
/// UUID of host that has started this transaction
|
||||
UUID host_id = UUIDHelpers::Nil;
|
||||
|
||||
/// NOTE Maybe we could just generate UUIDv4 for each transaction, but it would be harder to debug.
|
||||
/// Partial order is defined for this TransactionID structure:
|
||||
/// (tid1.start_csn <= tid2.start_csn) <==> (tid1 <= tid2)
|
||||
/// (tid1.start_csn == tid2.start_csn && tid1.host_id == tid2.host_id && tid1.local_tid < tid2.local_tid) ==> (tid1 < tid2)
|
||||
/// If two transaction have the same start_csn, but were started by different hosts, then order is undefined.
|
||||
|
||||
bool operator == (const TransactionID & rhs) const
|
||||
{
|
||||
return start_csn == rhs.start_csn && local_tid == rhs.local_tid && host_id == rhs.host_id;
|
||||
}
|
||||
|
||||
bool operator != (const TransactionID & rhs) const
|
||||
{
|
||||
return !(*this == rhs);
|
||||
}
|
||||
|
||||
TIDHash getHash() const;
|
||||
|
||||
bool isEmpty() const
|
||||
{
|
||||
assert((local_tid == 0) == (start_csn == 0 && host_id == UUIDHelpers::Nil));
|
||||
return local_tid == 0;
|
||||
}
|
||||
|
||||
bool isPrehistoric() const
|
||||
{
|
||||
assert((local_tid == Tx::PrehistoricLocalTID) == (start_csn == Tx::PrehistoricCSN));
|
||||
return local_tid == Tx::PrehistoricLocalTID;
|
||||
}
|
||||
|
||||
|
||||
static void write(const TransactionID & tid, WriteBuffer & buf);
|
||||
static TransactionID read(ReadBuffer & buf);
|
||||
};
|
||||
|
||||
namespace Tx
|
||||
{
|
||||
const TransactionID EmptyTID = {0, 0, UUIDHelpers::Nil};
|
||||
const TransactionID PrehistoricTID = {PrehistoricCSN, PrehistoricLocalTID, UUIDHelpers::Nil};
|
||||
const TransactionID DummyTID = {PrehistoricCSN, DummyLocalTID, UUIDHelpers::Nil};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template<>
|
||||
struct fmt::formatter<DB::TransactionID>
|
||||
{
|
||||
template<typename ParseContext>
|
||||
constexpr auto parse(ParseContext & context)
|
||||
{
|
||||
return context.begin();
|
||||
}
|
||||
|
||||
template<typename FormatContext>
|
||||
auto format(const DB::TransactionID & tid, FormatContext & context)
|
||||
{
|
||||
return fmt::format_to(context.out(), "({}, {}, {})", tid.start_csn, tid.local_tid, tid.host_id);
|
||||
}
|
||||
};
|
@ -1270,4 +1270,14 @@ String extractZooKeeperPath(const String & path, bool check_starts_with_slash, P
|
||||
return normalizeZooKeeperPath(path, check_starts_with_slash, log);
|
||||
}
|
||||
|
||||
String getSequentialNodeName(const String & prefix, UInt64 number)
|
||||
{
|
||||
/// NOTE Sequential counter in ZooKeeper is Int32.
|
||||
assert(number < std::numeric_limits<Int32>::max());
|
||||
constexpr size_t seq_node_digits = 10;
|
||||
String num_str = std::to_string(number);
|
||||
String name = prefix + String(seq_node_digits - num_str.size(), '0') + num_str;
|
||||
return name;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -417,4 +417,6 @@ String extractZooKeeperName(const String & path);
|
||||
|
||||
String extractZooKeeperPath(const String & path, bool check_starts_with_slash, Poco::Logger * log = nullptr);
|
||||
|
||||
String getSequentialNodeName(const String & prefix, UInt64 number);
|
||||
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Common/FileCache.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
#include <Common/FileCacheSettings.h>
|
||||
#include <Common/tests/gtest_global_context.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/hex.h>
|
||||
@ -102,7 +103,10 @@ TEST(LRUFileCache, get)
|
||||
query_context->setCurrentQueryId("query_id");
|
||||
DB::CurrentThread::QueryScope query_scope_holder(query_context);
|
||||
|
||||
auto cache = DB::LRUFileCache(cache_base_path, 30, 5);
|
||||
DB::FileCacheSettings settings;
|
||||
settings.max_size = 30;
|
||||
settings.max_elements = 5;
|
||||
auto cache = DB::LRUFileCache(cache_base_path, settings);
|
||||
cache.initialize();
|
||||
auto key = cache.hash("key1");
|
||||
|
||||
@ -472,7 +476,7 @@ TEST(LRUFileCache, get)
|
||||
{
|
||||
/// Test LRUCache::restore().
|
||||
|
||||
auto cache2 = DB::LRUFileCache(cache_base_path, 30, 5);
|
||||
auto cache2 = DB::LRUFileCache(cache_base_path, settings);
|
||||
cache2.initialize();
|
||||
|
||||
ASSERT_EQ(cache2.getStat().downloaded_size, 5);
|
||||
@ -491,7 +495,9 @@ TEST(LRUFileCache, get)
|
||||
{
|
||||
/// Test max file segment size
|
||||
|
||||
auto cache2 = DB::LRUFileCache(caches_dir / "cache2", 30, 5, /* max_file_segment_size */10);
|
||||
auto settings2 = settings;
|
||||
settings2.max_file_segment_size = 10;
|
||||
auto cache2 = DB::LRUFileCache(caches_dir / "cache2", settings2);
|
||||
cache2.initialize();
|
||||
|
||||
auto holder1 = cache2.getOrSet(key, 0, 25); /// Get [0, 24]
|
||||
|
@ -465,6 +465,7 @@ class IColumn;
|
||||
M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
|
||||
M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
|
||||
M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
|
||||
M(Bool, allow_settings_after_format_in_insert, false, "Allow SETTINGS after FORMAT, but note, that this is not always safe (note: this is a compatibility setting).", 0) \
|
||||
M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \
|
||||
M(Seconds, periodic_live_view_refresh, DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC, "Interval after which periodically refreshed live view is forced to refresh.", 0) \
|
||||
M(Bool, transform_null_in, false, "If enabled, NULL values will be matched with 'IN' operator as if they are considered equal.", 0) \
|
||||
@ -560,8 +561,10 @@ class IColumn;
|
||||
\
|
||||
M(UInt64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \
|
||||
M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \
|
||||
M(Bool, remote_fs_enable_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must me done via disk config), but allows to bypass cache for some queries if intended", 0) \
|
||||
M(UInt64, remote_fs_cache_max_wait_sec, 5, "Allow to wait at most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \
|
||||
M(Bool, enable_filesystem_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must me done via disk config), but allows to bypass cache for some queries if intended", 0) \
|
||||
M(UInt64, filesystem_cache_max_wait_sec, 5, "Allow to wait at most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \
|
||||
M(Bool, enable_filesystem_cache_on_write_operations, false, "Write into cache on write operations. To actually work this setting requires be added to disk config too", 0) \
|
||||
M(Bool, read_from_filesystem_cache_if_exists_otherwise_bypass_cache, false, "", 0) \
|
||||
\
|
||||
M(UInt64, http_max_tries, 10, "Max attempts to read via http.", 0) \
|
||||
M(UInt64, http_retry_initial_backoff_ms, 100, "Min milliseconds for backoff, when retrying read via http", 0) \
|
||||
@ -578,6 +581,7 @@ class IColumn;
|
||||
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
|
||||
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
|
||||
M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
|
||||
M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
|
||||
// End of COMMON_SETTINGS
|
||||
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
|
||||
|
||||
@ -638,6 +642,12 @@ class IColumn;
|
||||
M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \
|
||||
M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \
|
||||
M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \
|
||||
M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \
|
||||
M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \
|
||||
M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format Parquet", 0) \
|
||||
M(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format ORC", 0) \
|
||||
M(Bool, input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format Arrow", 0) \
|
||||
M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
|
||||
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
|
||||
\
|
||||
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \
|
||||
|
@ -165,7 +165,7 @@ DECLARE_SETTING_ENUM(DistributedDDLOutputMode)
|
||||
|
||||
enum class HandleKafkaErrorMode
|
||||
{
|
||||
DEFAULT = 0, // Ignore errors whit threshold.
|
||||
DEFAULT = 0, // Ignore errors with threshold.
|
||||
STREAM, // Put errors to stream in the virtual column named ``_error.
|
||||
/*FIXED_SYSTEM_TABLE, Put errors to in a fixed system table likey system.kafka_errors. This is not implemented now. */
|
||||
/*CUSTOM_SYSTEM_TABLE, Put errors to in a custom system table. This is not implemented now. */
|
||||
|
@ -45,22 +45,7 @@ DataTypeMap::DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & valu
|
||||
|
||||
void DataTypeMap::assertKeyType() const
|
||||
{
|
||||
bool type_error = false;
|
||||
if (key_type->getTypeId() == TypeIndex::LowCardinality)
|
||||
{
|
||||
const auto & low_cardinality_data_type = assert_cast<const DataTypeLowCardinality &>(*key_type);
|
||||
if (!isStringOrFixedString(*(low_cardinality_data_type.getDictionaryType())))
|
||||
type_error = true;
|
||||
}
|
||||
else if (!key_type->isValueRepresentedByInteger()
|
||||
&& !isStringOrFixedString(*key_type)
|
||||
&& !WhichDataType(key_type).isNothing()
|
||||
&& !WhichDataType(key_type).isUUID())
|
||||
{
|
||||
type_error = true;
|
||||
}
|
||||
|
||||
if (type_error)
|
||||
if (!checkKeyType(key_type))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Type of Map key must be a type, that can be represented by integer or String or FixedString (possibly LowCardinality) or UUID,"
|
||||
" but {} given", key_type->getName());
|
||||
@ -102,6 +87,25 @@ bool DataTypeMap::equals(const IDataType & rhs) const
|
||||
return nested->equals(*rhs_map.nested);
|
||||
}
|
||||
|
||||
bool DataTypeMap::checkKeyType(DataTypePtr key_type)
|
||||
{
|
||||
if (key_type->getTypeId() == TypeIndex::LowCardinality)
|
||||
{
|
||||
const auto & low_cardinality_data_type = assert_cast<const DataTypeLowCardinality &>(*key_type);
|
||||
if (!isStringOrFixedString(*(low_cardinality_data_type.getDictionaryType())))
|
||||
return false;
|
||||
}
|
||||
else if (!key_type->isValueRepresentedByInteger()
|
||||
&& !isStringOrFixedString(*key_type)
|
||||
&& !WhichDataType(key_type).isNothing()
|
||||
&& !WhichDataType(key_type).isUUID())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static DataTypePtr create(const ASTPtr & arguments)
|
||||
{
|
||||
if (!arguments || arguments->children.size() != 2)
|
||||
|
@ -48,6 +48,8 @@ public:
|
||||
|
||||
SerializationPtr doGetDefaultSerialization() const override;
|
||||
|
||||
static bool checkKeyType(DataTypePtr key_type);
|
||||
|
||||
private:
|
||||
void assertKeyType() const;
|
||||
};
|
||||
|
@ -461,6 +461,10 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_
|
||||
|
||||
BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context)
|
||||
{
|
||||
|
||||
if (query_context->getCurrentTransaction() && query_context->getSettingsRef().throw_on_unsupported_query_inside_transaction)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Distributed DDL queries inside transactions are not supported");
|
||||
|
||||
if (is_readonly)
|
||||
throw Exception(ErrorCodes::NO_ZOOKEEPER, "Database is in readonly mode, because it cannot connect to ZooKeeper");
|
||||
|
||||
|
@ -319,7 +319,6 @@ bool DatabaseReplicatedDDLWorker::canRemoveQueueEntry(const String & entry_name,
|
||||
void DatabaseReplicatedDDLWorker::initializeLogPointer(const String & processed_entry_name)
|
||||
{
|
||||
updateMaxDDLEntryID(processed_entry_name);
|
||||
assert(max_id.load() == parse<UInt32>(getAndSetZooKeeper()->get(fs::path(database->replica_path) / "log_ptr")));
|
||||
}
|
||||
|
||||
UInt32 DatabaseReplicatedDDLWorker::getLogPointer() const
|
||||
|
@ -71,8 +71,8 @@ std::unique_ptr<ReadBufferFromFileBase> DiskAzureBlobStorage::readFile(
|
||||
LOG_TEST(log, "Read from file by path: {}", backQuote(metadata_disk->getPath() + path));
|
||||
|
||||
auto reader_impl = std::make_unique<ReadBufferFromAzureBlobStorageGather>(
|
||||
path, blob_container_client, metadata, settings->max_single_read_retries,
|
||||
settings->max_single_download_retries, read_settings);
|
||||
blob_container_client, metadata.remote_fs_root_path, metadata.remote_fs_objects,
|
||||
settings->max_single_read_retries, settings->max_single_download_retries, read_settings);
|
||||
|
||||
if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool)
|
||||
{
|
||||
@ -90,7 +90,8 @@ std::unique_ptr<ReadBufferFromFileBase> DiskAzureBlobStorage::readFile(
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskAzureBlobStorage::writeFile(
|
||||
const String & path,
|
||||
size_t buf_size,
|
||||
WriteMode mode)
|
||||
WriteMode mode,
|
||||
const WriteSettings &)
|
||||
{
|
||||
auto blob_path = path + "_" + getRandomASCIIString(8); /// NOTE: path contains the tmp_* prefix in the blob name
|
||||
|
||||
@ -108,7 +109,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskAzureBlobStorage::writeFile(
|
||||
readOrCreateUpdateAndStoreMetadata(path, mode, false, [blob_path, count] (Metadata & metadata) { metadata.addObject(blob_path, count); return true; });
|
||||
};
|
||||
|
||||
return std::make_unique<WriteIndirectBufferFromRemoteFS>(std::move(buffer), std::move(create_metadata_callback), path);
|
||||
return std::make_unique<WriteIndirectBufferFromRemoteFS>(std::move(buffer), std::move(create_metadata_callback), blob_path);
|
||||
}
|
||||
|
||||
|
||||
|
@ -56,7 +56,8 @@ public:
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
size_t buf_size,
|
||||
WriteMode mode) override;
|
||||
WriteMode mode,
|
||||
const WriteSettings & settings) override;
|
||||
|
||||
DiskType getType() const override;
|
||||
|
||||
|
@ -150,7 +150,7 @@ DiskCacheWrapper::readFile(
|
||||
/// Note: enabling `threadpool` read requires to call setReadUntilEnd().
|
||||
current_read_settings.remote_fs_method = RemoteFSReadMethod::read;
|
||||
/// Disable data cache.
|
||||
current_read_settings.remote_fs_enable_cache = false;
|
||||
current_read_settings.enable_filesystem_cache = false;
|
||||
|
||||
if (metadata->status == DOWNLOADING)
|
||||
{
|
||||
@ -167,7 +167,11 @@ DiskCacheWrapper::readFile(
|
||||
auto tmp_path = path + ".tmp";
|
||||
{
|
||||
auto src_buffer = DiskDecorator::readFile(path, current_read_settings, read_hint, file_size);
|
||||
auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite);
|
||||
|
||||
WriteSettings write_settings;
|
||||
write_settings.enable_filesystem_cache_on_write_operations = false;
|
||||
|
||||
auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite, write_settings);
|
||||
copyData(*src_buffer, *dst_buffer);
|
||||
}
|
||||
cache_disk->moveFile(tmp_path, path);
|
||||
@ -196,10 +200,15 @@ DiskCacheWrapper::readFile(
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase>
|
||||
DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings)
|
||||
{
|
||||
if (!cache_file_predicate(path))
|
||||
return DiskDecorator::writeFile(path, buf_size, mode);
|
||||
return DiskDecorator::writeFile(path, buf_size, mode, settings);
|
||||
|
||||
WriteSettings current_settings = settings;
|
||||
/// There are two different cache implementations. Disable second one if the first is enabled.
|
||||
/// The first will soon be removed, this disabling is temporary.
|
||||
current_settings.enable_filesystem_cache_on_write_operations = false;
|
||||
|
||||
LOG_TEST(log, "Write file {} to cache", backQuote(path));
|
||||
|
||||
@ -208,15 +217,15 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode
|
||||
cache_disk->createDirectories(dir_path);
|
||||
|
||||
return std::make_unique<WritingToCacheWriteBuffer>(
|
||||
cache_disk->writeFile(path, buf_size, mode),
|
||||
cache_disk->writeFile(path, buf_size, mode, current_settings),
|
||||
[this, path]()
|
||||
{
|
||||
/// Copy file from cache to actual disk when cached buffer is finalized.
|
||||
return cache_disk->readFile(path, ReadSettings(), /* read_hint= */ {}, /* file_size= */ {});
|
||||
},
|
||||
[this, path, buf_size, mode]()
|
||||
[this, path, buf_size, mode, current_settings]()
|
||||
{
|
||||
return DiskDecorator::writeFile(path, buf_size, mode);
|
||||
return DiskDecorator::writeFile(path, buf_size, mode, current_settings);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -40,7 +40,7 @@ public:
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) override;
|
||||
|
||||
void removeFile(const String & path) override;
|
||||
void removeFileIfExists(const String & path) override;
|
||||
|
@ -121,9 +121,9 @@ DiskDecorator::readFile(
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase>
|
||||
DiskDecorator::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
DiskDecorator::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings)
|
||||
{
|
||||
return delegate->writeFile(path, buf_size, mode);
|
||||
return delegate->writeFile(path, buf_size, mode, settings);
|
||||
}
|
||||
|
||||
void DiskDecorator::removeFile(const String & path)
|
||||
|
@ -44,7 +44,8 @@ public:
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
size_t buf_size,
|
||||
WriteMode mode) override;
|
||||
WriteMode mode,
|
||||
const WriteSettings & settings) override;
|
||||
|
||||
void removeFile(const String & path) override;
|
||||
void removeFileIfExists(const String & path) override;
|
||||
@ -71,6 +72,9 @@ public:
|
||||
void shutdown() override;
|
||||
void startup() override;
|
||||
void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override;
|
||||
String getCacheBasePath() const override { return delegate->getCacheBasePath(); }
|
||||
std::vector<String> getRemotePaths(const String & path) const override { return delegate->getRemotePaths(path); }
|
||||
void getRemotePathsRecursive(const String & path, std::vector<LocalPathWithRemotePaths> & paths_map) override { return delegate->getRemotePathsRecursive(path, paths_map); }
|
||||
|
||||
DiskPtr getMetadataDiskIfExistsOrSelf() override { return delegate->getMetadataDiskIfExistsOrSelf(); }
|
||||
|
||||
|
@ -269,7 +269,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile(
|
||||
return std::make_unique<ReadBufferFromEncryptedFile>(settings.local_fs_buffer_size, std::move(buffer), key, header);
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskEncrypted::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskEncrypted::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &)
|
||||
{
|
||||
auto wrapped_path = wrappedPath(path);
|
||||
FileEncryption::Header header;
|
||||
|
@ -126,7 +126,8 @@ public:
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
size_t buf_size,
|
||||
WriteMode mode) override;
|
||||
WriteMode mode,
|
||||
const WriteSettings & settings) override;
|
||||
|
||||
void removeFile(const String & path) override
|
||||
{
|
||||
|
@ -345,7 +345,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskLocal::readFile(const String & path,
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase>
|
||||
DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &)
|
||||
{
|
||||
int flags = (mode == WriteMode::Append) ? (O_APPEND | O_CREAT | O_WRONLY) : -1;
|
||||
return std::make_unique<WriteBufferFromFile>(fs::path(disk_path) / path, buf_size, flags);
|
||||
@ -624,7 +624,7 @@ bool DiskLocal::setup()
|
||||
pcg32_fast rng(randomSeed());
|
||||
UInt32 magic_number = rng();
|
||||
{
|
||||
auto buf = writeFile(disk_checker_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
|
||||
auto buf = writeFile(disk_checker_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {});
|
||||
writeIntBinary(magic_number, *buf);
|
||||
}
|
||||
disk_checker_magic_number = magic_number;
|
||||
|
@ -79,7 +79,8 @@ public:
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
size_t buf_size,
|
||||
WriteMode mode) override;
|
||||
WriteMode mode,
|
||||
const WriteSettings & settings) override;
|
||||
|
||||
void removeFile(const String & path) override;
|
||||
void removeFileIfExists(const String & path) override;
|
||||
|
@ -326,7 +326,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskMemory::readFile(const String & path
|
||||
return std::make_unique<ReadIndirectBuffer>(path, iter->second.data);
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskMemory::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskMemory::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
|
@ -71,7 +71,8 @@ public:
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
size_t buf_size,
|
||||
WriteMode mode) override;
|
||||
WriteMode mode,
|
||||
const WriteSettings & settings) override;
|
||||
|
||||
void removeFile(const String & path) override;
|
||||
void removeFileIfExists(const String & path) override;
|
||||
|
@ -214,10 +214,10 @@ std::unique_ptr<ReadBufferFromFileBase> DiskRestartProxy::readFile(
|
||||
return std::make_unique<RestartAwareReadBuffer>(*this, std::move(impl));
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskRestartProxy::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskRestartProxy::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings)
|
||||
{
|
||||
ReadLock lock (mutex);
|
||||
auto impl = DiskDecorator::writeFile(path, buf_size, mode);
|
||||
auto impl = DiskDecorator::writeFile(path, buf_size, mode, settings);
|
||||
return std::make_unique<RestartAwareWriteBuffer>(*this, std::move(impl));
|
||||
}
|
||||
|
||||
@ -305,6 +305,24 @@ bool DiskRestartProxy::checkUniqueId(const String & id) const
|
||||
return DiskDecorator::checkUniqueId(id);
|
||||
}
|
||||
|
||||
String DiskRestartProxy::getCacheBasePath() const
|
||||
{
|
||||
ReadLock lock (mutex);
|
||||
return DiskDecorator::getCacheBasePath();
|
||||
}
|
||||
|
||||
std::vector<String> DiskRestartProxy::getRemotePaths(const String & path) const
|
||||
{
|
||||
ReadLock lock (mutex);
|
||||
return DiskDecorator::getRemotePaths(path);
|
||||
}
|
||||
|
||||
void DiskRestartProxy::getRemotePathsRecursive(const String & path, std::vector<LocalPathWithRemotePaths> & paths_map)
|
||||
{
|
||||
ReadLock lock (mutex);
|
||||
return DiskDecorator::getRemotePathsRecursive(path, paths_map);
|
||||
}
|
||||
|
||||
void DiskRestartProxy::restart()
|
||||
{
|
||||
/// Speed up processing unhealthy requests.
|
||||
|
@ -48,7 +48,7 @@ public:
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) override;
|
||||
void removeFile(const String & path) override;
|
||||
void removeFileIfExists(const String & path) override;
|
||||
void removeDirectory(const String & path) override;
|
||||
@ -63,6 +63,9 @@ public:
|
||||
void truncateFile(const String & path, size_t size) override;
|
||||
String getUniqueId(const String & path) const override;
|
||||
bool checkUniqueId(const String & id) const override;
|
||||
String getCacheBasePath() const override;
|
||||
std::vector<String> getRemotePaths(const String & path) const override;
|
||||
void getRemotePathsRecursive(const String & path, std::vector<LocalPathWithRemotePaths> & paths_map) override;
|
||||
|
||||
void restart();
|
||||
|
||||
|
@ -166,9 +166,9 @@ std::unique_ptr<ReadBufferFromFileBase> DiskWebServer::readFile(const String & p
|
||||
remote_path = remote_path.string().substr(url.size());
|
||||
|
||||
RemoteMetadata meta(path, remote_path);
|
||||
meta.remote_fs_objects.emplace_back(std::make_pair(remote_path, iter->second.size));
|
||||
meta.remote_fs_objects.emplace_back(remote_path, iter->second.size);
|
||||
|
||||
auto web_impl = std::make_unique<ReadBufferFromWebServerGather>(path, url, meta, getContext(), read_settings);
|
||||
auto web_impl = std::make_unique<ReadBufferFromWebServerGather>(url, meta.remote_fs_root_path, meta.remote_fs_objects, getContext(), read_settings);
|
||||
|
||||
if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool)
|
||||
{
|
||||
|
@ -77,7 +77,6 @@ public:
|
||||
UInt64 getTotalSpace() const final override { return std::numeric_limits<UInt64>::max(); }
|
||||
|
||||
UInt64 getAvailableSpace() const final override { return std::numeric_limits<UInt64>::max(); }
|
||||
|
||||
UInt64 getUnreservedSpace() const final override { return std::numeric_limits<UInt64>::max(); }
|
||||
|
||||
/// Read-only part
|
||||
@ -100,7 +99,7 @@ public:
|
||||
|
||||
/// Write and modification part
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String &, size_t, WriteMode) override
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String &, size_t, WriteMode, const WriteSettings &) override
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName());
|
||||
}
|
||||
@ -165,6 +164,10 @@ public:
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName());
|
||||
}
|
||||
|
||||
std::vector<String> getRemotePaths(const String &) const override { return {}; }
|
||||
|
||||
void getRemotePathsRecursive(const String &, std::vector<LocalPathWithRemotePaths> &) override {}
|
||||
|
||||
/// Create part
|
||||
|
||||
void createFile(const String &) final override {}
|
||||
|
@ -82,17 +82,17 @@ std::unique_ptr<ReadBufferFromFileBase> DiskHDFS::readFile(const String & path,
|
||||
"Read from file by path: {}. Existing HDFS objects: {}",
|
||||
backQuote(metadata_disk->getPath() + path), metadata.remote_fs_objects.size());
|
||||
|
||||
auto hdfs_impl = std::make_unique<ReadBufferFromHDFSGather>(path, config, remote_fs_root_path, metadata, read_settings);
|
||||
auto hdfs_impl = std::make_unique<ReadBufferFromHDFSGather>(config, remote_fs_root_path, remote_fs_root_path, metadata.remote_fs_objects, read_settings);
|
||||
auto buf = std::make_unique<ReadIndirectBufferFromRemoteFS>(std::move(hdfs_impl));
|
||||
return std::make_unique<SeekAvoidingReadBuffer>(std::move(buf), settings->min_bytes_for_seek);
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskHDFS::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskHDFS::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &)
|
||||
{
|
||||
/// Path to store new HDFS object.
|
||||
auto file_name = getRandomName();
|
||||
auto hdfs_path = remote_fs_root_path + file_name;
|
||||
std::string file_name = getRandomName();
|
||||
std::string hdfs_path = fs::path(remote_fs_root_path) / file_name;
|
||||
|
||||
LOG_TRACE(log, "{} to file by path: {}. HDFS path: {}", mode == WriteMode::Rewrite ? "Write" : "Append",
|
||||
backQuote(metadata_disk->getPath() + path), hdfs_path);
|
||||
@ -106,7 +106,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskHDFS::writeFile(const String & path
|
||||
readOrCreateUpdateAndStoreMetadata(path, mode, false, [file_name, count] (Metadata & metadata) { metadata.addObject(file_name, count); return true; });
|
||||
};
|
||||
|
||||
return std::make_unique<WriteIndirectBufferFromRemoteFS>(std::move(hdfs_buffer), std::move(create_metadata_callback), path);
|
||||
return std::make_unique<WriteIndirectBufferFromRemoteFS>(std::move(hdfs_buffer), std::move(create_metadata_callback), hdfs_path);
|
||||
}
|
||||
|
||||
|
||||
|
@ -60,7 +60,7 @@ public:
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) override;
|
||||
|
||||
void removeFromRemoteFS(RemoteFSPathKeeperPtr fs_paths_keeper) override;
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <Disks/Executor.h>
|
||||
#include <Disks/DiskType.h>
|
||||
#include <IO/ReadSettings.h>
|
||||
#include <IO/WriteSettings.h>
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
@ -31,6 +32,11 @@ namespace Poco
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
class IDiskDirectoryIterator;
|
||||
using DiskDirectoryIteratorPtr = std::unique_ptr<IDiskDirectoryIterator>;
|
||||
|
||||
@ -168,7 +174,8 @@ public:
|
||||
virtual std::unique_ptr<WriteBufferFromFileBase> writeFile( /// NOLINT
|
||||
const String & path,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
WriteMode mode = WriteMode::Rewrite) = 0;
|
||||
WriteMode mode = WriteMode::Rewrite,
|
||||
const WriteSettings & settings = {}) = 0;
|
||||
|
||||
/// Remove file. Throws exception if file doesn't exists or it's a directory.
|
||||
virtual void removeFile(const String & path) = 0;
|
||||
@ -197,6 +204,24 @@ public:
|
||||
/// Second bool param is a flag to remove (true) or keep (false) shared data on S3
|
||||
virtual void removeSharedFileIfExists(const String & path, bool) { removeFileIfExists(path); }
|
||||
|
||||
|
||||
virtual String getCacheBasePath() const { return ""; }
|
||||
|
||||
/// Returns a list of paths because for Log family engines there might be
|
||||
/// multiple files in remote fs for single clickhouse file.
|
||||
virtual std::vector<String> getRemotePaths(const String &) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePaths() not implemented for disk: {}`", getType());
|
||||
}
|
||||
|
||||
/// For one local path there might be multiple remote paths in case of Log family engines.
|
||||
using LocalPathWithRemotePaths = std::pair<String, std::vector<String>>;
|
||||
|
||||
virtual void getRemotePathsRecursive(const String &, std::vector<LocalPathWithRemotePaths> &)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePathsRecursive() not implemented for disk: {}`", getType());
|
||||
}
|
||||
|
||||
struct RemoveRequest
|
||||
{
|
||||
String path;
|
||||
|
@ -122,7 +122,8 @@ void IDiskRemote::Metadata::load()
|
||||
remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size());
|
||||
}
|
||||
assertChar('\n', *buf);
|
||||
remote_fs_objects[i] = {remote_fs_object_path, remote_fs_object_size};
|
||||
remote_fs_objects[i].relative_path = remote_fs_object_path;
|
||||
remote_fs_objects[i].bytes_size = remote_fs_object_size;
|
||||
}
|
||||
|
||||
readIntText(ref_count, *buf);
|
||||
@ -136,13 +137,15 @@ void IDiskRemote::Metadata::load()
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
|
||||
if (e.code() == ErrorCodes::UNKNOWN_FORMAT)
|
||||
throw;
|
||||
|
||||
if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED)
|
||||
throw;
|
||||
|
||||
throw Exception("Failed to read metadata file", e, ErrorCodes::UNKNOWN_FORMAT);
|
||||
throw Exception("Failed to read metadata file: " + metadata_file_path, e, ErrorCodes::UNKNOWN_FORMAT);
|
||||
}
|
||||
}
|
||||
|
||||
@ -341,6 +344,30 @@ void IDiskRemote::removeMetadataRecursive(const String & path, RemoteFSPathKeepe
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<String> IDiskRemote::getRemotePaths(const String & local_path) const
|
||||
{
|
||||
auto metadata = readMetadata(local_path);
|
||||
|
||||
std::vector<String> remote_paths;
|
||||
for (const auto & [remote_path, _] : metadata.remote_fs_objects)
|
||||
remote_paths.push_back(remote_path);
|
||||
|
||||
return remote_paths;
|
||||
}
|
||||
|
||||
void IDiskRemote::getRemotePathsRecursive(const String & local_path, std::vector<LocalPathWithRemotePaths> & paths_map)
|
||||
{
|
||||
if (metadata_disk->isFile(local_path))
|
||||
{
|
||||
paths_map.emplace_back(local_path, getRemotePaths(local_path));
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto it = iterateDirectory(local_path); it->isValid(); it->next())
|
||||
IDiskRemote::getRemotePathsRecursive(fs::path(local_path) / it->name(), paths_map);
|
||||
}
|
||||
}
|
||||
|
||||
DiskPtr DiskRemoteReservation::getDisk(size_t i) const
|
||||
{
|
||||
if (i != 0)
|
||||
@ -348,7 +375,6 @@ DiskPtr DiskRemoteReservation::getDisk(size_t i) const
|
||||
return disk;
|
||||
}
|
||||
|
||||
|
||||
void DiskRemoteReservation::update(UInt64 new_size)
|
||||
{
|
||||
std::lock_guard lock(disk->reservation_mutex);
|
||||
@ -402,6 +428,12 @@ IDiskRemote::IDiskRemote(
|
||||
}
|
||||
|
||||
|
||||
String IDiskRemote::getCacheBasePath() const
|
||||
{
|
||||
return cache ? cache->getBasePath() : "";
|
||||
}
|
||||
|
||||
|
||||
bool IDiskRemote::exists(const String & path) const
|
||||
{
|
||||
return metadata_disk->exists(path);
|
||||
@ -607,7 +639,7 @@ String IDiskRemote::getUniqueId(const String & path) const
|
||||
auto metadata = readMetadata(path);
|
||||
String id;
|
||||
if (!metadata.remote_fs_objects.empty())
|
||||
id = metadata.remote_fs_root_path + metadata.remote_fs_objects[0].first;
|
||||
id = metadata.remote_fs_root_path + metadata.remote_fs_objects[0].relative_path;
|
||||
return id;
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <filesystem>
|
||||
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric DiskSpaceReservedForMerge;
|
||||
@ -22,6 +21,24 @@ namespace CurrentMetrics
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Path to blob with it's size
|
||||
struct BlobPathWithSize
|
||||
{
|
||||
std::string relative_path;
|
||||
uint64_t bytes_size;
|
||||
|
||||
BlobPathWithSize() = default;
|
||||
BlobPathWithSize(const BlobPathWithSize & other) = default;
|
||||
|
||||
BlobPathWithSize(const std::string & relative_path_, uint64_t bytes_size_)
|
||||
: relative_path(relative_path_)
|
||||
, bytes_size(bytes_size_)
|
||||
{}
|
||||
};
|
||||
|
||||
/// List of blobs with their sizes
|
||||
using BlobsPathToSize = std::vector<BlobPathWithSize>;
|
||||
|
||||
/// Helper class to collect paths into chunks of maximum size.
|
||||
/// For s3 it is Aws::vector<ObjectIdentifier>, for hdfs it is std::vector<std::string>.
|
||||
class RemoteFSPathKeeper
|
||||
@ -66,6 +83,12 @@ public:
|
||||
|
||||
const String & getPath() const final override { return metadata_disk->getPath(); }
|
||||
|
||||
String getCacheBasePath() const final override;
|
||||
|
||||
std::vector<String> getRemotePaths(const String & local_path) const final override;
|
||||
|
||||
void getRemotePathsRecursive(const String & local_path, std::vector<LocalPathWithRemotePaths> & paths_map) override;
|
||||
|
||||
/// Methods for working with metadata. For some operations (like hardlink
|
||||
/// creation) metadata can be updated concurrently from multiple threads
|
||||
/// (file actually rewritten on disk). So additional RW lock is required for
|
||||
@ -163,6 +186,7 @@ protected:
|
||||
const String remote_fs_root_path;
|
||||
|
||||
DiskPtr metadata_disk;
|
||||
|
||||
FileCachePtr cache;
|
||||
|
||||
private:
|
||||
@ -184,10 +208,8 @@ using RemoteDiskPtr = std::shared_ptr<IDiskRemote>;
|
||||
/// Minimum info, required to be passed to ReadIndirectBufferFromRemoteFS<T>
|
||||
struct RemoteMetadata
|
||||
{
|
||||
using PathAndSize = std::pair<String, size_t>;
|
||||
|
||||
/// Remote FS objects paths and their sizes.
|
||||
std::vector<PathAndSize> remote_fs_objects;
|
||||
std::vector<BlobPathWithSize> remote_fs_objects;
|
||||
|
||||
/// URI
|
||||
const String & remote_fs_root_path;
|
||||
|
@ -122,10 +122,25 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment(
|
||||
{
|
||||
auto range = file_segment->range();
|
||||
|
||||
size_t wait_download_max_tries = settings.remote_fs_cache_max_wait_sec;
|
||||
size_t wait_download_max_tries = settings.filesystem_cache_max_wait_sec;
|
||||
size_t wait_download_tries = 0;
|
||||
|
||||
auto download_state = file_segment->state();
|
||||
|
||||
if (settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache)
|
||||
{
|
||||
if (download_state == FileSegment::State::DOWNLOADED)
|
||||
{
|
||||
read_type = ReadType::CACHED;
|
||||
return getCacheReadBuffer(range.left);
|
||||
}
|
||||
else
|
||||
{
|
||||
read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE;
|
||||
return getRemoteFSReadBuffer(file_segment, read_type);
|
||||
}
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
switch (download_state)
|
||||
@ -375,6 +390,9 @@ bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext()
|
||||
|
||||
implementation_buffer = getImplementationBuffer(*current_file_segment_it);
|
||||
|
||||
if (read_type == ReadType::CACHED)
|
||||
(*current_file_segment_it)->incrementHitsCount();
|
||||
|
||||
LOG_TEST(log, "New segment: {}", (*current_file_segment_it)->range().toString());
|
||||
return true;
|
||||
}
|
||||
@ -559,9 +577,6 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
|
||||
{
|
||||
last_caller_id = FileSegment::getCallerId();
|
||||
|
||||
if (IFileCache::shouldBypassCache())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Using cache when not allowed");
|
||||
|
||||
if (!initialized)
|
||||
initialize(file_offset_of_buffer_end, getTotalSizeToRead());
|
||||
|
||||
@ -606,6 +621,9 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
|
||||
else
|
||||
{
|
||||
implementation_buffer = getImplementationBuffer(*current_file_segment_it);
|
||||
|
||||
if (read_type == ReadType::CACHED)
|
||||
(*current_file_segment_it)->incrementHitsCount();
|
||||
}
|
||||
|
||||
assert(!internal_buffer.empty());
|
||||
|
@ -38,12 +38,12 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S
|
||||
current_path = path;
|
||||
|
||||
auto cache = settings.remote_fs_cache;
|
||||
bool with_cache = cache && settings.remote_fs_enable_cache && !IFileCache::shouldBypassCache();
|
||||
bool with_cache = cache && settings.enable_filesystem_cache;
|
||||
|
||||
auto remote_file_reader_creator = [=, this]()
|
||||
{
|
||||
return std::make_unique<ReadBufferFromS3>(
|
||||
client_ptr, bucket, fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries,
|
||||
client_ptr, bucket, fs::path(common_path_prefix) / path, max_single_read_retries,
|
||||
settings, /* use_external_buffer */true, /* offset */ 0, read_until_position, /* restricted_seek */true);
|
||||
};
|
||||
|
||||
@ -83,11 +83,14 @@ SeekableReadBufferPtr ReadBufferFromHDFSGather::createImplementationBuffer(const
|
||||
#endif
|
||||
|
||||
|
||||
ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather(const RemoteMetadata & metadata_, const ReadSettings & settings_, const String & path_)
|
||||
ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather(
|
||||
const std::string & common_path_prefix_,
|
||||
const BlobsPathToSize & blobs_to_read_,
|
||||
const ReadSettings & settings_)
|
||||
: ReadBuffer(nullptr, 0)
|
||||
, metadata(metadata_)
|
||||
, common_path_prefix(common_path_prefix_)
|
||||
, blobs_to_read(blobs_to_read_)
|
||||
, settings(settings_)
|
||||
, canonical_path(path_)
|
||||
, log(&Poco::Logger::get("ReadBufferFromRemoteFSGather"))
|
||||
{
|
||||
}
|
||||
@ -119,9 +122,9 @@ void ReadBufferFromRemoteFSGather::initialize()
|
||||
{
|
||||
/// One clickhouse file can be split into multiple files in remote fs.
|
||||
auto current_buf_offset = file_offset_of_buffer_end;
|
||||
for (size_t i = 0; i < metadata.remote_fs_objects.size(); ++i)
|
||||
for (size_t i = 0; i < blobs_to_read.size(); ++i)
|
||||
{
|
||||
const auto & [file_path, size] = metadata.remote_fs_objects[i];
|
||||
const auto & [file_path, size] = blobs_to_read[i];
|
||||
|
||||
if (size > current_buf_offset)
|
||||
{
|
||||
@ -138,7 +141,7 @@ void ReadBufferFromRemoteFSGather::initialize()
|
||||
|
||||
current_buf_offset -= size;
|
||||
}
|
||||
current_buf_idx = metadata.remote_fs_objects.size();
|
||||
current_buf_idx = blobs_to_read.size();
|
||||
current_buf = nullptr;
|
||||
}
|
||||
|
||||
@ -168,12 +171,12 @@ bool ReadBufferFromRemoteFSGather::nextImpl()
|
||||
bool ReadBufferFromRemoteFSGather::moveToNextBuffer()
|
||||
{
|
||||
/// If there is no available buffers - nothing to read.
|
||||
if (current_buf_idx + 1 >= metadata.remote_fs_objects.size())
|
||||
if (current_buf_idx + 1 >= blobs_to_read.size())
|
||||
return false;
|
||||
|
||||
++current_buf_idx;
|
||||
|
||||
const auto & [path, size] = metadata.remote_fs_objects[current_buf_idx];
|
||||
const auto & [path, size] = blobs_to_read[current_buf_idx];
|
||||
current_buf = createImplementationBuffer(path, size);
|
||||
|
||||
return true;
|
||||
@ -202,7 +205,7 @@ bool ReadBufferFromRemoteFSGather::readImpl()
|
||||
if (!result)
|
||||
result = current_buf->next();
|
||||
|
||||
if (metadata.remote_fs_objects.size() == 1)
|
||||
if (blobs_to_read.size() == 1)
|
||||
{
|
||||
file_offset_of_buffer_end = current_buf->getFileOffsetOfBufferEnd();
|
||||
}
|
||||
@ -255,8 +258,8 @@ String ReadBufferFromRemoteFSGather::getFileName() const
|
||||
size_t ReadBufferFromRemoteFSGather::getFileSize() const
|
||||
{
|
||||
size_t size = 0;
|
||||
for (const auto & object : metadata.remote_fs_objects)
|
||||
size += object.second;
|
||||
for (const auto & object : blobs_to_read)
|
||||
size += object.bytes_size;
|
||||
return size;
|
||||
}
|
||||
|
||||
|
@ -26,9 +26,9 @@ friend class ReadIndirectBufferFromRemoteFS;
|
||||
|
||||
public:
|
||||
ReadBufferFromRemoteFSGather(
|
||||
const RemoteMetadata & metadata_,
|
||||
const ReadSettings & settings_,
|
||||
const String & path_);
|
||||
const std::string & common_path_prefix_,
|
||||
const BlobsPathToSize & blobs_to_read_,
|
||||
const ReadSettings & settings_);
|
||||
|
||||
String getFileName() const;
|
||||
|
||||
@ -57,7 +57,9 @@ public:
|
||||
protected:
|
||||
virtual SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size) = 0;
|
||||
|
||||
RemoteMetadata metadata;
|
||||
std::string common_path_prefix;
|
||||
|
||||
BlobsPathToSize blobs_to_read;
|
||||
|
||||
ReadSettings settings;
|
||||
|
||||
@ -89,8 +91,6 @@ private:
|
||||
*/
|
||||
size_t bytes_to_ignore = 0;
|
||||
|
||||
String canonical_path;
|
||||
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
@ -101,13 +101,13 @@ class ReadBufferFromS3Gather final : public ReadBufferFromRemoteFSGather
|
||||
{
|
||||
public:
|
||||
ReadBufferFromS3Gather(
|
||||
const String & path_,
|
||||
std::shared_ptr<Aws::S3::S3Client> client_ptr_,
|
||||
const String & bucket_,
|
||||
IDiskRemote::Metadata metadata_,
|
||||
const std::string & common_path_prefix_,
|
||||
const BlobsPathToSize & blobs_to_read_,
|
||||
size_t max_single_read_retries_,
|
||||
const ReadSettings & settings_)
|
||||
: ReadBufferFromRemoteFSGather(metadata_, settings_, path_)
|
||||
: ReadBufferFromRemoteFSGather(common_path_prefix_, blobs_to_read_, settings_)
|
||||
, client_ptr(std::move(client_ptr_))
|
||||
, bucket(bucket_)
|
||||
, max_single_read_retries(max_single_read_retries_)
|
||||
@ -130,13 +130,13 @@ class ReadBufferFromAzureBlobStorageGather final : public ReadBufferFromRemoteFS
|
||||
{
|
||||
public:
|
||||
ReadBufferFromAzureBlobStorageGather(
|
||||
const String & path_,
|
||||
std::shared_ptr<Azure::Storage::Blobs::BlobContainerClient> blob_container_client_,
|
||||
IDiskRemote::Metadata metadata_,
|
||||
const std::string & common_path_prefix_,
|
||||
const BlobsPathToSize & blobs_to_read_,
|
||||
size_t max_single_read_retries_,
|
||||
size_t max_single_download_retries_,
|
||||
const ReadSettings & settings_)
|
||||
: ReadBufferFromRemoteFSGather(metadata_, settings_, path_)
|
||||
: ReadBufferFromRemoteFSGather(common_path_prefix_, blobs_to_read_, settings_)
|
||||
, blob_container_client(blob_container_client_)
|
||||
, max_single_read_retries(max_single_read_retries_)
|
||||
, max_single_download_retries(max_single_download_retries_)
|
||||
@ -157,12 +157,12 @@ class ReadBufferFromWebServerGather final : public ReadBufferFromRemoteFSGather
|
||||
{
|
||||
public:
|
||||
ReadBufferFromWebServerGather(
|
||||
const String & path_,
|
||||
const String & uri_,
|
||||
RemoteMetadata metadata_,
|
||||
const std::string & common_path_prefix_,
|
||||
const BlobsPathToSize & blobs_to_read_,
|
||||
ContextPtr context_,
|
||||
const ReadSettings & settings_)
|
||||
: ReadBufferFromRemoteFSGather(metadata_, settings_, path_)
|
||||
: ReadBufferFromRemoteFSGather(common_path_prefix_, blobs_to_read_, settings_)
|
||||
, uri(uri_)
|
||||
, context(context_)
|
||||
{
|
||||
@ -182,12 +182,12 @@ class ReadBufferFromHDFSGather final : public ReadBufferFromRemoteFSGather
|
||||
{
|
||||
public:
|
||||
ReadBufferFromHDFSGather(
|
||||
const String & path_,
|
||||
const Poco::Util::AbstractConfiguration & config_,
|
||||
const String & hdfs_uri_,
|
||||
IDiskRemote::Metadata metadata_,
|
||||
const std::string & common_path_prefix_,
|
||||
const BlobsPathToSize & blobs_to_read_,
|
||||
const ReadSettings & settings_)
|
||||
: ReadBufferFromRemoteFSGather(metadata_, settings_, path_)
|
||||
: ReadBufferFromRemoteFSGather(common_path_prefix_, blobs_to_read_, settings_)
|
||||
, config(config_)
|
||||
{
|
||||
const size_t begin_of_path = hdfs_uri_.find('/', hdfs_uri_.find("//") + 2);
|
||||
|
@ -54,14 +54,14 @@ std::future<IAsynchronousReader::Result> ThreadPoolRemoteFSReader::submit(Reques
|
||||
{
|
||||
ThreadStatus thread_status;
|
||||
|
||||
/// Save query context if any, because cache implementation needs it.
|
||||
if (query_context)
|
||||
thread_status.attachQueryContext(query_context);
|
||||
|
||||
/// To be able to pass ProfileEvents.
|
||||
if (running_group)
|
||||
thread_status.attachQuery(running_group);
|
||||
|
||||
/// Save query context if any, because cache implementation needs it.
|
||||
if (query_context)
|
||||
thread_status.attachQueryContext(query_context);
|
||||
|
||||
setThreadName("VFSRead");
|
||||
|
||||
CurrentMetrics::Increment metric_increment{CurrentMetrics::Read};
|
||||
@ -83,12 +83,11 @@ std::future<IAsynchronousReader::Result> ThreadPoolRemoteFSReader::submit(Reques
|
||||
|
||||
watch.stop();
|
||||
|
||||
if (running_group)
|
||||
CurrentThread::detachQuery();
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds());
|
||||
ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, result.offset ? result.size - result.offset : result.size);
|
||||
|
||||
thread_status.detachQuery(/* if_not_detached */true);
|
||||
|
||||
return Result{ .size = result.size, .offset = result.offset };
|
||||
});
|
||||
|
||||
|
@ -12,10 +12,10 @@ namespace DB
|
||||
WriteIndirectBufferFromRemoteFS::WriteIndirectBufferFromRemoteFS(
|
||||
std::unique_ptr<WriteBuffer> impl_,
|
||||
CreateMetadataCallback && create_callback_,
|
||||
const String & metadata_file_path_)
|
||||
const String & remote_path_)
|
||||
: WriteBufferFromFileDecorator(std::move(impl_))
|
||||
, create_metadata_callback(std::move(create_callback_))
|
||||
, metadata_file_path(metadata_file_path_)
|
||||
, remote_path(remote_path_)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -18,17 +18,17 @@ public:
|
||||
WriteIndirectBufferFromRemoteFS(
|
||||
std::unique_ptr<WriteBuffer> impl_,
|
||||
CreateMetadataCallback && create_callback_,
|
||||
const String & metadata_file_path_);
|
||||
const String & remote_path_);
|
||||
|
||||
~WriteIndirectBufferFromRemoteFS() override;
|
||||
|
||||
String getFileName() const override { return metadata_file_path; }
|
||||
String getFileName() const override { return remote_path; }
|
||||
|
||||
private:
|
||||
void finalizeImpl() override;
|
||||
|
||||
CreateMetadataCallback create_metadata_callback;
|
||||
String metadata_file_path;
|
||||
String remote_path;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <Common/getRandomASCIIString.h>
|
||||
#include <Common/FileCacheFactory.h>
|
||||
#include <Common/FileCache.h>
|
||||
#include <Common/FileCacheSettings.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -64,18 +65,23 @@ FileCachePtr getCachePtrForDisk(
|
||||
if (!fs::exists(cache_base_path))
|
||||
fs::create_directories(cache_base_path);
|
||||
|
||||
LOG_INFO(&Poco::Logger::get("Disk(" + name + ")"), "Disk registered with cache path: {}", cache_base_path);
|
||||
|
||||
auto metadata_path = getDiskMetadataPath(name, config, config_prefix, context);
|
||||
if (metadata_path == cache_base_path)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Metadata path and cache base path must be different: {}", metadata_path);
|
||||
|
||||
size_t max_cache_size = config.getUInt64(config_prefix + ".data_cache_max_size", 1024*1024*1024);
|
||||
size_t max_cache_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS);
|
||||
size_t max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE);
|
||||
FileCacheSettings file_cache_settings;
|
||||
file_cache_settings.loadFromConfig(config, config_prefix);
|
||||
|
||||
auto cache = FileCacheFactory::instance().getOrCreate(cache_base_path, max_cache_size, max_cache_elements, max_file_segment_size);
|
||||
auto cache = FileCacheFactory::instance().getOrCreate(cache_base_path, file_cache_settings);
|
||||
cache->initialize();
|
||||
|
||||
auto * log = &Poco::Logger::get("Disk(" + name + ")");
|
||||
LOG_INFO(log, "Disk registered with cache path: {}. Cache size: {}, max cache elements size: {}, max_file_segment_size: {}",
|
||||
cache_base_path,
|
||||
file_cache_settings.max_size ? toString(file_cache_settings.max_size) : "UNLIMITED",
|
||||
file_cache_settings.max_elements ? toString(file_cache_settings.max_elements) : "UNLIMITED",
|
||||
file_cache_settings.max_file_segment_size);
|
||||
|
||||
return cache;
|
||||
}
|
||||
|
||||
|
@ -18,6 +18,8 @@
|
||||
#include <Common/quoteString.h>
|
||||
#include <Common/thread_local_rng.h>
|
||||
#include <Common/getRandomASCIIString.h>
|
||||
#include <Common/FileCacheFactory.h>
|
||||
#include <Common/FileCache.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/threadPoolCallbackRunner.h>
|
||||
@ -227,10 +229,15 @@ std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, co
|
||||
|
||||
ReadSettings disk_read_settings{read_settings};
|
||||
if (cache)
|
||||
{
|
||||
if (IFileCache::isReadOnly())
|
||||
disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true;
|
||||
|
||||
disk_read_settings.remote_fs_cache = cache;
|
||||
}
|
||||
|
||||
auto s3_impl = std::make_unique<ReadBufferFromS3Gather>(
|
||||
path, settings->client, bucket, metadata,
|
||||
settings->client, bucket, metadata.remote_fs_root_path, metadata.remote_fs_objects,
|
||||
settings->s3_max_single_read_retries, disk_read_settings);
|
||||
|
||||
if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool)
|
||||
@ -245,7 +252,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, co
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & write_settings)
|
||||
{
|
||||
auto settings = current_settings.get();
|
||||
|
||||
@ -265,23 +272,28 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
|
||||
LOG_TRACE(log, "{} to file by path: {}. S3 path: {}",
|
||||
mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name);
|
||||
|
||||
bool cache_on_write = cache
|
||||
&& fs::path(path).extension() != ".tmp"
|
||||
&& write_settings.enable_filesystem_cache_on_write_operations
|
||||
&& FileCacheFactory::instance().getSettings(getCacheBasePath()).cache_on_write_operations;
|
||||
|
||||
auto s3_buffer = std::make_unique<WriteBufferFromS3>(
|
||||
settings->client,
|
||||
bucket,
|
||||
remote_fs_root_path + blob_name,
|
||||
fs::path(remote_fs_root_path) / blob_name,
|
||||
settings->s3_min_upload_part_size,
|
||||
settings->s3_upload_part_size_multiply_factor,
|
||||
settings->s3_upload_part_size_multiply_parts_count_threshold,
|
||||
settings->s3_max_single_part_upload_size,
|
||||
std::move(object_metadata),
|
||||
buf_size, threadPoolCallbackRunner(getThreadPoolWriter()));
|
||||
buf_size, threadPoolCallbackRunner(getThreadPoolWriter()), blob_name, cache_on_write ? cache : nullptr);
|
||||
|
||||
auto create_metadata_callback = [this, path, blob_name, mode] (size_t count)
|
||||
{
|
||||
readOrCreateUpdateAndStoreMetadata(path, mode, false, [blob_name, count] (Metadata & metadata) { metadata.addObject(blob_name, count); return true; });
|
||||
};
|
||||
|
||||
return std::make_unique<WriteIndirectBufferFromRemoteFS>(std::move(s3_buffer), std::move(create_metadata_callback), path);
|
||||
return std::make_unique<WriteIndirectBufferFromRemoteFS>(std::move(s3_buffer), std::move(create_metadata_callback), fs::path(remote_fs_root_path) / blob_name);
|
||||
}
|
||||
|
||||
void DiskS3::createHardLink(const String & src_path, const String & dst_path)
|
||||
|
@ -88,7 +88,8 @@ public:
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
size_t buf_size,
|
||||
WriteMode mode) override;
|
||||
WriteMode mode,
|
||||
const WriteSettings & settings) override;
|
||||
|
||||
void removeFromRemoteFS(RemoteFSPathKeeperPtr keeper) override;
|
||||
|
||||
|
@ -96,7 +96,7 @@ TEST_F(DiskEncryptedTest, WriteAndRead)
|
||||
|
||||
/// Write a file.
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {});
|
||||
writeString(std::string_view{"Some text"}, *buf);
|
||||
}
|
||||
|
||||
@ -122,7 +122,7 @@ TEST_F(DiskEncryptedTest, Append)
|
||||
|
||||
/// Write a file (we use the append mode).
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {});
|
||||
writeString(std::string_view{"Some text"}, *buf);
|
||||
}
|
||||
|
||||
@ -132,7 +132,7 @@ TEST_F(DiskEncryptedTest, Append)
|
||||
|
||||
/// Append the file.
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {});
|
||||
writeString(std::string_view{" Another text"}, *buf);
|
||||
}
|
||||
|
||||
@ -148,7 +148,7 @@ TEST_F(DiskEncryptedTest, Truncate)
|
||||
|
||||
/// Write a file (we use the append mode).
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {});
|
||||
writeString(std::string_view{"Some text"}, *buf);
|
||||
}
|
||||
|
||||
@ -178,7 +178,7 @@ TEST_F(DiskEncryptedTest, ZeroFileSize)
|
||||
|
||||
/// Write nothing to a file.
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {});
|
||||
}
|
||||
|
||||
EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 0);
|
||||
@ -187,7 +187,7 @@ TEST_F(DiskEncryptedTest, ZeroFileSize)
|
||||
|
||||
/// Append the file with nothing.
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {});
|
||||
}
|
||||
|
||||
EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 0);
|
||||
@ -211,7 +211,7 @@ TEST_F(DiskEncryptedTest, AnotherFolder)
|
||||
|
||||
/// Write a file.
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {});
|
||||
writeString(std::string_view{"Some text"}, *buf);
|
||||
}
|
||||
|
||||
@ -231,11 +231,11 @@ TEST_F(DiskEncryptedTest, RandomIV)
|
||||
|
||||
/// Write two files with the same contents.
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {});
|
||||
writeString(std::string_view{"Some text"}, *buf);
|
||||
}
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("b.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
|
||||
auto buf = encrypted_disk->writeFile("b.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {});
|
||||
writeString(std::string_view{"Some text"}, *buf);
|
||||
}
|
||||
|
||||
@ -277,7 +277,7 @@ TEST_F(DiskEncryptedTest, RemoveFileDuringWriting)
|
||||
std::thread t1{[&]
|
||||
{
|
||||
for (size_t i = 0; i != n; ++i)
|
||||
encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
|
||||
encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {});
|
||||
}};
|
||||
|
||||
std::thread t2{[&]
|
||||
|
@ -5,12 +5,17 @@
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeNothing.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Poco/JSON/Parser.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <Parsers/TokenIterator.h>
|
||||
#include <Parsers/ExpressionListParsers.h>
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -18,7 +23,6 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
FormatSettings::EscapingRule stringToEscapingRule(const String & escaping_rule)
|
||||
@ -138,7 +142,8 @@ bool deserializeFieldByEscapingRule(
|
||||
serialization->deserializeTextRaw(column, buf, format_settings);
|
||||
break;
|
||||
default:
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Escaping rule {} is not suitable for deserialization", escapingRuleToString(escaping_rule));
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS, "Escaping rule {} is not suitable for deserialization", escapingRuleToString(escaping_rule));
|
||||
}
|
||||
return read;
|
||||
}
|
||||
@ -176,7 +181,8 @@ void serializeFieldByEscapingRule(
|
||||
}
|
||||
}
|
||||
|
||||
void writeStringByEscapingRule(const String & value, WriteBuffer & out, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings)
|
||||
void writeStringByEscapingRule(
|
||||
const String & value, WriteBuffer & out, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings)
|
||||
{
|
||||
switch (escaping_rule)
|
||||
{
|
||||
@ -249,85 +255,269 @@ String readStringByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule e
|
||||
return readByEscapingRule<true>(buf, escaping_rule, format_settings);
|
||||
}
|
||||
|
||||
static bool evaluateConstantExpressionFromString(const StringRef & field, DataTypePtr & type, ContextPtr context)
|
||||
static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf)
|
||||
{
|
||||
if (!context)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "You must provide context to evaluate constant expression");
|
||||
if (buf.eof())
|
||||
return nullptr;
|
||||
|
||||
ParserExpression parser;
|
||||
Expected expected;
|
||||
Tokens tokens(field.data, field.data + field.size);
|
||||
IParser::Pos token_iterator(tokens, context->getSettingsRef().max_parser_depth);
|
||||
ASTPtr ast;
|
||||
|
||||
/// FIXME: Our parser cannot parse maps in the form of '{key : value}' that is used in text formats.
|
||||
bool parsed = parser.parse(token_iterator, ast, expected);
|
||||
if (!parsed || !token_iterator->isEnd())
|
||||
return false;
|
||||
|
||||
try
|
||||
/// Array
|
||||
if (checkChar('[', buf))
|
||||
{
|
||||
std::pair<Field, DataTypePtr> result = evaluateConstantExpression(ast, context);
|
||||
type = generalizeDataType(result.second);
|
||||
return true;
|
||||
skipWhitespaceIfAny(buf);
|
||||
|
||||
DataTypes nested_types;
|
||||
bool first = true;
|
||||
while (!buf.eof() && *buf.position() != ']')
|
||||
{
|
||||
if (!first)
|
||||
{
|
||||
skipWhitespaceIfAny(buf);
|
||||
if (!checkChar(',', buf))
|
||||
return nullptr;
|
||||
skipWhitespaceIfAny(buf);
|
||||
}
|
||||
else
|
||||
first = false;
|
||||
|
||||
auto nested_type = determineDataTypeForSingleFieldImpl(buf);
|
||||
if (!nested_type)
|
||||
return nullptr;
|
||||
|
||||
nested_types.push_back(nested_type);
|
||||
}
|
||||
|
||||
if (buf.eof())
|
||||
return nullptr;
|
||||
|
||||
++buf.position();
|
||||
|
||||
if (nested_types.empty())
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNothing>());
|
||||
|
||||
auto least_supertype = tryGetLeastSupertype(nested_types);
|
||||
if (!least_supertype)
|
||||
return nullptr;
|
||||
|
||||
return std::make_shared<DataTypeArray>(least_supertype);
|
||||
}
|
||||
catch (...)
|
||||
|
||||
/// Tuple
|
||||
if (checkChar('(', buf))
|
||||
{
|
||||
return false;
|
||||
skipWhitespaceIfAny(buf);
|
||||
|
||||
DataTypes nested_types;
|
||||
bool first = true;
|
||||
while (!buf.eof() && *buf.position() != ')')
|
||||
{
|
||||
if (!first)
|
||||
{
|
||||
skipWhitespaceIfAny(buf);
|
||||
if (!checkChar(',', buf))
|
||||
return nullptr;
|
||||
skipWhitespaceIfAny(buf);
|
||||
}
|
||||
else
|
||||
first = false;
|
||||
|
||||
auto nested_type = determineDataTypeForSingleFieldImpl(buf);
|
||||
if (!nested_type)
|
||||
return nullptr;
|
||||
|
||||
nested_types.push_back(nested_type);
|
||||
}
|
||||
|
||||
if (buf.eof() || nested_types.empty())
|
||||
return nullptr;
|
||||
|
||||
++buf.position();
|
||||
|
||||
return std::make_shared<DataTypeTuple>(nested_types);
|
||||
}
|
||||
|
||||
/// Map
|
||||
if (checkChar('{', buf))
|
||||
{
|
||||
skipWhitespaceIfAny(buf);
|
||||
|
||||
DataTypes key_types;
|
||||
DataTypes value_types;
|
||||
bool first = true;
|
||||
while (!buf.eof() && *buf.position() != '}')
|
||||
{
|
||||
if (!first)
|
||||
{
|
||||
skipWhitespaceIfAny(buf);
|
||||
if (!checkChar(',', buf))
|
||||
return nullptr;
|
||||
skipWhitespaceIfAny(buf);
|
||||
}
|
||||
else
|
||||
first = false;
|
||||
|
||||
auto key_type = determineDataTypeForSingleFieldImpl(buf);
|
||||
if (!key_type)
|
||||
return nullptr;
|
||||
|
||||
key_types.push_back(key_type);
|
||||
|
||||
skipWhitespaceIfAny(buf);
|
||||
if (!checkChar(':', buf))
|
||||
return nullptr;
|
||||
skipWhitespaceIfAny(buf);
|
||||
|
||||
auto value_type = determineDataTypeForSingleFieldImpl(buf);
|
||||
if (!value_type)
|
||||
return nullptr;
|
||||
|
||||
value_types.push_back(value_type);
|
||||
}
|
||||
|
||||
if (buf.eof())
|
||||
return nullptr;
|
||||
|
||||
++buf.position();
|
||||
skipWhitespaceIfAny(buf);
|
||||
|
||||
if (key_types.empty())
|
||||
return std::make_shared<DataTypeMap>(std::make_shared<DataTypeNothing>(), std::make_shared<DataTypeNothing>());
|
||||
|
||||
auto key_least_supertype = tryGetLeastSupertype(key_types);
|
||||
|
||||
auto value_least_supertype = tryGetLeastSupertype(value_types);
|
||||
if (!key_least_supertype || !value_least_supertype)
|
||||
return nullptr;
|
||||
|
||||
if (!DataTypeMap::checkKeyType(key_least_supertype))
|
||||
return nullptr;
|
||||
|
||||
return std::make_shared<DataTypeMap>(key_least_supertype, value_least_supertype);
|
||||
}
|
||||
|
||||
/// String
|
||||
if (*buf.position() == '\'')
|
||||
{
|
||||
++buf.position();
|
||||
while (!buf.eof())
|
||||
{
|
||||
char * next_pos = find_first_symbols<'\\', '\''>(buf.position(), buf.buffer().end());
|
||||
buf.position() = next_pos;
|
||||
|
||||
if (!buf.hasPendingData())
|
||||
continue;
|
||||
|
||||
if (*buf.position() == '\'')
|
||||
break;
|
||||
|
||||
if (*buf.position() == '\\')
|
||||
++buf.position();
|
||||
}
|
||||
|
||||
if (buf.eof())
|
||||
return nullptr;
|
||||
|
||||
++buf.position();
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
/// Bool
|
||||
if (checkStringCaseInsensitive("true", buf) || checkStringCaseInsensitive("false", buf))
|
||||
return DataTypeFactory::instance().get("Bool");
|
||||
|
||||
/// Null
|
||||
if (checkStringCaseInsensitive("NULL", buf))
|
||||
return std::make_shared<DataTypeNothing>();
|
||||
|
||||
/// Number
|
||||
Float64 tmp;
|
||||
if (tryReadFloatText(tmp, buf))
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context)
|
||||
static DataTypePtr determineDataTypeForSingleField(ReadBuffer & buf)
|
||||
{
|
||||
return makeNullableRecursivelyAndCheckForNothing(determineDataTypeForSingleFieldImpl(buf));
|
||||
}
|
||||
|
||||
DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule)
|
||||
{
|
||||
switch (escaping_rule)
|
||||
{
|
||||
case FormatSettings::EscapingRule::Quoted:
|
||||
{
|
||||
DataTypePtr type;
|
||||
bool parsed = evaluateConstantExpressionFromString(field, type, context);
|
||||
return parsed ? type : nullptr;
|
||||
ReadBufferFromString buf(field);
|
||||
auto type = determineDataTypeForSingleField(buf);
|
||||
return buf.eof() ? type : nullptr;
|
||||
}
|
||||
case FormatSettings::EscapingRule::JSON:
|
||||
return getDataTypeFromJSONField(field);
|
||||
case FormatSettings::EscapingRule::CSV:
|
||||
{
|
||||
if (!format_settings.csv.input_format_use_best_effort_in_schema_inference)
|
||||
return makeNullable(std::make_shared<DataTypeString>());
|
||||
|
||||
if (field.empty() || field == format_settings.csv.null_representation)
|
||||
return nullptr;
|
||||
|
||||
if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation)
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
return DataTypeFactory::instance().get("Nullable(Bool)");
|
||||
|
||||
DataTypePtr type;
|
||||
bool parsed;
|
||||
if (field[0] == '\'' || field[0] == '"')
|
||||
if (field.size() > 1 && ((field.front() == '\'' && field.back() == '\'') || (field.front() == '"' && field.back() == '"')))
|
||||
{
|
||||
/// Try to evaluate expression inside quotes.
|
||||
parsed = evaluateConstantExpressionFromString(StringRef(field.data() + 1, field.size() - 2), type, context);
|
||||
/// If it's a number in quotes we determine it as a string.
|
||||
if (parsed && type && isNumber(removeNullable(type)))
|
||||
return makeNullable(std::make_shared<DataTypeString>());
|
||||
}
|
||||
else
|
||||
parsed = evaluateConstantExpressionFromString(field, type, context);
|
||||
ReadBufferFromString buf(std::string_view(field.data() + 1, field.size() - 2));
|
||||
/// Try to determine the type of value inside quotes
|
||||
auto type = determineDataTypeForSingleField(buf);
|
||||
|
||||
/// If we couldn't parse an expression, determine it as a string.
|
||||
return parsed ? type : makeNullable(std::make_shared<DataTypeString>());
|
||||
if (!type)
|
||||
return nullptr;
|
||||
|
||||
/// If it's a number or tuple in quotes or there is some unread data in buffer, we determine it as a string.
|
||||
if (isNumber(removeNullable(type)) || isTuple(type) || !buf.eof())
|
||||
return makeNullable(std::make_shared<DataTypeString>());
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
/// Case when CSV value is not in quotes. Check if it's a number, and if not, determine it's as a string.
|
||||
ReadBufferFromString buf(field);
|
||||
Float64 tmp;
|
||||
if (tryReadFloatText(tmp, buf) && buf.eof())
|
||||
return makeNullable(std::make_shared<DataTypeFloat64>());
|
||||
|
||||
return makeNullable(std::make_shared<DataTypeString>());
|
||||
}
|
||||
case FormatSettings::EscapingRule::Raw: [[fallthrough]];
|
||||
case FormatSettings::EscapingRule::Escaped:
|
||||
/// TODO: Try to use some heuristics here to determine the type of data.
|
||||
return field.empty() ? nullptr : makeNullable(std::make_shared<DataTypeString>());
|
||||
{
|
||||
if (!format_settings.tsv.input_format_use_best_effort_in_schema_inference)
|
||||
return makeNullable(std::make_shared<DataTypeString>());
|
||||
|
||||
if (field.empty() || field == format_settings.tsv.null_representation)
|
||||
return nullptr;
|
||||
|
||||
if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation)
|
||||
return DataTypeFactory::instance().get("Nullable(Bool)");
|
||||
|
||||
ReadBufferFromString buf(field);
|
||||
auto type = determineDataTypeForSingleField(buf);
|
||||
if (!buf.eof())
|
||||
return makeNullable(std::make_shared<DataTypeString>());
|
||||
|
||||
return type;
|
||||
}
|
||||
default:
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the type for value with {} escaping rule", escapingRuleToString(escaping_rule));
|
||||
}
|
||||
}
|
||||
|
||||
DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context)
|
||||
DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule)
|
||||
{
|
||||
DataTypes data_types;
|
||||
data_types.reserve(fields.size());
|
||||
for (const auto & field : fields)
|
||||
data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, escaping_rule, context));
|
||||
data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, escaping_rule));
|
||||
return data_types;
|
||||
}
|
||||
|
||||
@ -344,4 +534,12 @@ DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escap
|
||||
}
|
||||
}
|
||||
|
||||
DataTypes getDefaultDataTypeForEscapingRules(const std::vector<FormatSettings::EscapingRule> & escaping_rules)
|
||||
{
|
||||
DataTypes data_types;
|
||||
for (const auto & rule : escaping_rules)
|
||||
data_types.push_back(getDefaultDataTypeForEscapingRule(rule));
|
||||
return data_types;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -43,15 +43,21 @@ String readFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule es
|
||||
/// - For JSON escaping rule we can use JSON parser to parse a single field
|
||||
/// and then convert JSON type of this field to ClickHouse type.
|
||||
/// - For CSV escaping rule we can do the next:
|
||||
/// - If the field is an unquoted string, then we could try to evaluate it
|
||||
/// as a constant expression, and if it fails, treat it as a String.
|
||||
/// - If the field is a string in quotes, then we can try to evaluate
|
||||
/// expression inside quotes as a constant expression, and if it fails or
|
||||
/// the result is a number (we don't parse numbers in quotes) we treat it as a String.
|
||||
/// - For TSV and TSVRaw we treat each field as a String (TODO: try to use some tweaks and heuristics here)
|
||||
DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context = nullptr);
|
||||
DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context = nullptr);
|
||||
/// - If the field is an unquoted string, then we try to parse it as a number,
|
||||
/// and if we cannot, treat it as a String.
|
||||
/// - If the field is a string in quotes, then we try to use some
|
||||
/// tweaks and heuristics to determine the type inside quotes, and if we can't or
|
||||
/// the result is a number or tuple (we don't parse numbers in quotes and don't
|
||||
/// support tuples in CSV) we treat it as a String.
|
||||
/// - If input_format_csv_use_best_effort_in_schema_inference is disabled, we
|
||||
/// treat everything as a string.
|
||||
/// - For TSV and TSVRaw we try to use some tweaks and heuristics to determine the type
|
||||
/// of value if setting input_format_tsv_use_best_effort_in_schema_inference is enabled,
|
||||
/// otherwise we treat everything as a string.
|
||||
DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule);
|
||||
DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule);
|
||||
|
||||
DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escaping_rule);
|
||||
DataTypes getDefaultDataTypeForEscapingRules(const std::vector<FormatSettings::EscapingRule> & escaping_rules);
|
||||
|
||||
}
|
||||
|
@ -65,6 +65,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.csv.input_format_enum_as_number = settings.input_format_csv_enum_as_number;
|
||||
format_settings.csv.null_representation = settings.format_csv_null_representation;
|
||||
format_settings.csv.input_format_arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv;
|
||||
format_settings.csv.input_format_use_best_effort_in_schema_inference = settings.input_format_csv_use_best_effort_in_schema_inference;
|
||||
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
|
||||
format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
|
||||
format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;
|
||||
@ -97,6 +98,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.parquet.import_nested = settings.input_format_parquet_import_nested;
|
||||
format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
|
||||
format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
|
||||
format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference;
|
||||
format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
|
||||
format_settings.pretty.color = settings.output_format_pretty_color;
|
||||
format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width;
|
||||
@ -117,6 +119,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.tsv.empty_as_default = settings.input_format_tsv_empty_as_default;
|
||||
format_settings.tsv.input_format_enum_as_number = settings.input_format_tsv_enum_as_number;
|
||||
format_settings.tsv.null_representation = settings.format_tsv_null_representation;
|
||||
format_settings.tsv.input_format_use_best_effort_in_schema_inference = settings.input_format_tsv_use_best_effort_in_schema_inference;
|
||||
format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals;
|
||||
format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions;
|
||||
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
|
||||
@ -126,10 +129,17 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary;
|
||||
format_settings.arrow.import_nested = settings.input_format_arrow_import_nested;
|
||||
format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns;
|
||||
format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference;
|
||||
format_settings.orc.import_nested = settings.input_format_orc_import_nested;
|
||||
format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
|
||||
format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
|
||||
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference;
|
||||
format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference;
|
||||
format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching;
|
||||
format_settings.orc.import_nested = settings.input_format_orc_import_nested;
|
||||
format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
|
||||
format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
|
||||
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference;
|
||||
format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching;
|
||||
format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
|
||||
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
|
||||
@ -137,6 +147,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.msgpack.number_of_columns = settings.input_format_msgpack_number_of_columns;
|
||||
format_settings.msgpack.output_uuid_representation = settings.output_format_msgpack_uuid_representation;
|
||||
format_settings.max_rows_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference;
|
||||
format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference;
|
||||
|
||||
/// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context
|
||||
if (format_settings.schema.is_server)
|
||||
@ -371,7 +382,7 @@ SchemaReaderPtr FormatFactory::getSchemaReader(
|
||||
throw Exception("FormatFactory: Format " + name + " doesn't support schema inference.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
auto format_settings = _format_settings ? *_format_settings : getFormatSettings(context);
|
||||
return schema_reader_creator(buf, format_settings, context);
|
||||
return schema_reader_creator(buf, format_settings);
|
||||
}
|
||||
|
||||
ExternalSchemaReaderPtr FormatFactory::getExternalSchemaReader(
|
||||
|
@ -97,7 +97,7 @@ private:
|
||||
/// The checker should return true if format support append.
|
||||
using AppendSupportChecker = std::function<bool(const FormatSettings & settings)>;
|
||||
|
||||
using SchemaReaderCreator = std::function<SchemaReaderPtr(ReadBuffer & in, const FormatSettings & settings, ContextPtr context)>;
|
||||
using SchemaReaderCreator = std::function<SchemaReaderPtr(ReadBuffer & in, const FormatSettings & settings)>;
|
||||
using ExternalSchemaReaderCreator = std::function<ExternalSchemaReaderPtr(const FormatSettings & settings)>;
|
||||
|
||||
struct Creators
|
||||
|
@ -36,6 +36,8 @@ struct FormatSettings
|
||||
bool seekable_read = true;
|
||||
UInt64 max_rows_to_read_for_schema_inference = 100;
|
||||
|
||||
String column_names_for_schema_inference = "";
|
||||
|
||||
enum class DateTimeInputFormat
|
||||
{
|
||||
Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp.
|
||||
@ -77,6 +79,7 @@ struct FormatSettings
|
||||
bool low_cardinality_as_dictionary = false;
|
||||
bool import_nested = false;
|
||||
bool allow_missing_columns = false;
|
||||
bool skip_columns_with_unsupported_types_in_schema_inference = false;
|
||||
bool case_insensitive_column_matching = false;
|
||||
} arrow;
|
||||
|
||||
@ -104,6 +107,7 @@ struct FormatSettings
|
||||
bool input_format_arrays_as_nested_csv = false;
|
||||
String null_representation = "\\N";
|
||||
char tuple_delimiter = ',';
|
||||
bool input_format_use_best_effort_in_schema_inference = true;
|
||||
} csv;
|
||||
|
||||
struct HiveText
|
||||
@ -141,6 +145,7 @@ struct FormatSettings
|
||||
UInt64 row_group_size = 1000000;
|
||||
bool import_nested = false;
|
||||
bool allow_missing_columns = false;
|
||||
bool skip_columns_with_unsupported_types_in_schema_inference = false;
|
||||
bool case_insensitive_column_matching = false;
|
||||
std::unordered_set<int> skip_row_groups = {};
|
||||
} parquet;
|
||||
@ -209,6 +214,7 @@ struct FormatSettings
|
||||
bool crlf_end_of_line = false;
|
||||
String null_representation = "\\N";
|
||||
bool input_format_enum_as_number = false;
|
||||
bool input_format_use_best_effort_in_schema_inference = true;
|
||||
} tsv;
|
||||
|
||||
struct
|
||||
@ -223,6 +229,7 @@ struct FormatSettings
|
||||
bool import_nested = false;
|
||||
bool allow_missing_columns = false;
|
||||
int64_t row_batch_size = 100'000;
|
||||
bool skip_columns_with_unsupported_types_in_schema_inference = false;
|
||||
bool case_insensitive_column_matching = false;
|
||||
std::unordered_set<int> skip_stripes = {};
|
||||
} orc;
|
||||
|
@ -105,8 +105,11 @@ ColumnsDescription readSchemaFromFormat(const String & format_name, const std::o
|
||||
return readSchemaFromFormat(format_name, format_settings, read_buffer_creator, context, buf_out);
|
||||
}
|
||||
|
||||
DataTypePtr generalizeDataType(DataTypePtr type)
|
||||
DataTypePtr makeNullableRecursivelyAndCheckForNothing(DataTypePtr type)
|
||||
{
|
||||
if (!type)
|
||||
return nullptr;
|
||||
|
||||
WhichDataType which(type);
|
||||
|
||||
if (which.isNothing())
|
||||
@ -115,16 +118,13 @@ DataTypePtr generalizeDataType(DataTypePtr type)
|
||||
if (which.isNullable())
|
||||
{
|
||||
const auto * nullable_type = assert_cast<const DataTypeNullable *>(type.get());
|
||||
return generalizeDataType(nullable_type->getNestedType());
|
||||
return makeNullableRecursivelyAndCheckForNothing(nullable_type->getNestedType());
|
||||
}
|
||||
|
||||
if (isNumber(type))
|
||||
return makeNullable(std::make_shared<DataTypeFloat64>());
|
||||
|
||||
if (which.isArray())
|
||||
{
|
||||
const auto * array_type = assert_cast<const DataTypeArray *>(type.get());
|
||||
auto nested_type = generalizeDataType(array_type->getNestedType());
|
||||
auto nested_type = makeNullableRecursivelyAndCheckForNothing(array_type->getNestedType());
|
||||
return nested_type ? std::make_shared<DataTypeArray>(nested_type) : nullptr;
|
||||
}
|
||||
|
||||
@ -134,7 +134,7 @@ DataTypePtr generalizeDataType(DataTypePtr type)
|
||||
DataTypes nested_types;
|
||||
for (const auto & element : tuple_type->getElements())
|
||||
{
|
||||
auto nested_type = generalizeDataType(element);
|
||||
auto nested_type = makeNullableRecursivelyAndCheckForNothing(element);
|
||||
if (!nested_type)
|
||||
return nullptr;
|
||||
nested_types.push_back(nested_type);
|
||||
@ -145,19 +145,27 @@ DataTypePtr generalizeDataType(DataTypePtr type)
|
||||
if (which.isMap())
|
||||
{
|
||||
const auto * map_type = assert_cast<const DataTypeMap *>(type.get());
|
||||
auto key_type = removeNullable(generalizeDataType(map_type->getKeyType()));
|
||||
auto value_type = generalizeDataType(map_type->getValueType());
|
||||
return key_type && value_type ? std::make_shared<DataTypeMap>(key_type, value_type) : nullptr;
|
||||
auto key_type = makeNullableRecursivelyAndCheckForNothing(map_type->getKeyType());
|
||||
auto value_type = makeNullableRecursivelyAndCheckForNothing(map_type->getValueType());
|
||||
return key_type && value_type ? std::make_shared<DataTypeMap>(removeNullable(key_type), value_type) : nullptr;
|
||||
}
|
||||
|
||||
if (which.isLowCarnality())
|
||||
{
|
||||
const auto * lc_type = assert_cast<const DataTypeLowCardinality *>(type.get());
|
||||
auto nested_type = generalizeDataType(lc_type->getDictionaryType());
|
||||
auto nested_type = makeNullableRecursivelyAndCheckForNothing(lc_type->getDictionaryType());
|
||||
return nested_type ? std::make_shared<DataTypeLowCardinality>(nested_type) : nullptr;
|
||||
}
|
||||
|
||||
return makeNullable(type);
|
||||
}
|
||||
|
||||
NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header)
|
||||
{
|
||||
NamesAndTypesList result;
|
||||
for (auto & [name, type] : header.getNamesAndTypesList())
|
||||
result.emplace_back(name, makeNullableRecursivelyAndCheckForNothing(type));
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -29,14 +29,16 @@ ColumnsDescription readSchemaFromFormat(
|
||||
ContextPtr context,
|
||||
std::unique_ptr<ReadBuffer> & buf_out);
|
||||
|
||||
/// Convert type to the most general type:
|
||||
/// - IntN, UIntN, FloatN, Decimal -> Float64
|
||||
/// Make type Nullable recursively:
|
||||
/// - Type -> Nullable(type)
|
||||
/// - Array(Type) -> Array(Nullable(Type))
|
||||
/// - Tuple(Type1, ..., TypeN) -> Tuple(Nullable(Type1), ..., Nullable(TypeN))
|
||||
/// - Map(KeyType, ValueType) -> Map(KeyType, Nullable(ValueType))
|
||||
/// - LowCardinality(Type) -> LowCardinality(Nullable(Type))
|
||||
/// If type is Nothing or one of the nested types is Nothing, return nullptr.
|
||||
DataTypePtr generalizeDataType(DataTypePtr type);
|
||||
DataTypePtr makeNullableRecursivelyAndCheckForNothing(DataTypePtr type);
|
||||
|
||||
/// Call makeNullableRecursivelyAndCheckForNothing for all types
|
||||
/// in the block and return names and types.
|
||||
NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header);
|
||||
}
|
||||
|
@ -41,9 +41,9 @@ public:
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
return ColumnT().createColumnConst(input_rows_count, constant_value);
|
||||
return result_type->createColumnConst(input_rows_count, constant_value);
|
||||
}
|
||||
|
||||
private:
|
||||
|
71
src/Functions/FunctionsTransactionCounters.cpp
Normal file
71
src/Functions/FunctionsTransactionCounters.cpp
Normal file
@ -0,0 +1,71 @@
|
||||
#include <Functions/FunctionConstantBase.h>
|
||||
#include <DataTypes/DataTypeNothing.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/MergeTreeTransaction.h>
|
||||
#include <Interpreters/TransactionLog.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
class FunctionTransactionID : public FunctionConstantBase<FunctionTransactionID, Tuple, DataTypeNothing>
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "transactionID";
|
||||
static Tuple getValue(const MergeTreeTransactionPtr & txn)
|
||||
{
|
||||
Tuple res;
|
||||
if (txn)
|
||||
res = {txn->tid.start_csn, txn->tid.local_tid, txn->tid.host_id};
|
||||
else
|
||||
res = {UInt64(0), UInt64(0), UUIDHelpers::Nil};
|
||||
return res;
|
||||
}
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return getTransactionIDDataType(); }
|
||||
|
||||
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTransactionID>(context); }
|
||||
explicit FunctionTransactionID(ContextPtr context) : FunctionConstantBase(getValue(context->getCurrentTransaction()), context->isDistributed()) {}
|
||||
};
|
||||
|
||||
class FunctionTransactionLatestSnapshot : public FunctionConstantBase<FunctionTransactionLatestSnapshot, UInt64, DataTypeUInt64>
|
||||
{
|
||||
static UInt64 getLatestSnapshot(ContextPtr context)
|
||||
{
|
||||
context->checkTransactionsAreAllowed(/* explicit_tcl_query */ true);
|
||||
return TransactionLog::instance().getLatestSnapshot();
|
||||
}
|
||||
public:
|
||||
static constexpr auto name = "transactionLatestSnapshot";
|
||||
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTransactionLatestSnapshot>(context); }
|
||||
explicit FunctionTransactionLatestSnapshot(ContextPtr context) : FunctionConstantBase(getLatestSnapshot(context), context->isDistributed()) {}
|
||||
};
|
||||
|
||||
class FunctionTransactionOldestSnapshot : public FunctionConstantBase<FunctionTransactionOldestSnapshot, UInt64, DataTypeUInt64>
|
||||
{
|
||||
static UInt64 getOldestSnapshot(ContextPtr context)
|
||||
{
|
||||
context->checkTransactionsAreAllowed(/* explicit_tcl_query */ true);
|
||||
return TransactionLog::instance().getOldestSnapshot();
|
||||
}
|
||||
public:
|
||||
static constexpr auto name = "transactionOldestSnapshot";
|
||||
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTransactionOldestSnapshot>(context); }
|
||||
explicit FunctionTransactionOldestSnapshot(ContextPtr context) : FunctionConstantBase(getOldestSnapshot(context), context->isDistributed()) {}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void registerFunctionsTransactionCounters(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionTransactionID>();
|
||||
factory.registerFunction<FunctionTransactionLatestSnapshot>();
|
||||
factory.registerFunction<FunctionTransactionOldestSnapshot>();
|
||||
}
|
||||
|
||||
}
|
@ -75,6 +75,7 @@ void registerFunctionFile(FunctionFactory &);
|
||||
void registerFunctionConnectionId(FunctionFactory &);
|
||||
void registerFunctionPartitionId(FunctionFactory &);
|
||||
void registerFunctionIsIPAddressContainedIn(FunctionFactory &);
|
||||
void registerFunctionsTransactionCounters(FunctionFactory & factory);
|
||||
void registerFunctionQueryID(FunctionFactory &);
|
||||
void registerFunctionInitialQueryID(FunctionFactory &);
|
||||
void registerFunctionServerUUID(FunctionFactory &);
|
||||
@ -163,6 +164,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
|
||||
registerFunctionConnectionId(factory);
|
||||
registerFunctionPartitionId(factory);
|
||||
registerFunctionIsIPAddressContainedIn(factory);
|
||||
registerFunctionsTransactionCounters(factory);
|
||||
registerFunctionQueryID(factory);
|
||||
registerFunctionInitialQueryID(factory);
|
||||
registerFunctionServerUUID(factory);
|
||||
|
@ -26,6 +26,7 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_PARSE_DATETIME;
|
||||
extern const int CANNOT_PARSE_DATE;
|
||||
extern const int INCORRECT_DATA;
|
||||
extern const int ATTEMPT_TO_READ_AFTER_EOF;
|
||||
}
|
||||
|
||||
template <typename IteratorSrc, typename IteratorDst>
|
||||
@ -137,6 +138,12 @@ void assertEOF(ReadBuffer & buf)
|
||||
throwAtAssertionFailed("eof", buf);
|
||||
}
|
||||
|
||||
void assertNotEOF(ReadBuffer & buf)
|
||||
{
|
||||
if (buf.eof())
|
||||
throw Exception("Attempt to read after EOF", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
|
||||
}
|
||||
|
||||
|
||||
void assertStringCaseInsensitive(const char * s, ReadBuffer & buf)
|
||||
{
|
||||
@ -1366,6 +1373,7 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf)
|
||||
/// - Tuples: (...)
|
||||
/// - Maps: {...}
|
||||
/// - NULL
|
||||
/// - Bool: true/false
|
||||
/// - Number: integer, float, decimal.
|
||||
|
||||
if (*buf.position() == '\'')
|
||||
@ -1394,6 +1402,16 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf)
|
||||
s.append("NaN");
|
||||
}
|
||||
}
|
||||
else if (checkCharCaseInsensitive('t', buf))
|
||||
{
|
||||
assertStringCaseInsensitive("rue", buf);
|
||||
s.append("true");
|
||||
}
|
||||
else if (checkCharCaseInsensitive('f', buf))
|
||||
{
|
||||
assertStringCaseInsensitive("alse", buf);
|
||||
s.append("false");
|
||||
}
|
||||
else
|
||||
{
|
||||
/// It's an integer, float or decimal. They all can be parsed as float.
|
||||
|
@ -163,6 +163,7 @@ void readVectorBinary(std::vector<T> & v, ReadBuffer & buf, size_t MAX_VECTOR_SI
|
||||
|
||||
void assertString(const char * s, ReadBuffer & buf);
|
||||
void assertEOF(ReadBuffer & buf);
|
||||
void assertNotEOF(ReadBuffer & buf);
|
||||
|
||||
[[noreturn]] void throwAtAssertionFailed(const char * s, ReadBuffer & buf);
|
||||
|
||||
|
@ -77,8 +77,9 @@ struct ReadSettings
|
||||
|
||||
size_t remote_fs_read_max_backoff_ms = 10000;
|
||||
size_t remote_fs_read_backoff_max_tries = 4;
|
||||
bool remote_fs_enable_cache = true;
|
||||
size_t remote_fs_cache_max_wait_sec = 1;
|
||||
bool enable_filesystem_cache = true;
|
||||
size_t filesystem_cache_max_wait_sec = 1;
|
||||
bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false;
|
||||
|
||||
size_t remote_read_min_bytes_for_seek = DBMS_DEFAULT_BUFFER_SIZE;
|
||||
|
||||
|
@ -133,7 +133,6 @@ off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) // NOLINT
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
void WriteBufferFromFileDescriptor::truncate(off_t length) // NOLINT
|
||||
{
|
||||
int res = ftruncate(fd, length);
|
||||
|
@ -2,25 +2,28 @@
|
||||
|
||||
#if USE_AWS_S3
|
||||
|
||||
# include <IO/WriteBufferFromS3.h>
|
||||
# include <IO/WriteHelpers.h>
|
||||
#include <base/logger_useful.h>
|
||||
#include <Common/FileCache.h>
|
||||
|
||||
# include <aws/s3/S3Client.h>
|
||||
# include <aws/s3/model/CreateMultipartUploadRequest.h>
|
||||
# include <aws/s3/model/CompleteMultipartUploadRequest.h>
|
||||
# include <aws/s3/model/PutObjectRequest.h>
|
||||
# include <aws/s3/model/UploadPartRequest.h>
|
||||
# include <base/logger_useful.h>
|
||||
#include <IO/WriteBufferFromS3.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
# include <utility>
|
||||
#include <aws/s3/S3Client.h>
|
||||
#include <aws/s3/model/CreateMultipartUploadRequest.h>
|
||||
#include <aws/s3/model/CompleteMultipartUploadRequest.h>
|
||||
#include <aws/s3/model/PutObjectRequest.h>
|
||||
#include <aws/s3/model/UploadPartRequest.h>
|
||||
|
||||
#include <utility>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event S3WriteBytes;
|
||||
extern const Event RemoteFSCacheDownloadBytes;
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
// S3 protocol does not allow to have multipart upload with more than 10000 parts.
|
||||
@ -32,6 +35,7 @@ const int S3_WARN_MAX_PARTS = 10000;
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int S3_ERROR;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
struct WriteBufferFromS3::UploadPartTask
|
||||
@ -40,6 +44,7 @@ struct WriteBufferFromS3::UploadPartTask
|
||||
bool is_finised = false;
|
||||
std::string tag;
|
||||
std::exception_ptr exception;
|
||||
std::optional<FileSegmentsHolder> cache_files;
|
||||
};
|
||||
|
||||
struct WriteBufferFromS3::PutObjectTask
|
||||
@ -47,6 +52,7 @@ struct WriteBufferFromS3::PutObjectTask
|
||||
Aws::S3::Model::PutObjectRequest req;
|
||||
bool is_finised = false;
|
||||
std::exception_ptr exception;
|
||||
std::optional<FileSegmentsHolder> cache_files;
|
||||
};
|
||||
|
||||
WriteBufferFromS3::WriteBufferFromS3(
|
||||
@ -59,7 +65,9 @@ WriteBufferFromS3::WriteBufferFromS3(
|
||||
size_t max_single_part_upload_size_,
|
||||
std::optional<std::map<String, String>> object_metadata_,
|
||||
size_t buffer_size_,
|
||||
ScheduleFunc schedule_)
|
||||
ScheduleFunc schedule_,
|
||||
const String & blob_name_,
|
||||
FileCachePtr cache_)
|
||||
: BufferWithOwnMemory<WriteBuffer>(buffer_size_, nullptr, 0)
|
||||
, bucket(bucket_)
|
||||
, key(key_)
|
||||
@ -70,6 +78,8 @@ WriteBufferFromS3::WriteBufferFromS3(
|
||||
, upload_part_size_multiply_threshold(upload_part_size_multiply_threshold_)
|
||||
, max_single_part_upload_size(max_single_part_upload_size_)
|
||||
, schedule(std::move(schedule_))
|
||||
, blob_name(blob_name_)
|
||||
, cache(cache_)
|
||||
{
|
||||
allocateBuffer();
|
||||
}
|
||||
@ -83,7 +93,41 @@ void WriteBufferFromS3::nextImpl()
|
||||
if (temporary_buffer->tellp() == -1)
|
||||
allocateBuffer();
|
||||
|
||||
temporary_buffer->write(working_buffer.begin(), offset());
|
||||
size_t size = offset();
|
||||
temporary_buffer->write(working_buffer.begin(), size);
|
||||
|
||||
ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()
|
||||
? CurrentThread::get().getThreadGroup()
|
||||
: MainThreadStatus::getInstance().getThreadGroup();
|
||||
|
||||
if (cacheEnabled())
|
||||
{
|
||||
if (blob_name.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty blob name");
|
||||
|
||||
auto cache_key = cache->hash(blob_name);
|
||||
file_segments_holder.emplace(cache->setDownloading(cache_key, current_download_offset, size));
|
||||
current_download_offset += size;
|
||||
|
||||
size_t remaining_size = size;
|
||||
auto & file_segments = file_segments_holder->file_segments;
|
||||
for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end(); ++file_segment_it)
|
||||
{
|
||||
auto & file_segment = *file_segment_it;
|
||||
size_t current_size = std::min(file_segment->range().size(), remaining_size);
|
||||
remaining_size -= current_size;
|
||||
|
||||
if (file_segment->reserve(current_size))
|
||||
{
|
||||
file_segment->writeInMemory(working_buffer.begin(), current_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
file_segments.erase(file_segment_it, file_segments.end());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::S3WriteBytes, offset());
|
||||
|
||||
@ -95,7 +139,6 @@ void WriteBufferFromS3::nextImpl()
|
||||
|
||||
if (!multipart_upload_id.empty() && last_part_size > upload_part_size)
|
||||
{
|
||||
|
||||
writePart();
|
||||
|
||||
allocateBuffer();
|
||||
@ -126,6 +169,11 @@ WriteBufferFromS3::~WriteBufferFromS3()
|
||||
}
|
||||
}
|
||||
|
||||
bool WriteBufferFromS3::cacheEnabled() const
|
||||
{
|
||||
return cache != nullptr;
|
||||
}
|
||||
|
||||
void WriteBufferFromS3::preFinalize()
|
||||
{
|
||||
next();
|
||||
@ -213,6 +261,13 @@ void WriteBufferFromS3::writePart()
|
||||
}
|
||||
|
||||
fillUploadRequest(task->req, part_number);
|
||||
|
||||
if (file_segments_holder)
|
||||
{
|
||||
task->cache_files.emplace(std::move(*file_segments_holder));
|
||||
file_segments_holder.reset();
|
||||
}
|
||||
|
||||
schedule([this, task]()
|
||||
{
|
||||
try
|
||||
@ -224,6 +279,15 @@ void WriteBufferFromS3::writePart()
|
||||
task->exception = std::current_exception();
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
finalizeCacheIfNeeded(task->cache_files);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard lock(bg_tasks_mutex);
|
||||
task->is_finised = true;
|
||||
@ -240,8 +304,14 @@ void WriteBufferFromS3::writePart()
|
||||
{
|
||||
UploadPartTask task;
|
||||
fillUploadRequest(task.req, part_tags.size() + 1);
|
||||
if (file_segments_holder)
|
||||
{
|
||||
task.cache_files.emplace(std::move(*file_segments_holder));
|
||||
file_segments_holder.reset();
|
||||
}
|
||||
processUploadRequest(task);
|
||||
part_tags.push_back(task.tag);
|
||||
finalizeCacheIfNeeded(task.cache_files);
|
||||
}
|
||||
}
|
||||
|
||||
@ -328,7 +398,14 @@ void WriteBufferFromS3::makeSinglepartUpload()
|
||||
if (schedule)
|
||||
{
|
||||
put_object_task = std::make_unique<PutObjectTask>();
|
||||
|
||||
fillPutRequest(put_object_task->req);
|
||||
if (file_segments_holder)
|
||||
{
|
||||
put_object_task->cache_files.emplace(std::move(*file_segments_holder));
|
||||
file_segments_holder.reset();
|
||||
}
|
||||
|
||||
schedule([this]()
|
||||
{
|
||||
try
|
||||
@ -340,6 +417,15 @@ void WriteBufferFromS3::makeSinglepartUpload()
|
||||
put_object_task->exception = std::current_exception();
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
finalizeCacheIfNeeded(put_object_task->cache_files);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard lock(bg_tasks_mutex);
|
||||
put_object_task->is_finised = true;
|
||||
@ -349,14 +435,19 @@ void WriteBufferFromS3::makeSinglepartUpload()
|
||||
/// Releasing lock and condvar notification.
|
||||
bg_tasks_condvar.notify_one();
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
PutObjectTask task;
|
||||
fillPutRequest(task.req);
|
||||
if (file_segments_holder)
|
||||
{
|
||||
task.cache_files.emplace(std::move(*file_segments_holder));
|
||||
file_segments_holder.reset();
|
||||
}
|
||||
processPutRequest(task);
|
||||
finalizeCacheIfNeeded(task.cache_files);
|
||||
}
|
||||
}
|
||||
|
||||
@ -384,6 +475,28 @@ void WriteBufferFromS3::processPutRequest(PutObjectTask & task)
|
||||
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
|
||||
}
|
||||
|
||||
void WriteBufferFromS3::finalizeCacheIfNeeded(std::optional<FileSegmentsHolder> & file_segments_holder)
|
||||
{
|
||||
if (!file_segments_holder)
|
||||
return;
|
||||
|
||||
auto & file_segments = file_segments_holder->file_segments;
|
||||
for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();)
|
||||
{
|
||||
try
|
||||
{
|
||||
size_t size = (*file_segment_it)->finalizeWrite();
|
||||
file_segment_it = file_segments.erase(file_segment_it);
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, size);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void WriteBufferFromS3::waitForReadyBackGroundTasks()
|
||||
{
|
||||
if (schedule)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user