Merge branch 'master' into ascii_function

This commit is contained in:
李扬 2022-10-28 07:05:58 -05:00 committed by GitHub
commit ddaf1bf7b2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
161 changed files with 2918 additions and 2367 deletions

View File

@ -1,64 +0,0 @@
name: Sonar Cloud
on:
push:
branches:
- master
pull_request:
types: [opened, synchronize, reopened]
env:
CC: clang-15
CXX: clang++-15
jobs:
sonar_cloud:
name: Sonar Cloud
runs-on: [self-hosted, builder]
env:
SONAR_SCANNER_VERSION: 4.7.0.2747
SONAR_SERVER_URL: "https://sonarcloud.io"
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
submodules: true
- name: Set up JDK 11
uses: actions/setup-java@v1
with:
java-version: 11
- name: Download and set up sonar-scanner
env:
SONAR_SCANNER_DOWNLOAD_URL: https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${{ env.SONAR_SCANNER_VERSION }}-linux.zip
run: |
mkdir -p $HOME/.sonar
curl -sSLo $HOME/.sonar/sonar-scanner.zip ${{ env.SONAR_SCANNER_DOWNLOAD_URL }}
unzip -o $HOME/.sonar/sonar-scanner.zip -d $HOME/.sonar/
echo "$HOME/.sonar/sonar-scanner-${{ env.SONAR_SCANNER_VERSION }}-linux/bin" >> $GITHUB_PATH
- name: Download and set up build-wrapper
env:
BUILD_WRAPPER_DOWNLOAD_URL: ${{ env.SONAR_SERVER_URL }}/static/cpp/build-wrapper-linux-x86.zip
run: |
curl -sSLo $HOME/.sonar/build-wrapper-linux-x86.zip ${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}
unzip -o $HOME/.sonar/build-wrapper-linux-x86.zip -d $HOME/.sonar/
echo "$HOME/.sonar/build-wrapper-linux-x86" >> $GITHUB_PATH
- name: Set Up Build Tools
run: |
sudo apt-get update
sudo apt-get install -yq git cmake ccache python3 ninja-build
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
- name: Run build-wrapper
run: |
mkdir build
cd build
cmake ..
cd ..
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/
- name: Run sonar-scanner
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
run: |
sonar-scanner \
--define sonar.host.url="${{ env.SONAR_SERVER_URL }}" \
--define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
--define sonar.projectKey="clickhouse-java" \
--define sonar.organization="ClickHouse"

View File

@ -88,7 +88,6 @@
* Allow to use `Date32` arguments for `dateName` function. [#42554](https://github.com/ClickHouse/ClickHouse/pull/42554) ([Roman Vasin](https://github.com/rvasin)).
* Now filters with NULL literals will be used during index analysis. [#34063](https://github.com/ClickHouse/ClickHouse/issues/34063). [#41842](https://github.com/ClickHouse/ClickHouse/pull/41842) ([Amos Bird](https://github.com/amosbird)).
* Merge parts if every part in the range is older than a certain threshold. The threshold can be set by using `min_age_to_force_merge_seconds`. This closes [#35836](https://github.com/ClickHouse/ClickHouse/issues/35836). [#42423](https://github.com/ClickHouse/ClickHouse/pull/42423) ([Antonio Andelic](https://github.com/antonio2368)). This is a continuation of [#39550](https://github.com/ClickHouse/ClickHouse/pull/39550) by [@fastio](https://github.com/fastio), who implemented most of the logic.
* Added new infrastructure for query analysis and planning under `allow_experimental_analyzer` setting. [#31796](https://github.com/ClickHouse/ClickHouse/pull/31796) ([Maksim Kita](https://github.com/kitaisreal)).
* Improve the time to recover lost keeper connections. [#42541](https://github.com/ClickHouse/ClickHouse/pull/42541) ([Raúl Marín](https://github.com/Algunenano)).
#### Build/Testing/Packaging Improvement
@ -143,7 +142,6 @@
* Fix bad_cast assert during INSERT into `Annoy` indexes over non-Float32 columns. `Annoy` indices are an experimental feature. [#42485](https://github.com/ClickHouse/ClickHouse/pull/42485) ([Robert Schulze](https://github.com/rschu1ze)).
* Arithmetic operator with Date or DateTime and 128 or 256-bit integer was referencing uninitialized memory. [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix unexpected table loading error when partition key contains alias function names during server upgrade. [#36379](https://github.com/ClickHouse/ClickHouse/pull/36379) ([Amos Bird](https://github.com/amosbird)).
* Fixes a crash in `JSONExtract` with `LowCardinality`. [#42633](https://github.com/ClickHouse/ClickHouse/pull/42633) ([Anton Popov](https://github.com/CurtizJ)).
### <a id="229"></a> ClickHouse release 22.9, 2022-09-22

2
contrib/libcxx vendored

@ -1 +1 @@
Subproject commit 172b2ae074f6755145b91c53a95c8540c1468239
Subproject commit 4db7f838afd3139eb3761694b04d31275df45d2d

View File

@ -25,6 +25,7 @@ set(SRCS
"${LIBCXX_SOURCE_DIR}/src/ios.cpp"
"${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp"
"${LIBCXX_SOURCE_DIR}/src/iostream.cpp"
"${LIBCXX_SOURCE_DIR}/src/legacy_debug_handler.cpp"
"${LIBCXX_SOURCE_DIR}/src/legacy_pointer_safety.cpp"
"${LIBCXX_SOURCE_DIR}/src/locale.cpp"
"${LIBCXX_SOURCE_DIR}/src/memory.cpp"
@ -49,6 +50,7 @@ set(SRCS
"${LIBCXX_SOURCE_DIR}/src/valarray.cpp"
"${LIBCXX_SOURCE_DIR}/src/variant.cpp"
"${LIBCXX_SOURCE_DIR}/src/vector.cpp"
"${LIBCXX_SOURCE_DIR}/src/verbose_abort.cpp"
)
add_library(cxx ${SRCS})

2
contrib/libcxxabi vendored

@ -1 +1 @@
Subproject commit 6eb7cc7a7bdd779e6734d1b9fb451df2274462d7
Subproject commit a736a6b3c6a7b8aae2ebad629ca21b2c55b4820e

View File

@ -9,6 +9,7 @@ set(SRCS
"${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp"
# "${LIBCXXABI_SOURCE_DIR}/src/cxa_noexception.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_vector.cpp"

2
contrib/rocksdb vendored

@ -1 +1 @@
Subproject commit e7c2b2f7bcf3b4b33892a1a6d25c32a93edfbdb9
Subproject commit 2c8998e26c6d46b27c710d7829c3a15e34959f70

2
contrib/zlib-ng vendored

@ -1 +1 @@
Subproject commit bffad6f6fe74d6a2f92e2668390664a926c68733
Subproject commit 50f0eae1a411764cd6d1e85b3ce471438acd3c1c

View File

@ -27,9 +27,14 @@ RUN apt-get update \
tar \
tzdata \
unixodbc \
python3-pip \
libcurl4-openssl-dev \
libssl-dev \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install pycurl
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH

View File

@ -0,0 +1,29 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.7.7.24-stable (02ad1f979a8) FIXME as compared to v22.7.6.74-stable (c00ffb3c11a)
#### Bug Fix
* Backported in [#42433](https://github.com/ClickHouse/ClickHouse/issues/42433): Choose correct aggregation method for LowCardinality with BigInt. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)).
#### Build/Testing/Packaging Improvement
* Backported in [#42329](https://github.com/ClickHouse/ClickHouse/issues/42329): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)).
* Backported in [#42359](https://github.com/ClickHouse/ClickHouse/issues/42359): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#42268](https://github.com/ClickHouse/ClickHouse/issues/42268): Fix reusing of files > 4GB from base backup. [#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#42299](https://github.com/ClickHouse/ClickHouse/issues/42299): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#42386](https://github.com/ClickHouse/ClickHouse/issues/42386): `ALTER UPDATE` of attached part (with columns different from table schema) could create an invalid `columns.txt` metadata on disk. Reading from such part could fail with errors or return invalid data. Fixes [#42161](https://github.com/ClickHouse/ClickHouse/issues/42161). [#42319](https://github.com/ClickHouse/ClickHouse/pull/42319) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#42498](https://github.com/ClickHouse/ClickHouse/issues/42498): The `additional_table_filters` setting was not applied to `Distributed` storage. Fixes [#41692](https://github.com/ClickHouse/ClickHouse/issues/41692). [#42322](https://github.com/ClickHouse/ClickHouse/pull/42322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#42593](https://github.com/ClickHouse/ClickHouse/issues/42593): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Add a warning message to release.py script, require release type [#41975](https://github.com/ClickHouse/ClickHouse/pull/41975) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787) [#42136](https://github.com/ClickHouse/ClickHouse/pull/42136) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).

View File

@ -0,0 +1,37 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.8.7.34-lts (3c38e5e8ab9) FIXME as compared to v22.8.6.71-lts (7bf38a43e30)
#### Improvement
* Backported in [#42096](https://github.com/ClickHouse/ClickHouse/issues/42096): Replace back `clickhouse su` command with `sudo -u` in start in order to respect limits in `/etc/security/limits.conf`. [#41847](https://github.com/ClickHouse/ClickHouse/pull/41847) ([Eugene Konkov](https://github.com/ekonkov)).
#### Bug Fix
* Backported in [#42434](https://github.com/ClickHouse/ClickHouse/issues/42434): Choose correct aggregation method for LowCardinality with BigInt. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)).
#### Build/Testing/Packaging Improvement
* Backported in [#42296](https://github.com/ClickHouse/ClickHouse/issues/42296): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)).
* Backported in [#42360](https://github.com/ClickHouse/ClickHouse/issues/42360): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#42489](https://github.com/ClickHouse/ClickHouse/issues/42489): Removed skipping of mutations in unaffected partitions of `MergeTree` tables, because this feature never worked correctly and might cause resurrection of finished mutations. [#40589](https://github.com/ClickHouse/ClickHouse/pull/40589) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#42121](https://github.com/ClickHouse/ClickHouse/issues/42121): Fixed "Part ... intersects part ..." error that might happen in extremely rare cases if replica was restarted just after detaching some part as broken. [#41741](https://github.com/ClickHouse/ClickHouse/pull/41741) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Prevent crash when passing wrong aggregation states to groupBitmap*. [#41972](https://github.com/ClickHouse/ClickHouse/pull/41972) ([Raúl Marín](https://github.com/Algunenano)).
* Fix read bytes/rows in X-ClickHouse-Summary with materialized views. [#41973](https://github.com/ClickHouse/ClickHouse/pull/41973) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#42269](https://github.com/ClickHouse/ClickHouse/issues/42269): Fix reusing of files > 4GB from base backup. [#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#42300](https://github.com/ClickHouse/ClickHouse/issues/42300): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#42387](https://github.com/ClickHouse/ClickHouse/issues/42387): `ALTER UPDATE` of attached part (with columns different from table schema) could create an invalid `columns.txt` metadata on disk. Reading from such part could fail with errors or return invalid data. Fixes [#42161](https://github.com/ClickHouse/ClickHouse/issues/42161). [#42319](https://github.com/ClickHouse/ClickHouse/pull/42319) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#42499](https://github.com/ClickHouse/ClickHouse/issues/42499): The `additional_table_filters` setting was not applied to `Distributed` storage. Fixes [#41692](https://github.com/ClickHouse/ClickHouse/issues/41692). [#42322](https://github.com/ClickHouse/ClickHouse/pull/42322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#42571](https://github.com/ClickHouse/ClickHouse/issues/42571): Fix buffer overflow in the processing of Decimal data types. This closes [#42451](https://github.com/ClickHouse/ClickHouse/issues/42451). [#42465](https://github.com/ClickHouse/ClickHouse/pull/42465) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#42594](https://github.com/ClickHouse/ClickHouse/issues/42594): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Add a warning message to release.py script, require release type [#41975](https://github.com/ClickHouse/ClickHouse/pull/41975) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787) [#42136](https://github.com/ClickHouse/ClickHouse/pull/42136) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).

View File

@ -0,0 +1,13 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.8.8.3-lts (ac5a6cababc) FIXME as compared to v22.8.7.34-lts (3c38e5e8ab9)
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#42677](https://github.com/ClickHouse/ClickHouse/issues/42677): keeper-fix: fix race in accessing logs while snapshot is being installed. [#40627](https://github.com/ClickHouse/ClickHouse/pull/40627) ([Antonio Andelic](https://github.com/antonio2368)).

View File

@ -0,0 +1,33 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.9.4.32-stable (3db8bcf1a70) FIXME as compared to v22.9.3.18-stable (0cb4b15d2fa)
#### Bug Fix
* Backported in [#42435](https://github.com/ClickHouse/ClickHouse/issues/42435): Choose correct aggregation method for LowCardinality with BigInt. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)).
#### Build/Testing/Packaging Improvement
* Backported in [#42297](https://github.com/ClickHouse/ClickHouse/issues/42297): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)).
* Backported in [#42361](https://github.com/ClickHouse/ClickHouse/issues/42361): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#42122](https://github.com/ClickHouse/ClickHouse/issues/42122): Fixed "Part ... intersects part ..." error that might happen in extremely rare cases if replica was restarted just after detaching some part as broken. [#41741](https://github.com/ClickHouse/ClickHouse/pull/41741) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#41938](https://github.com/ClickHouse/ClickHouse/issues/41938): Don't allow to create or alter merge tree tables with virtual column name _row_exists, which is reserved for lightweight delete. Fixed [#41716](https://github.com/ClickHouse/ClickHouse/issues/41716). [#41763](https://github.com/ClickHouse/ClickHouse/pull/41763) ([Jianmei Zhang](https://github.com/zhangjmruc)).
* Backported in [#42179](https://github.com/ClickHouse/ClickHouse/issues/42179): Fix reusing of files > 4GB from base backup. [#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#42301](https://github.com/ClickHouse/ClickHouse/issues/42301): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#42388](https://github.com/ClickHouse/ClickHouse/issues/42388): `ALTER UPDATE` of attached part (with columns different from table schema) could create an invalid `columns.txt` metadata on disk. Reading from such part could fail with errors or return invalid data. Fixes [#42161](https://github.com/ClickHouse/ClickHouse/issues/42161). [#42319](https://github.com/ClickHouse/ClickHouse/pull/42319) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#42500](https://github.com/ClickHouse/ClickHouse/issues/42500): The `additional_table_filters` setting was not applied to `Distributed` storage. Fixes [#41692](https://github.com/ClickHouse/ClickHouse/issues/41692). [#42322](https://github.com/ClickHouse/ClickHouse/pull/42322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#42581](https://github.com/ClickHouse/ClickHouse/issues/42581): This reverts [#40217](https://github.com/ClickHouse/ClickHouse/issues/40217) which introduced a regression in date/time functions. [#42367](https://github.com/ClickHouse/ClickHouse/pull/42367) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#42572](https://github.com/ClickHouse/ClickHouse/issues/42572): Fix buffer overflow in the processing of Decimal data types. This closes [#42451](https://github.com/ClickHouse/ClickHouse/issues/42451). [#42465](https://github.com/ClickHouse/ClickHouse/pull/42465) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#42595](https://github.com/ClickHouse/ClickHouse/issues/42595): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Add a warning message to release.py script, require release type [#41975](https://github.com/ClickHouse/ClickHouse/pull/41975) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787) [#42136](https://github.com/ClickHouse/ClickHouse/pull/42136) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).

View File

@ -87,14 +87,15 @@ SETTINGS
<summary>Устаревший способ создания таблицы</summary>
:::note "Attention"
Не используйте этот метод в новых проектах. По возможности переключите старые проекты на метод, описанный выше.
:::note "Attention"
Не используйте этот метод в новых проектах. По возможности переключите старые проекты на метод, описанный выше.
:::
``` sql
Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format
[, kafka_row_delimiter, kafka_schema, kafka_num_consumers, kafka_skip_broken_messages])
```
:::
</details>
## Описание {#opisanie}

View File

@ -39,9 +39,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
<summary>Устаревший способ создания таблицы</summary>
:::note "Attention"
Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше.
:::
:::note "Attention"
Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше.
:::
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(

View File

@ -43,9 +43,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
<summary>Устаревший способ создания таблицы</summary>
:::note "Attention"
Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше.
:::
:::note "Attention"
Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше.
:::
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
@ -59,7 +60,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
- `sign` — Имя столбца с типом строки: `1` — строка состояния, `-1` — строка отмены состояния.
Тип данных столбца — `Int8`.
Тип данных столбца — `Int8`.
</details>

View File

@ -55,9 +55,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
<summary>Устаревший способ создания таблицы</summary>
:::note "Attention"
Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше.
:::
:::note "Attention"
Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше.
:::
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(

View File

@ -115,9 +115,10 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa
<summary>Устаревший способ создания таблицы</summary>
:::note "Attention"
Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше.
:::
:::note "Attention"
Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше.
:::
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(

View File

@ -42,9 +42,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
<summary>Устаревший способ создания таблицы</summary>
:::note "Attention"
Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше.
:::
:::note "Attention"
Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше.
:::
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(

View File

@ -316,9 +316,9 @@ SELECT toStartOfISOYear(toDate('2017-01-01')) AS ISOYear20170101;
Возвращается дата.
:::note "Attention"
Возвращаемое значение для некорректных дат зависит от реализации. ClickHouse может вернуть нулевую дату, выбросить исключение, или выполнить «естественное» перетекание дат между месяцами.
Возвращаемое значение для некорректных дат зависит от реализации. ClickHouse может вернуть нулевую дату, выбросить исключение, или выполнить «естественное» перетекание дат между месяцами.
:::
## toMonday {#tomonday}
Округляет дату или дату-с-временем вниз до ближайшего понедельника.

View File

@ -122,9 +122,9 @@ FROM t_null
Существует два варианта IN-ов с подзапросами (аналогично для JOIN-ов): обычный `IN` / `JOIN` и `GLOBAL IN` / `GLOBAL JOIN`. Они отличаются способом выполнения при распределённой обработке запроса.
:::note "Attention"
Помните, что алгоритмы, описанные ниже, могут работать иначе в зависимости от [настройки](../../operations/settings/settings.md) `distributed_product_mode`.
:::
:::note "Attention"
Помните, что алгоритмы, описанные ниже, могут работать иначе в зависимости от [настройки](../../operations/settings/settings.md) `distributed_product_mode`.
:::
При использовании обычного IN-а, запрос отправляется на удалённые серверы, и на каждом из них выполняются подзапросы в секциях `IN` / `JOIN`.
При использовании `GLOBAL IN` / `GLOBAL JOIN-а`, сначала выполняются все подзапросы для `GLOBAL IN` / `GLOBAL JOIN-ов`, и результаты складываются во временные таблицы. Затем эти временные таблицы передаются на каждый удалённый сервер, и на них выполняются запросы, с использованием этих переданных временных данных.

View File

@ -1,6 +1,10 @@
#pragma once
#include <Interpreters/Cluster.h>
#include <base/types.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <utility>
namespace DB
{
@ -8,21 +12,4 @@ namespace DB
using DatabaseAndTableName = std::pair<String, String>;
using ListOfDatabasesAndTableNames = std::vector<DatabaseAndTableName>;
/// Hierarchical description of the tasks
struct ShardPartitionPiece;
struct ShardPartition;
struct TaskShard;
struct TaskTable;
struct TaskCluster;
struct ClusterPartition;
using PartitionPieces = std::vector<ShardPartitionPiece>;
using TasksPartition = std::map<String, ShardPartition, std::greater<>>;
using ShardInfo = Cluster::ShardInfo;
using TaskShardPtr = std::shared_ptr<TaskShard>;
using TasksShard = std::vector<TaskShardPtr>;
using TasksTable = std::list<TaskTable>;
using ClusterPartitions = std::map<String, ClusterPartition, std::greater<>>;
}

View File

@ -1,7 +1,13 @@
set(CLICKHOUSE_COPIER_SOURCES
"${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopierApp.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp")
"${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ShardPartition.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ShardPartitionPiece.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/StatusAccumulator.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/TaskCluster.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/TaskShard.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/TaskTable.cpp")
set (CLICKHOUSE_COPIER_LINK
PRIVATE

View File

@ -3,7 +3,8 @@
#include "Aliases.h"
#include "Internals.h"
#include "TaskCluster.h"
#include "TaskTableAndShard.h"
#include "TaskShard.h"
#include "TaskTable.h"
#include "ShardPartition.h"
#include "ShardPartitionPiece.h"
#include "ZooKeeperStaff.h"

View File

@ -1,17 +1,22 @@
#pragma once
#include "Aliases.h"
#include <base/types.h>
#include <map>
namespace DB
{
/// Contains info about all shards that contain a partition.
/// Every field is value-initialised to zero, so a freshly constructed object
/// represents "nothing accumulated yet".
struct ClusterPartition
{
    double elapsed_time_seconds{0};
    UInt64 bytes_copied{0};
    UInt64 rows_copied{0};
    UInt64 blocks_copied{0};
    UInt64 total_tries{0};
};
/// Contains info about all shards that contain a partition.
/// All members start at zero; nothing in this struct changes them on its own.
struct ClusterPartition
{
    double elapsed_time_seconds{0};

    UInt64 bytes_copied{0};
    UInt64 rows_copied{0};
    UInt64 blocks_copied{0};

    UInt64 total_tries{0};
};
using ClusterPartitions = std::map<String, ClusterPartition, std::greater<>>;
}

View File

@ -0,0 +1,70 @@
#include "ShardPartition.h"
#include "TaskShard.h"
#include "TaskTable.h"
namespace DB
{
/// Creates the description of one partition that lives on the shard `parent`.
///
/// @param parent            shard this partition belongs to; kept as `task_shard`.
/// @param name_quoted_      partition name, taken by value and moved into `name`
///                          (presumably already quoted, judging by the parameter name — confirm with callers).
/// @param number_of_splits  capacity reserved up front in `pieces`; no pieces are created here.
ShardPartition::ShardPartition(TaskShard & parent, String name_quoted_, size_t number_of_splits)
: task_shard(parent)
, name(std::move(name_quoted_))
{
/// Only reserves capacity; `pieces` stays empty.
pieces.reserve(number_of_splits);
}
/// Path of the "clean_start" node located directly under this partition's path.
String ShardPartition::getPartitionCleanStartPath() const
{
    String clean_start_path = getPartitionPath();
    clean_start_path += "/clean_start";
    return clean_start_path;
}
/// Path of the "clean_start" node under the given piece of this partition.
/// `current_piece_number` must be strictly less than the table's `number_of_splits`
/// (checked by assert, i.e. only in builds where asserts are enabled).
String ShardPartition::getPartitionPieceCleanStartPath(size_t current_piece_number) const
{
    assert(current_piece_number < task_shard.task_table.number_of_splits);

    String clean_start_path = getPartitionPiecePath(current_piece_number);
    clean_start_path += "/clean_start";
    return clean_start_path;
}
/// Path of this partition; the actual layout is decided by the shard's task table,
/// which is asked for the path that corresponds to `name`.
String ShardPartition::getPartitionPath() const
{
    auto & table = task_shard.task_table;
    return table.getPartitionPath(name);
}
/// Path of one piece of this partition, as computed by the shard's task table.
/// `current_piece_number` must be strictly less than the table's `number_of_splits`
/// (checked by assert).
String ShardPartition::getPartitionPiecePath(size_t current_piece_number) const
{
    auto & table = task_shard.task_table;
    assert(current_piece_number < table.number_of_splits);
    return table.getPartitionPiecePath(name, current_piece_number);
}
/// Status node of this shard inside the partition's "shards" directory.
///
/// Schema:  /<root...>/tables/<table>/<partition>/shards/<shard>
/// Example: /root/table_test.hits/201701/shards/1
String ShardPartition::getShardStatusPath() const
{
    String status_path = getPartitionShardsPath();
    status_path += "/";
    status_path += toString(task_shard.numberInCluster());
    return status_path;
}
/// Path of the "shards" node located directly under this partition's path.
String ShardPartition::getPartitionShardsPath() const
{
    String shards_path = getPartitionPath();
    shards_path += "/shards";
    return shards_path;
}
/// Path of the "partition_active_workers" node located directly under this partition's path.
String ShardPartition::getPartitionActiveWorkersPath() const
{
    String workers_path = getPartitionPath();
    workers_path += "/partition_active_workers";
    return workers_path;
}
/// Child of the partition's active-workers node, named after the parent shard's number in the cluster.
String ShardPartition::getActiveWorkerPath() const
{
    String worker_path = getPartitionActiveWorkersPath();
    worker_path += "/";
    worker_path += toString(task_shard.numberInCluster());
    return worker_path;
}
/// Path of the "is_dirty" node located directly under this partition's path.
String ShardPartition::getCommonPartitionIsDirtyPath() const
{
    String dirty_path = getPartitionPath();
    dirty_path += "/is_dirty";
    return dirty_path;
}
/// Path of the "cleaned" node; note that it is nested under the partition's "is_dirty" node.
String ShardPartition::getCommonPartitionIsCleanedPath() const
{
    String cleaned_path = getCommonPartitionIsDirtyPath();
    cleaned_path += "/cleaned";
    return cleaned_path;
}
}

View File

@ -1,19 +1,23 @@
#pragma once
#include "Aliases.h"
#include "TaskTableAndShard.h"
#include "ShardPartitionPiece.h"
#include <base/types.h>
#include <map>
namespace DB
{
struct TaskShard;
/// Just destination partition of a shard
/// I don't know what this comment means.
/// In short, when we discovered what shards contain currently processing partition,
/// This class describes a partition (name) that is stored on the shard (parent).
struct ShardPartition
{
ShardPartition(TaskShard &parent, String name_quoted_, size_t number_of_splits = 10)
: task_shard(parent), name(std::move(name_quoted_)) { pieces.reserve(number_of_splits); }
ShardPartition(TaskShard &parent, String name_quoted_, size_t number_of_splits = 10);
String getPartitionPath() const;
@ -45,58 +49,6 @@ struct ShardPartition
String name;
};
/// Returns "<partition path>/clean_start".
inline String ShardPartition::getPartitionCleanStartPath() const
{
    String result(getPartitionPath());
    result += "/clean_start";
    return result;
}
/// Returns "<piece path>/clean_start" for the given piece.
/// The piece number is asserted to be below the table's `number_of_splits`.
inline String ShardPartition::getPartitionPieceCleanStartPath(size_t current_piece_number) const
{
    assert(current_piece_number < task_shard.task_table.number_of_splits);

    String result(getPartitionPiecePath(current_piece_number));
    result += "/clean_start";
    return result;
}
/// Delegates to the shard's task table, which maps the partition `name` to its path.
inline String ShardPartition::getPartitionPath() const
{
    auto & table = task_shard.task_table;
    return table.getPartitionPath(name);
}
/// Delegates to the shard's task table to obtain the path of one piece of this partition.
/// The piece number is asserted to be below the table's `number_of_splits`.
inline String ShardPartition::getPartitionPiecePath(size_t current_piece_number) const
{
    auto & table = task_shard.task_table;
    assert(current_piece_number < table.number_of_splits);
    return table.getPartitionPiecePath(name, current_piece_number);
}
/// Returns "<partition shards path>/<shard number in cluster>".
///
/// Schema:  /<root...>/tables/<table>/<partition>/shards/<shard>
/// Example: /root/table_test.hits/201701/shards/1
inline String ShardPartition::getShardStatusPath() const
{
    String result(getPartitionShardsPath());
    result += "/";
    result += toString(task_shard.numberInCluster());
    return result;
}
/// Path of the "shards" node directly below the partition node.
inline String ShardPartition::getPartitionShardsPath() const
{
    String shards_path = getPartitionPath();
    shards_path += "/shards";
    return shards_path;
}
/// Path of the "partition_active_workers" node directly below the partition node.
inline String ShardPartition::getPartitionActiveWorkersPath() const
{
    String workers_path = getPartitionPath();
    workers_path += "/partition_active_workers";
    return workers_path;
}
/// Path of this shard's entry under the partition's active-workers node:
/// <partition active workers path>/<shard number in cluster>.
inline String ShardPartition::getActiveWorkerPath() const
{
    const String shard_number = toString(task_shard.numberInCluster());
    return getPartitionActiveWorkersPath() + "/" + shard_number;
}
/// Path of the "is_dirty" node that belongs to the whole partition.
inline String ShardPartition::getCommonPartitionIsDirtyPath() const
{
    String dirty_path = getPartitionPath();
    dirty_path += "/is_dirty";
    return dirty_path;
}
/// Path of the "cleaned" node, a child of the partition's "is_dirty" node.
inline String ShardPartition::getCommonPartitionIsCleanedPath() const
{
    String cleaned_path = getCommonPartitionIsDirtyPath();
    cleaned_path += "/cleaned";
    return cleaned_path;
}
using TasksPartition = std::map<String, ShardPartition, std::greater<>>;
}

View File

@ -0,0 +1,64 @@
#include "ShardPartitionPiece.h"
#include "ShardPartition.h"
#include "TaskShard.h"
#include <IO/WriteHelpers.h>
namespace DB
{
/// Describes piece number `current_piece_number_` of the partition `parent`.
/// Note the inversion: the caller passes "is present", the member stores "is absent".
ShardPartitionPiece::ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_)
    : is_absent_piece(!is_present_piece_),
      current_piece_number(current_piece_number_),
      shard_partition(parent)
{
}
/// Path of this piece: <partition path>/piece_<current piece number>.
String ShardPartitionPiece::getPartitionPiecePath() const
{
    const String piece_suffix = "/piece_" + toString(current_piece_number);
    return shard_partition.getPartitionPath() + piece_suffix;
}
/// Path of the "clean_start" node directly below the piece node.
String ShardPartitionPiece::getPartitionPieceCleanStartPath() const
{
    String clean_start_path = getPartitionPiecePath();
    clean_start_path += "/clean_start";
    return clean_start_path;
}
/// Path of the "is_dirty" node directly below the piece node.
String ShardPartitionPiece::getPartitionPieceIsDirtyPath() const
{
    String dirty_path = getPartitionPiecePath();
    dirty_path += "/is_dirty";
    return dirty_path;
}
/// Path of the "cleaned" node, a child of the piece's "is_dirty" node.
String ShardPartitionPiece::getPartitionPieceIsCleanedPath() const
{
    String cleaned_path = getPartitionPieceIsDirtyPath();
    cleaned_path += "/cleaned";
    return cleaned_path;
}
/// Path of the "partition_piece_active_workers" node directly below the piece node.
String ShardPartitionPiece::getPartitionPieceActiveWorkersPath() const
{
    String workers_path = getPartitionPiecePath();
    workers_path += "/partition_piece_active_workers";
    return workers_path;
}
/// Path of the owning shard's entry under the piece's active-workers node:
/// <piece active workers path>/<shard number in cluster>.
String ShardPartitionPiece::getActiveWorkerPath() const
{
    const String shard_number = toString(shard_partition.task_shard.numberInCluster());
    return getPartitionPieceActiveWorkersPath() + "/" + shard_number;
}
/// Path of the "shards" node directly below the piece node
/// (the per-shard status nodes of this piece are placed under it).
String ShardPartitionPiece::getPartitionPieceShardsPath() const
{
    String shards_path = getPartitionPiecePath();
    shards_path += "/shards";
    return shards_path;
}
/// Path of the owning shard's status node: <piece shards path>/<shard number in cluster>.
String ShardPartitionPiece::getShardStatusPath() const
{
    const String shard_number = toString(shard_partition.task_shard.numberInCluster());
    return getPartitionPieceShardsPath() + "/" + shard_number;
}
/// Path of the "cleaner" node, a child of the piece's "is_dirty" node.
String ShardPartitionPiece::getPartitionPieceCleanerPath() const
{
    String cleaner_path = getPartitionPieceIsDirtyPath();
    cleaner_path += "/cleaner";
    return cleaner_path;
}
}

View File

@ -1,16 +1,15 @@
#pragma once
#include "Internals.h"
#include <base/types.h>
namespace DB
{
struct ShardPartition;
struct ShardPartitionPiece
{
ShardPartitionPiece(ShardPartition &parent, size_t current_piece_number_, bool is_present_piece_)
: is_absent_piece(!is_present_piece_), current_piece_number(current_piece_number_),
shard_partition(parent) {}
ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_);
String getPartitionPiecePath() const;
@ -37,52 +36,6 @@ struct ShardPartitionPiece
ShardPartition & shard_partition;
};
/// Path of this piece: <partition path>/piece_<current piece number>.
inline String ShardPartitionPiece::getPartitionPiecePath() const
{
    const String piece_suffix = "/piece_" + toString(current_piece_number);
    return shard_partition.getPartitionPath() + piece_suffix;
}
/// Path of the "clean_start" node directly below the piece node.
inline String ShardPartitionPiece::getPartitionPieceCleanStartPath() const
{
    String clean_start_path = getPartitionPiecePath();
    clean_start_path += "/clean_start";
    return clean_start_path;
}
/// Path of the "is_dirty" node directly below the piece node.
inline String ShardPartitionPiece::getPartitionPieceIsDirtyPath() const
{
    String dirty_path = getPartitionPiecePath();
    dirty_path += "/is_dirty";
    return dirty_path;
}
/// Path of the "cleaned" node, a child of the piece's "is_dirty" node.
inline String ShardPartitionPiece::getPartitionPieceIsCleanedPath() const
{
    String cleaned_path = getPartitionPieceIsDirtyPath();
    cleaned_path += "/cleaned";
    return cleaned_path;
}
/// Path of the "partition_piece_active_workers" node directly below the piece node.
inline String ShardPartitionPiece::getPartitionPieceActiveWorkersPath() const
{
    String workers_path = getPartitionPiecePath();
    workers_path += "/partition_piece_active_workers";
    return workers_path;
}
/// Path of the owning shard's entry under the piece's active-workers node:
/// <piece active workers path>/<shard number in cluster>.
inline String ShardPartitionPiece::getActiveWorkerPath() const
{
    const String shard_number = toString(shard_partition.task_shard.numberInCluster());
    return getPartitionPieceActiveWorkersPath() + "/" + shard_number;
}
/// Path of the "shards" node directly below the piece node
/// (the per-shard status nodes of this piece are placed under it).
inline String ShardPartitionPiece::getPartitionPieceShardsPath() const
{
    String shards_path = getPartitionPiecePath();
    shards_path += "/shards";
    return shards_path;
}
/// Path of the owning shard's status node: <piece shards path>/<shard number in cluster>.
inline String ShardPartitionPiece::getShardStatusPath() const
{
    const String shard_number = toString(shard_partition.task_shard.numberInCluster());
    return getPartitionPieceShardsPath() + "/" + shard_number;
}
/// Path of the "cleaner" node, a child of the piece's "is_dirty" node.
inline String ShardPartitionPiece::getPartitionPieceCleanerPath() const
{
    String cleaner_path = getPartitionPieceIsDirtyPath();
    cleaner_path += "/cleaner";
    return cleaner_path;
}
using PartitionPieces = std::vector<ShardPartitionPiece>;
}

View File

@ -0,0 +1,48 @@
#include "StatusAccumulator.h"
#include <Poco/JSON/Parser.h>
#include <Poco/JSON/JSON.h>
#include <Poco/JSON/Object.h>
#include <Poco/JSON/Stringifier.h>
#include <iostream>
namespace DB
{
/// Parses a JSON object whose keys are table names and returns the per-table counters.
/// Each value is read as a string and parsed again as a JSON object that carries
/// "all_partitions_count" and "processed_partitions_count"; both are added to the table's entry.
StatusAccumulator::MapPtr StatusAccumulator::fromJSON(String state_json)
{
    Poco::JSON::Parser parser;
    auto root = parser.parse(state_json).extract<Poco::JSON::Object::Ptr>();

    MapPtr statuses = std::make_shared<Map>();
    for (const auto & table_name : root->getNames())
    {
        auto serialized_status = root->getValue<String>(table_name);
        auto status_object = parser.parse(serialized_status).extract<Poco::JSON::Object::Ptr>();

        /// operator[] inserts an entry for the table when there is none yet.
        auto & accumulated = (*statuses)[table_name];
        accumulated.all_partitions_count += status_object->getValue<size_t>("all_partitions_count");
        accumulated.processed_partitions_count += status_object->getValue<size_t>("processed_partitions_count");
    }

    return statuses;
}
/// Serializes the per-table counters into one JSON object: every table name maps to an object
/// with "all_partitions_count" and "processed_partitions_count".
/// `statuses` is dereferenced unconditionally, so it must not be null.
String StatusAccumulator::serializeToJSON(MapPtr statuses)
{
    Poco::JSON::Object root;
    for (const auto & entry : *statuses)
    {
        const auto & counters = entry.second;

        Poco::JSON::Object table_json;
        table_json.set("all_partitions_count", counters.all_partitions_count);
        table_json.set("processed_partitions_count", counters.processed_partitions_count);
        root.set(entry.first, table_json);
    }

    std::ostringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
    out.exceptions(std::ios::failbit);
    Poco::JSON::Stringifier::stringify(root, out);
    return out.str();
}
}

View File

@ -1,65 +1,27 @@
#pragma once
#include <base/types.h>
#include <Poco/JSON/Parser.h>
#include <Poco/JSON/JSON.h>
#include <Poco/JSON/Object.h>
#include <Poco/JSON/Stringifier.h>
#include <unordered_map>
#include <memory>
#include <string>
#include <iostream>
#include <unordered_map>
namespace DB
{
class StatusAccumulator
{
public:
struct TableStatus
{
size_t all_partitions_count;
size_t processed_partitions_count;
};
public:
struct TableStatus
{
size_t all_partitions_count;
size_t processed_partitions_count;
};
using Map = std::unordered_map<std::string, TableStatus>;
using MapPtr = std::shared_ptr<Map>;
using Map = std::unordered_map<String, TableStatus>;
using MapPtr = std::shared_ptr<Map>;
static MapPtr fromJSON(std::string state_json)
{
Poco::JSON::Parser parser;
auto state = parser.parse(state_json).extract<Poco::JSON::Object::Ptr>();
MapPtr result_ptr = std::make_shared<Map>();
for (const auto & table_name : state->getNames())
{
auto table_status_json = state->getValue<std::string>(table_name);
auto table_status = parser.parse(table_status_json).extract<Poco::JSON::Object::Ptr>();
/// Map entry will be created if it is absent
auto & map_table_status = (*result_ptr)[table_name];
map_table_status.all_partitions_count += table_status->getValue<size_t>("all_partitions_count");
map_table_status.processed_partitions_count += table_status->getValue<size_t>("processed_partitions_count");
}
return result_ptr;
}
static std::string serializeToJSON(MapPtr statuses)
{
Poco::JSON::Object result_json;
for (const auto & [table_name, table_status] : *statuses)
{
Poco::JSON::Object status_json;
status_json.set("all_partitions_count", table_status.all_partitions_count);
status_json.set("processed_partitions_count", table_status.processed_partitions_count);
result_json.set(table_name, status_json);
}
std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
oss.exceptions(std::ios::failbit);
Poco::JSON::Stringifier::stringify(result_json, oss);
auto result = oss.str();
return result;
}
static MapPtr fromJSON(String state_json);
static String serializeToJSON(MapPtr statuses);
};
}

View File

@ -0,0 +1,74 @@
#include "TaskCluster.h"
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
/// Stores the task's ZooKeeper path and the default local database name.
TaskCluster::TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_)
    : task_zookeeper_path(task_zookeeper_path_),
      default_local_database(default_local_database_)
{
}
/// Reads the task description from `config`.
/// All keys are looked up below `base_key` (when it is not empty). The "remote_servers" section
/// must exist, otherwise BAD_ARGUMENTS is thrown. One table task is appended to `table_tasks`
/// for every key found below "tables".
void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key)
{
    String prefix;
    if (!base_key.empty())
        prefix = base_key + ".";

    clusters_prefix = prefix + "remote_servers";
    if (!config.has(clusters_prefix))
        throw Exception("You should specify list of clusters in " + clusters_prefix, ErrorCodes::BAD_ARGUMENTS);

    const String tables_prefix = prefix + "tables";
    Poco::Util::AbstractConfiguration::Keys tables_keys;
    config.keys(tables_prefix, tables_keys);

    for (const auto & table_key : tables_keys)
        table_tasks.emplace_back(*this, config, tables_prefix, table_key);
}
/// Re-reads `max_workers` and the three settings profiles (common, pull, push) from `config`.
/// `settings_pull` and `settings_push` both start as a copy of `settings_common` and are then
/// refined by their own optional sections; afterwards some settings are forced and others
/// receive a default unless they were changed explicitly.
void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key)
{
    String prefix;
    if (!base_key.empty())
        prefix = base_key + ".";

    max_workers = config.getUInt64(prefix + "max_workers");

    const String common_key = prefix + "settings";
    const String pull_key = prefix + "settings_pull";
    const String push_key = prefix + "settings_push";

    settings_common = Settings();
    if (config.has(common_key))
        settings_common.loadSettingsFromConfig(common_key, config);
    settings_common.prefer_localhost_replica = false;

    settings_pull = settings_common;
    if (config.has(pull_key))
        settings_pull.loadSettingsFromConfig(pull_key, config);

    settings_push = settings_common;
    if (config.has(push_key))
        settings_push.loadSettingsFromConfig(push_key, config);

    /// Re-assigns the current value when the setting is marked as changed, otherwise assigns `fallback`.
    auto keep_or_default = [] (auto && setting, auto && fallback)
    {
        setting = setting.changed ? setting.value : fallback;
    };

    /// Override important settings
    settings_pull.readonly = 1;
    settings_pull.prefer_localhost_replica = false;
    settings_push.insert_distributed_sync = true;
    settings_push.prefer_localhost_replica = false;

    keep_or_default(settings_pull.load_balancing, LoadBalancing::NEAREST_HOSTNAME);
    keep_or_default(settings_pull.max_threads, 1);
    keep_or_default(settings_pull.max_block_size, 8192UL);
    keep_or_default(settings_pull.preferred_block_size_bytes, 0);

    keep_or_default(settings_push.insert_distributed_timeout, 0);
    keep_or_default(settings_push.replication_alter_partitions_sync, 2);
}
}

View File

@ -1,21 +1,20 @@
#pragma once
#include "Aliases.h"
#include "TaskTable.h"
#include <Core/Settings.h>
#include <base/types.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <random>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
struct TaskCluster
{
TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_)
: task_zookeeper_path(task_zookeeper_path_)
, default_local_database(default_local_database_)
{}
TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_);
void loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key = "");
@ -50,61 +49,4 @@ struct TaskCluster
pcg64 random_engine;
};
/// Reads the task description from `config`.
/// All keys are looked up below `base_key` (when it is not empty). The "remote_servers" section
/// must exist, otherwise BAD_ARGUMENTS is thrown. One table task is appended to `table_tasks`
/// for every key found below "tables".
inline void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key)
{
    String prefix;
    if (!base_key.empty())
        prefix = base_key + ".";

    clusters_prefix = prefix + "remote_servers";
    if (!config.has(clusters_prefix))
        throw Exception("You should specify list of clusters in " + clusters_prefix, ErrorCodes::BAD_ARGUMENTS);

    const String tables_prefix = prefix + "tables";
    Poco::Util::AbstractConfiguration::Keys tables_keys;
    config.keys(tables_prefix, tables_keys);

    for (const auto & table_key : tables_keys)
        table_tasks.emplace_back(*this, config, tables_prefix, table_key);
}
/// Re-reads `max_workers` and the three settings profiles (common, pull, push) from `config`.
/// `settings_pull` and `settings_push` both start as a copy of `settings_common` and are then
/// refined by their own optional sections; afterwards some settings are forced and others
/// receive a default unless they were changed explicitly.
inline void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key)
{
    String prefix;
    if (!base_key.empty())
        prefix = base_key + ".";

    max_workers = config.getUInt64(prefix + "max_workers");

    const String common_key = prefix + "settings";
    const String pull_key = prefix + "settings_pull";
    const String push_key = prefix + "settings_push";

    settings_common = Settings();
    if (config.has(common_key))
        settings_common.loadSettingsFromConfig(common_key, config);
    settings_common.prefer_localhost_replica = 0;

    settings_pull = settings_common;
    if (config.has(pull_key))
        settings_pull.loadSettingsFromConfig(pull_key, config);

    settings_push = settings_common;
    if (config.has(push_key))
        settings_push.loadSettingsFromConfig(push_key, config);

    /// Re-assigns the current value when the setting is marked as changed, otherwise assigns `fallback`.
    auto keep_or_default = [] (auto && setting, auto && fallback)
    {
        setting = setting.changed ? setting.value : fallback;
    };

    /// Override important settings
    settings_pull.readonly = 1;
    settings_pull.prefer_localhost_replica = false;
    settings_push.insert_distributed_sync = true;
    settings_push.prefer_localhost_replica = false;

    keep_or_default(settings_pull.load_balancing, LoadBalancing::NEAREST_HOSTNAME);
    keep_or_default(settings_pull.max_threads, 1);
    keep_or_default(settings_pull.max_block_size, 8192UL);
    keep_or_default(settings_pull.preferred_block_size_bytes, 0);

    keep_or_default(settings_push.insert_distributed_timeout, 0);
    keep_or_default(settings_push.replication_alter_partitions_sync, 2);
}
}

View File

@ -0,0 +1,37 @@
#include "TaskShard.h"
#include "TaskTable.h"
namespace DB
{
/// Binds the shard to its table, copies the shard info and pre-fills
/// `list_of_split_tables_on_shard` with one empty name per split of the table.
TaskShard::TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_)
    : task_table(parent),
      info(info_)
{
    const auto splits_count = task_table.number_of_splits;
    list_of_split_tables_on_shard.assign(splits_count, DatabaseAndTableName());
}
/// Returns the shard number stored in the shard info (`info.shard_num`), unchanged.
UInt32 TaskShard::numberInCluster() const
{
return info.shard_num;
}
/// Returns the shard number minus one; it is used as an index into the cluster's
/// list of shard addresses (see getHostNameExample()).
UInt32 TaskShard::indexInCluster() const
{
    const UInt32 shard_number = numberInCluster();
    return shard_number - 1;
}
/// Human-readable description of the shard: its number in the cluster, one replica of the shard,
/// the quoted pull table and the name of the pull cluster.
/// The format string now closes the parenthesis it opens; previously the text ended unbalanced.
String DB::TaskShard::getDescription() const
{
    return fmt::format("N{} (having a replica {}, pull table {} of cluster {})",
        numberInCluster(), getHostNameExample(), getQuotedTable(task_table.table_pull), task_table.cluster_pull_name);
}
/// Readable address of the first replica of this shard in the pull cluster.
/// Both lookups use at(), so an out-of-range index is not silently accepted.
String DB::TaskShard::getHostNameExample() const
{
    const auto & shards_addresses = task_table.cluster_pull->getShardsAddresses();
    const auto & shard_replicas = shards_addresses.at(indexInCluster());
    const auto & first_replica = shard_replicas.at(0);
    return first_replica.readableString();
}
}

View File

@ -0,0 +1,56 @@
#pragma once
#include "Aliases.h"
#include "Internals.h"
#include "ClusterPartition.h"
#include "ShardPartition.h"
namespace DB
{
struct TaskTable;
/// State of one shard of the pull cluster while a table is being copied.
struct TaskShard
{
/// Binds the shard to its table and copies `info_`.
TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_);
/// Table task this shard belongs to.
TaskTable & task_table;
/// Copy of the shard description passed to the constructor.
Cluster::ShardInfo info;
/// info.shard_num
UInt32 numberInCluster() const;
/// info.shard_num - 1
UInt32 indexInCluster() const;
/// Human-readable description of the shard.
String getDescription() const;
/// Readable address of the first replica of the shard.
String getHostNameExample() const;
/// Used to sort shards by their proximity (see TaskTable::initShards)
ShardPriority priority;
/// Column with unique destination partitions (computed from engine_push_partition_key expr.) in the shard
ColumnWithTypeAndName partition_key_column;
/// There is a task for each destination partition
TasksPartition partition_tasks;
/// Which partitions have been checked for existence.
/// If a partition from this list exists, it is in partition_tasks
std::set<String> checked_partitions;
/// Last CREATE TABLE query of the table of the shard
ASTPtr current_pull_table_create_query;
ASTPtr current_push_table_create_query;
/// Internal distributed tables
DatabaseAndTableName table_read_shard;
DatabaseAndTableName main_table_split_shard;
/// Sized to the table's number_of_splits by the constructor.
ListOfDatabasesAndTableNames list_of_split_tables_on_shard;
};
using TaskShardPtr = std::shared_ptr<TaskShard>;
using TasksShard = std::vector<TaskShardPtr>;
}

View File

@ -0,0 +1,221 @@
#include "TaskTable.h"
#include "ClusterPartition.h"
#include "TaskCluster.h"
#include <Parsers/ASTFunction.h>
#include <boost/algorithm/string/join.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
extern const int LOGICAL_ERROR;
}
/// Builds the description of one table task from the configuration subtree
/// `<prefix_>.<table_key>`.
/// Reads the split count, the pull/push clusters and tables, the destination engine,
/// the sharding key, the optional WHERE condition and the optional list of enabled partitions,
/// and pre-parses the textual expressions into ASTs.
TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config,
const String & prefix_, const String & table_key)
: task_cluster(parent)
{
String table_prefix = prefix_ + "." + table_key + ".";
name_in_config = table_key;
/// Falls back to 3 pieces when "number_of_splits" is not configured.
number_of_splits = config.getUInt64(table_prefix + "number_of_splits", 3);
allow_to_copy_alias_and_materialized_columns = config.getBool(table_prefix + "allow_to_copy_alias_and_materialized_columns", false);
allow_to_drop_target_partitions = config.getBool(table_prefix + "allow_to_drop_target_partitions", false);
/// The cluster and table names below are read without a fallback value.
cluster_pull_name = config.getString(table_prefix + "cluster_pull");
cluster_push_name = config.getString(table_prefix + "cluster_push");
table_pull.first = config.getString(table_prefix + "database_pull");
table_pull.second = config.getString(table_prefix + "table_pull");
table_push.first = config.getString(table_prefix + "database_push");
table_push.second = config.getString(table_prefix + "table_push");
/// Used as node name in ZooKeeper
table_id = escapeForFileName(cluster_push_name)
+ "." + escapeForFileName(table_push.first)
+ "." + escapeForFileName(table_push.second);
/// NOTE(review): the fallback "rand()" is parsed with ParserStorage below although it looks
/// like a sharding expression rather than a storage definition - confirm it is intended.
engine_push_str = config.getString(table_prefix + "engine", "rand()");
{
/// Parse the destination storage definition and derive partition key, primary key and replication flag from it.
ParserStorage parser_storage;
engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
engine_push_partition_key_ast = extractPartitionKey(engine_push_ast);
primary_key_comma_separated = boost::algorithm::join(extractPrimaryKeyColumnNames(engine_push_ast), ", ");
is_replicated_table = isReplicatedTableEngine(engine_push_ast);
}
sharding_key_str = config.getString(table_prefix + "sharding_key");
auxiliary_engine_split_asts.reserve(number_of_splits);
{
/// One Distributed storage for the whole push table and one per piece table "<table_push>_piece_<N>".
ParserExpressionWithOptionalAlias parser_expression(false);
sharding_key_ast = parseQuery(parser_expression, sharding_key_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
main_engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second,
sharding_key_ast);
for (const auto piece_number : collections::range(0, number_of_splits))
{
auxiliary_engine_split_asts.emplace_back
(
createASTStorageDistributed(cluster_push_name, table_push.first,
table_push.second + "_piece_" + toString(piece_number), sharding_key_ast)
);
}
}
where_condition_str = config.getString(table_prefix + "where_condition", "");
if (!where_condition_str.empty())
{
ParserExpressionWithOptionalAlias parser_expression(false);
where_condition_ast = parseQuery(parser_expression, where_condition_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
// Will use canonical expression form
where_condition_str = queryToString(where_condition_ast);
}
String enabled_partitions_prefix = table_prefix + "enabled_partitions";
has_enabled_partitions = config.has(enabled_partitions_prefix);
if (has_enabled_partitions)
{
Strings keys;
config.keys(enabled_partitions_prefix, keys);
/// No child keys: the element itself holds the list as text.
if (keys.empty())
{
/// Parse list of partition from space-separated string
String partitions_str = config.getString(table_prefix + "enabled_partitions");
boost::trim_if(partitions_str, isWhitespaceASCII);
boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on);
}
else
{
/// Parse sequence of <partition>...</partition>
for (const String &key : keys)
{
/// Only child keys whose name starts with "partition" are accepted.
if (!startsWith(key, "partition"))
throw Exception("Unknown key " + key + " in " + enabled_partitions_prefix, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." + key));
}
}
/// Mirror the ordered list into the set.
std::copy(enabled_partitions.begin(), enabled_partitions.end(), std::inserter(enabled_partitions_set, enabled_partitions_set.begin()));
}
}
/// Path of a partition: <task_zookeeper_path>/tables/<table_id>/<escaped partition name>.
String TaskTable::getPartitionPath(const String & partition_name) const
{
    String path = task_cluster.task_zookeeper_path; // root
    path += "/tables/";
    path += table_id;                               // e.g. dst_cluster.merge.hits
    path += "/";
    path += escapeForFileName(partition_name);      // e.g. 201701
    return path;
}
/// Path of the "attach_active" node directly below the partition node.
String TaskTable::getPartitionAttachIsActivePath(const String & partition_name) const
{
    String path = getPartitionPath(partition_name);
    path += "/attach_active";
    return path;
}
/// Path of the "attach_is_done" node directly below the partition node.
String TaskTable::getPartitionAttachIsDonePath(const String & partition_name) const
{
    String path = getPartitionPath(partition_name);
    path += "/attach_is_done";
    return path;
}
/// Path of one piece of a partition: <partition path>/piece_<piece_number>.
/// The assert requires piece_number < number_of_splits, i.e. pieces are numbered from 0.
String TaskTable::getPartitionPiecePath(const String & partition_name, size_t piece_number) const
{
    assert(piece_number < number_of_splits);

    const String piece_suffix = "/piece_" + toString(piece_number);
    return getPartitionPath(partition_name) + piece_suffix;
}
/// Path of the "is_dirty" node directly below the partition node.
String TaskTable::getCertainPartitionIsDirtyPath(const String &partition_name) const
{
    String path = getPartitionPath(partition_name);
    path += "/is_dirty";
    return path;
}
/// Path of the "is_dirty" node directly below the node of the given partition piece.
String TaskTable::getCertainPartitionPieceIsDirtyPath(const String & partition_name, const size_t piece_number) const
{
    String path = getPartitionPiecePath(partition_name, piece_number);
    path += "/is_dirty";
    return path;
}
/// Path of the "cleaned" node, a child of the partition's "is_dirty" node.
String TaskTable::getCertainPartitionIsCleanedPath(const String & partition_name) const
{
    String path = getCertainPartitionIsDirtyPath(partition_name);
    path += "/cleaned";
    return path;
}
/// Path of the "cleaned" node, a child of the piece's "is_dirty" node.
String TaskTable::getCertainPartitionPieceIsCleanedPath(const String & partition_name, const size_t piece_number) const
{
    String path = getCertainPartitionPieceIsDirtyPath(partition_name, piece_number);
    path += "/cleaned";
    return path;
}
/// Path of the "shards" node directly below the partition node.
String TaskTable::getCertainPartitionTaskStatusPath(const String & partition_name) const
{
    String path = getPartitionPath(partition_name);
    path += "/shards";
    return path;
}
/// Path of the "shards" node directly below the node of the given partition piece.
String TaskTable::getCertainPartitionPieceTaskStatusPath(const String & partition_name, const size_t piece_number) const
{
    String path = getPartitionPiecePath(partition_name, piece_number);
    path += "/shards";
    return path;
}
/// Returns the flag the constructor computed with isReplicatedTableEngine(engine_push_ast).
bool TaskTable::isReplicatedTable() const
{
return is_replicated_table;
}
/// Path of the status node "all_partitions_count": <task_zookeeper_path>/status/all_partitions_count.
/// The path does not depend on the table.
String TaskTable::getStatusAllPartitionCount() const
{
    String path = task_cluster.task_zookeeper_path;
    path += "/status/all_partitions_count";
    return path;
}
/// Path of the status node "processed_partitions_count":
/// <task_zookeeper_path>/status/processed_partitions_count. The path does not depend on the table.
String TaskTable::getStatusProcessedPartitionsCount() const
{
    String path = task_cluster.task_zookeeper_path;
    path += "/status/processed_partitions_count";
    return path;
}
/// Returns a copy of the destination storage definition (`engine_push_ast`) in which the engine
/// name has lost its first 10 characters (the "Replicated" prefix) and the leading engine
/// arguments, at most two of them, have been removed. `engine_push_ast` itself is not modified.
///
/// NOTE(review): the name is cut unconditionally, so the function assumes the engine name really
/// starts with "Replicated" - callers presumably check isReplicatedTable() first; confirm.
ASTPtr TaskTable::rewriteReplicatedCreateQueryToPlain() const
{
    ASTPtr prev_engine_push_ast = engine_push_ast->clone();

    auto & new_storage_ast = prev_engine_push_ast->as<ASTStorage &>();
    auto & new_engine_ast = new_storage_ast.engine->as<ASTFunction &>();

    /// Remove "Replicated" from name
    new_engine_ast.name = new_engine_ast.name.substr(10);

    if (new_engine_ast.arguments)
    {
        auto & replicated_table_arguments = new_engine_ast.arguments->children;

        /// In some cases of Atomic database engine usage ReplicatedMergeTree tables
        /// could be created without arguments.
        /// Delete the first two arguments of Replicated...MergeTree() table, but never erase from
        /// an empty list: with a single argument a second unconditional erase(begin()) would be
        /// undefined behaviour.
        for (size_t removed = 0; removed < 2 && !replicated_table_arguments.empty(); ++removed)
            replicated_table_arguments.erase(replicated_table_arguments.begin());
    }

    return new_storage_ast.clone();
}
/// Returns the cluster partition registered under `partition_name`.
/// Throws LOGICAL_ERROR when `cluster_partitions` has no such entry.
ClusterPartition & TaskTable::getClusterPartition(const String & partition_name)
{
    if (auto found = cluster_partitions.find(partition_name); found != cluster_partitions.end())
        return found->second;

    throw Exception("There are no cluster partition " + partition_name + " in " + table_id,
        ErrorCodes::LOGICAL_ERROR);
}
}

173
programs/copier/TaskTable.h Normal file
View File

@ -0,0 +1,173 @@
#pragma once
#include "Aliases.h"
#include "TaskShard.h"
namespace DB
{
struct ClusterPartition;
struct TaskCluster;
/// Description and runtime state of one table that is copied from the pull cluster
/// to the push cluster.
struct TaskTable
{
/// Reads the table description from the configuration subtree `<prefix>.<table_key>`.
TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix, const String & table_key);
TaskCluster & task_cluster;
/// These functions used in checkPartitionIsDone() or checkPartitionPieceIsDone()
/// They are implemented here not to call task_table.tasks_shard[partition_name].second.pieces[current_piece_number] etc.
String getPartitionPath(const String & partition_name) const;
String getPartitionAttachIsActivePath(const String & partition_name) const;
String getPartitionAttachIsDonePath(const String & partition_name) const;
String getPartitionPiecePath(const String & partition_name, size_t piece_number) const;
String getCertainPartitionIsDirtyPath(const String & partition_name) const;
String getCertainPartitionPieceIsDirtyPath(const String & partition_name, size_t piece_number) const;
String getCertainPartitionIsCleanedPath(const String & partition_name) const;
String getCertainPartitionPieceIsCleanedPath(const String & partition_name, size_t piece_number) const;
String getCertainPartitionTaskStatusPath(const String & partition_name) const;
String getCertainPartitionPieceTaskStatusPath(const String & partition_name, size_t piece_number) const;
bool isReplicatedTable() const;
/// These nodes are used for check-status option
String getStatusAllPartitionCount() const;
String getStatusProcessedPartitionsCount() const;
/// Partitions will be split into number-of-splits pieces.
/// Each piece will be copied independently.
/// (The constructor falls back to 3 when "number_of_splits" is not configured.)
size_t number_of_splits;
bool allow_to_copy_alias_and_materialized_columns{false};
bool allow_to_drop_target_partitions{false};
String name_in_config;
/// Used as task ID
String table_id;
/// Column names in primary key
String primary_key_comma_separated;
/// Source cluster and table
String cluster_pull_name;
DatabaseAndTableName table_pull;
/// Destination cluster and table
String cluster_push_name;
DatabaseAndTableName table_push;
/// Storage of destination table
/// (tables that are stored on each shard of target cluster)
String engine_push_str;
ASTPtr engine_push_ast;
ASTPtr engine_push_partition_key_ast;
/// First argument of Replicated...MergeTree()
String engine_push_zk_path;
bool is_replicated_table;
/// Copy of the destination storage definition with the "Replicated" prefix and the leading engine arguments removed.
ASTPtr rewriteReplicatedCreateQueryToPlain() const;
/*
* A Distributed table definition used to split data
* Distributed table will be created on each shard of default
* cluster to perform data copying and resharding
* */
String sharding_key_str;
ASTPtr sharding_key_ast;
ASTPtr main_engine_split_ast;
/*
* To copy a partition piece from one cluster to another we have to use a Distributed table.
* When a separate table (engine_push) is used for each partition piece,
* we have to use many Distributed tables.
* */
ASTs auxiliary_engine_split_asts;
/// Additional WHERE expression to filter input data
String where_condition_str;
ASTPtr where_condition_ast;
/// Resolved clusters
ClusterPtr cluster_pull;
ClusterPtr cluster_push;
/// Filter partitions that should be copied
bool has_enabled_partitions = false;
Strings enabled_partitions;
NameSet enabled_partitions_set;
/**
* Prioritized list of shards
* all_shards contains information about all shards in the table.
* So we have to check whether a particular shard has the current partition or not while processing.
*/
TasksShard all_shards;
TasksShard local_shards;
/// All partitions of the current table.
ClusterPartitions cluster_partitions;
NameSet finished_cluster_partitions;
/// Partition names to process in user-specified order
Strings ordered_partition_names;
/// Throws LOGICAL_ERROR when the partition is not in cluster_partitions.
ClusterPartition & getClusterPartition(const String & partition_name);
Stopwatch watch;
UInt64 bytes_copied = 0;
UInt64 rows_copied = 0;
/// Fills all_shards (sorted by priority) and local_shards from cluster_pull.
template <typename RandomEngine>
void initShards(RandomEngine &&random_engine);
};
using TasksTable = std::list<TaskTable>;
template<typename RandomEngine>
inline void TaskTable::initShards(RandomEngine && random_engine)
{
const String & fqdn_name = getFQDNOrHostName();
std::uniform_int_distribution<uint8_t> get_urand(0, std::numeric_limits<UInt8>::max());
// Compute the priority
for (const auto & shard_info : cluster_pull->getShardsInfo())
{
TaskShardPtr task_shard = std::make_shared<TaskShard>(*this, shard_info);
const auto & replicas = cluster_pull->getShardsAddresses().at(task_shard->indexInCluster());
task_shard->priority = getReplicasPriority(replicas, fqdn_name, get_urand(random_engine));
all_shards.emplace_back(task_shard);
}
// Sort by priority
std::sort(all_shards.begin(), all_shards.end(),
[](const TaskShardPtr & lhs, const TaskShardPtr & rhs)
{
return ShardPriority::greaterPriority(lhs->priority, rhs->priority);
});
// Cut local shards
auto it_first_remote = std::lower_bound(all_shards.begin(), all_shards.end(), 1,
[](const TaskShardPtr & lhs, UInt8 is_remote)
{
return lhs->priority.is_remote < is_remote;
});
local_shards.assign(all_shards.begin(), it_first_remote);
}
}

View File

@ -1,434 +0,0 @@
#pragma once
#include "Aliases.h"
#include "Internals.h"
#include "ClusterPartition.h"
#include <Core/Defines.h>
#include <Parsers/ASTFunction.h>
#include <base/map.h>
#include <boost/algorithm/string/join.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
extern const int LOGICAL_ERROR;
}
struct TaskShard;
/// Description and runtime state of one table that is copied from the pull cluster
/// to the push cluster.
struct TaskTable
{
/// Reads the table description from the configuration subtree `<prefix>.<table_key>`.
TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix,
const String & table_key);
TaskCluster & task_cluster;
/// These functions used in checkPartitionIsDone() or checkPartitionPieceIsDone()
/// They are implemented here not to call task_table.tasks_shard[partition_name].second.pieces[current_piece_number] etc.
String getPartitionPath(const String & partition_name) const;
String getPartitionAttachIsActivePath(const String & partition_name) const;
String getPartitionAttachIsDonePath(const String & partition_name) const;
String getPartitionPiecePath(const String & partition_name, size_t piece_number) const;
String getCertainPartitionIsDirtyPath(const String & partition_name) const;
String getCertainPartitionPieceIsDirtyPath(const String & partition_name, size_t piece_number) const;
String getCertainPartitionIsCleanedPath(const String & partition_name) const;
String getCertainPartitionPieceIsCleanedPath(const String & partition_name, size_t piece_number) const;
String getCertainPartitionTaskStatusPath(const String & partition_name) const;
String getCertainPartitionPieceTaskStatusPath(const String & partition_name, size_t piece_number) const;
bool isReplicatedTable() const { return is_replicated_table; }
/// These nodes are used for check-status option
String getStatusAllPartitionCount() const;
String getStatusProcessedPartitionsCount() const;
/// Partitions will be split into number-of-splits pieces.
/// Each piece will be copied independently. (10 by default)
/// NOTE(review): the constructor shown in TaskTable.cpp falls back to 3, not 10 - confirm which is intended.
size_t number_of_splits;
bool allow_to_copy_alias_and_materialized_columns{false};
bool allow_to_drop_target_partitions{false};
String name_in_config;
/// Used as task ID
String table_id;
/// Column names in primary key
String primary_key_comma_separated;
/// Source cluster and table
String cluster_pull_name;
DatabaseAndTableName table_pull;
/// Destination cluster and table
String cluster_push_name;
DatabaseAndTableName table_push;
/// Storage of destination table
/// (tables that are stored on each shard of target cluster)
String engine_push_str;
ASTPtr engine_push_ast;
ASTPtr engine_push_partition_key_ast;
/// First argument of Replicated...MergeTree()
String engine_push_zk_path;
bool is_replicated_table;
ASTPtr rewriteReplicatedCreateQueryToPlain() const;
/*
* A Distributed table definition used to split data
* Distributed table will be created on each shard of default
* cluster to perform data copying and resharding
* */
String sharding_key_str;
ASTPtr sharding_key_ast;
ASTPtr main_engine_split_ast;
/*
* To copy a partition piece from one cluster to another we have to use a Distributed table.
* When a separate table (engine_push) is used for each partition piece,
* we have to use many Distributed tables.
* */
ASTs auxiliary_engine_split_asts;
/// Additional WHERE expression to filter input data
String where_condition_str;
ASTPtr where_condition_ast;
/// Resolved clusters
ClusterPtr cluster_pull;
ClusterPtr cluster_push;
/// Filter partitions that should be copied
bool has_enabled_partitions = false;
Strings enabled_partitions;
NameSet enabled_partitions_set;
/**
* Prioritized list of shards
* all_shards contains information about all shards in the table.
* So we have to check whether a particular shard has the current partition or not while processing.
*/
TasksShard all_shards;
TasksShard local_shards;
/// All partitions of the current table.
ClusterPartitions cluster_partitions;
NameSet finished_cluster_partitions;
/// Partition names to process in user-specified order
Strings ordered_partition_names;
/// Returns the partition registered under `partition_name`; throws LOGICAL_ERROR when there is none.
ClusterPartition & getClusterPartition(const String & partition_name)
{
auto it = cluster_partitions.find(partition_name);
if (it == cluster_partitions.end())
throw Exception("There are no cluster partition " + partition_name + " in " + table_id,
ErrorCodes::LOGICAL_ERROR);
return it->second;
}
Stopwatch watch;
UInt64 bytes_copied = 0;
UInt64 rows_copied = 0;
template <typename RandomEngine>
void initShards(RandomEngine &&random_engine);
};
/// Per-shard state used while copying a table: the shard description, its priority,
/// the partition tasks discovered on it and the helper tables created for it.
struct TaskShard
{
    TaskShard(TaskTable & parent, const ShardInfo & info_)
        : task_table(parent)
        , info(info_)
    {
        /// One default-constructed table name per split piece of the parent table.
        list_of_split_tables_on_shard.assign(task_table.number_of_splits, DatabaseAndTableName());
    }

    /// Table this shard belongs to.
    TaskTable & task_table;

    /// Copy of the shard description passed to the constructor.
    ShardInfo info;

    /// Shard number exactly as stored in ShardInfo.
    UInt32 numberInCluster() const
    {
        return info.shard_num;
    }

    /// shard_num shifted down by one; used as an index into getShardsAddresses().
    UInt32 indexInCluster() const
    {
        return info.shard_num - 1;
    }

    String getDescription() const;

    String getHostNameExample() const;

    /// Used to sort clusters by their proximity
    ShardPriority priority;

    /// Column with unique destination partitions (computed from engine_push_partition_key expr.) in the shard
    ColumnWithTypeAndName partition_key_column;

    /// There is a task for each destination partition
    TasksPartition partition_tasks;

    /// Partitions that have already been checked for existence.
    /// If a partition from this set exists, it is in partition_tasks.
    std::set<String> checked_partitions;

    /// Last CREATE TABLE query of the table of the shard
    ASTPtr current_pull_table_create_query;
    ASTPtr current_push_table_create_query;

    /// Internal distributed tables
    DatabaseAndTableName table_read_shard;
    DatabaseAndTableName main_table_split_shard;
    ListOfDatabasesAndTableNames list_of_split_tables_on_shard;
};
/// Path of a partition node: <task_zookeeper_path>/tables/<table_id>/<escaped partition name>.
inline String TaskTable::getPartitionPath(const String & partition_name) const
{
    String path = task_cluster.task_zookeeper_path;     // root
    path += "/tables/";
    path += table_id;                                   // e.g. dst_cluster.merge.hits
    path += "/";
    path += escapeForFileName(partition_name);          // e.g. 201701
    return path;
}
/// Path of the `attach_active` node located directly under the partition path.
inline String TaskTable::getPartitionAttachIsActivePath(const String & partition_name) const
{
    String path = getPartitionPath(partition_name);
    path += "/attach_active";
    return path;
}
/// Path of the `attach_is_done` node located directly under the partition path.
inline String TaskTable::getPartitionAttachIsDonePath(const String & partition_name) const
{
    String path = getPartitionPath(partition_name);
    path += "/attach_is_done";
    return path;
}
/// Path of one piece of a partition: <partition path>/piece_<piece_number>.
/// The assert requires piece_number to be in [0, number_of_splits).
inline String TaskTable::getPartitionPiecePath(const String & partition_name, size_t piece_number) const
{
    assert(piece_number < number_of_splits);

    String path = getPartitionPath(partition_name);
    path += "/piece_";
    path += toString(piece_number);
    return path;
}
/// Path of the `is_dirty` node located directly under the partition path.
inline String TaskTable::getCertainPartitionIsDirtyPath(const String & partition_name) const
{
    String path = getPartitionPath(partition_name);
    path += "/is_dirty";
    return path;
}
/// Path of the `is_dirty` node located directly under the path of a partition piece.
inline String TaskTable::getCertainPartitionPieceIsDirtyPath(const String & partition_name, const size_t piece_number) const
{
    String path = getPartitionPiecePath(partition_name, piece_number);
    path += "/is_dirty";
    return path;
}
/// Path of the `cleaned` node, which is a child of the partition's `is_dirty` node.
inline String TaskTable::getCertainPartitionIsCleanedPath(const String & partition_name) const
{
    String path = getCertainPartitionIsDirtyPath(partition_name);
    path += "/cleaned";
    return path;
}
/// Path of the `cleaned` node, which is a child of the partition piece's `is_dirty` node.
inline String TaskTable::getCertainPartitionPieceIsCleanedPath(const String & partition_name, const size_t piece_number) const
{
    String path = getCertainPartitionPieceIsDirtyPath(partition_name, piece_number);
    path += "/cleaned";
    return path;
}
/// Path of the `shards` node located directly under the partition path.
inline String TaskTable::getCertainPartitionTaskStatusPath(const String & partition_name) const
{
    String path = getPartitionPath(partition_name);
    path += "/shards";
    return path;
}
/// Path of the `shards` node located directly under the path of a partition piece.
inline String TaskTable::getCertainPartitionPieceTaskStatusPath(const String & partition_name, const size_t piece_number) const
{
    String path = getPartitionPiecePath(partition_name, piece_number);
    path += "/shards";
    return path;
}
/// Path <task_zookeeper_path>/status/all_partitions_count (does not depend on the table).
inline String TaskTable::getStatusAllPartitionCount() const
{
    String path = task_cluster.task_zookeeper_path;
    path += "/status/all_partitions_count";
    return path;
}
/// Path <task_zookeeper_path>/status/processed_partitions_count (does not depend on the table).
inline String TaskTable::getStatusProcessedPartitionsCount() const
{
    String path = task_cluster.task_zookeeper_path;
    path += "/status/processed_partitions_count";
    return path;
}
/// Reads the description of one table task from the configuration subtree
/// `<prefix_>.<table_key>.` and fills every derived field: parsed engine, sharding key,
/// Distributed engines for the split tables, optional WHERE filter and the optional
/// list of enabled partitions.
inline TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config,
                            const String & prefix_, const String & table_key)
    : task_cluster(parent)
{
    const String table_prefix = prefix_ + "." + table_key + ".";
    /// Builds the full configuration key of a per-table setting.
    const auto key = [&table_prefix](const char * setting) { return table_prefix + setting; };

    name_in_config = table_key;

    number_of_splits = config.getUInt64(key("number_of_splits"), 3);

    allow_to_copy_alias_and_materialized_columns = config.getBool(key("allow_to_copy_alias_and_materialized_columns"), false);
    allow_to_drop_target_partitions = config.getBool(key("allow_to_drop_target_partitions"), false);

    cluster_pull_name = config.getString(key("cluster_pull"));
    cluster_push_name = config.getString(key("cluster_push"));

    table_pull.first = config.getString(key("database_pull"));
    table_pull.second = config.getString(key("table_pull"));

    table_push.first = config.getString(key("database_push"));
    table_push.second = config.getString(key("table_push"));

    /// Used as node name in ZooKeeper
    table_id = escapeForFileName(cluster_push_name)
        + "." + escapeForFileName(table_push.first)
        + "." + escapeForFileName(table_push.second);

    /// Destination engine: parse it once and derive everything that depends on it.
    engine_push_str = config.getString(key("engine"), "rand()");
    {
        ParserStorage storage_parser;
        engine_push_ast = parseQuery(storage_parser, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
        engine_push_partition_key_ast = extractPartitionKey(engine_push_ast);
        primary_key_comma_separated = boost::algorithm::join(extractPrimaryKeyColumnNames(engine_push_ast), ", ");
        is_replicated_table = isReplicatedTableEngine(engine_push_ast);
    }

    /// Sharding key and the Distributed engines built on top of it:
    /// one for the main table and one per "<table>_piece_<N>" table.
    sharding_key_str = config.getString(key("sharding_key"));

    auxiliary_engine_split_asts.reserve(number_of_splits);
    {
        ParserExpressionWithOptionalAlias sharding_key_parser(false);
        sharding_key_ast = parseQuery(sharding_key_parser, sharding_key_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
        main_engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second,
                                                            sharding_key_ast);

        for (const auto piece : collections::range(0, number_of_splits))
        {
            const String piece_table_name = table_push.second + "_piece_" + toString(piece);
            auxiliary_engine_split_asts.emplace_back(
                createASTStorageDistributed(cluster_push_name, table_push.first, piece_table_name, sharding_key_ast));
        }
    }

    /// Optional filter; when present it is stored in canonical (re-formatted) form.
    where_condition_str = config.getString(key("where_condition"), "");
    if (!where_condition_str.empty())
    {
        ParserExpressionWithOptionalAlias where_parser(false);
        where_condition_ast = parseQuery(where_parser, where_condition_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);

        // Will use canonical expression form
        where_condition_str = queryToString(where_condition_ast);
    }

    const String enabled_partitions_prefix = key("enabled_partitions");
    has_enabled_partitions = config.has(enabled_partitions_prefix);

    /// Nothing else to read when the partition filter is absent.
    if (!has_enabled_partitions)
        return;

    Strings partition_keys;
    config.keys(enabled_partitions_prefix, partition_keys);

    if (partition_keys.empty())
    {
        /// Parse list of partition from space-separated string
        String partitions_str = config.getString(enabled_partitions_prefix);
        boost::trim_if(partitions_str, isWhitespaceASCII);
        boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on);
    }
    else
    {
        /// Parse sequence of <partition>...</partition>
        for (const String & partition_key : partition_keys)
        {
            if (!startsWith(partition_key, "partition"))
                throw Exception("Unknown key " + partition_key + " in " + enabled_partitions_prefix, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);

            enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." + partition_key));
        }
    }

    /// Mirror the ordered list into the set.
    enabled_partitions_set.insert(enabled_partitions.begin(), enabled_partitions.end());
}
template<typename RandomEngine>
inline void TaskTable::initShards(RandomEngine && random_engine)
{
const String & fqdn_name = getFQDNOrHostName();
std::uniform_int_distribution<UInt8> get_urand(0, std::numeric_limits<UInt8>::max());
// Compute the priority
for (const auto & shard_info : cluster_pull->getShardsInfo())
{
TaskShardPtr task_shard = std::make_shared<TaskShard>(*this, shard_info);
const auto & replicas = cluster_pull->getShardsAddresses().at(task_shard->indexInCluster());
task_shard->priority = getReplicasPriority(replicas, fqdn_name, get_urand(random_engine));
all_shards.emplace_back(task_shard);
}
// Sort by priority
std::sort(all_shards.begin(), all_shards.end(),
[](const TaskShardPtr & lhs, const TaskShardPtr & rhs)
{
return ShardPriority::greaterPriority(lhs->priority, rhs->priority);
});
// Cut local shards
auto it_first_remote = std::lower_bound(all_shards.begin(), all_shards.end(), 1,
[](const TaskShardPtr & lhs, UInt8 is_remote)
{
return lhs->priority.is_remote < is_remote;
});
local_shards.assign(all_shards.begin(), it_first_remote);
}
/// Returns a copy of engine_push_ast in which the engine name has lost its first
/// 10 characters (the "Replicated" prefix) and the leading Replicated-specific
/// arguments (at most two) are removed. engine_push_ast itself is not modified.
///
/// NOTE(review): the name is cut with substr(10) without checking the prefix, so this
/// presumably must only be called when is_replicated_table is true - confirm at call sites.
inline ASTPtr TaskTable::rewriteReplicatedCreateQueryToPlain() const
{
    ASTPtr plain_storage_ast = engine_push_ast->clone();

    auto & storage = plain_storage_ast->as<ASTStorage &>();
    auto & engine = storage.engine->as<ASTFunction &>();

    /// Remove "Replicated" from name
    engine.name = engine.name.substr(10);

    if (engine.arguments)
    {
        auto & engine_arguments = engine.arguments->children;

        /// In some cases of Atomic database engine usage ReplicatedMergeTree tables
        /// could be created without arguments.
        /// Delete the first two arguments of Replicated...MergeTree(), but never more than
        /// are actually present: erasing at end() of a one-element list is undefined behaviour.
        const auto arguments_to_drop = std::min<size_t>(engine_arguments.size(), 2);
        engine_arguments.erase(engine_arguments.begin(), engine_arguments.begin() + arguments_to_drop);
    }

    /// The tree was already deep-copied above, so it can be returned as is.
    return plain_storage_ast;
}
/// Human-readable description of the shard for messages: its number in the cluster,
/// an example replica host, the quoted pull table and the pull cluster name.
inline String DB::TaskShard::getDescription() const
{
    /// The closing parenthesis was missing from the format string, leaving the text unbalanced.
    return fmt::format("N{} (having a replica {}, pull table {} of cluster {})",
                       numberInCluster(), getHostNameExample(), getQuotedTable(task_table.table_pull), task_table.cluster_pull_name);
}
/// Readable string of the first replica address of this shard in the pull cluster.
/// Both lookups use at(), so an unknown shard index or an empty replica list throws.
inline String DB::TaskShard::getHostNameExample() const
{
    const auto & shard_replicas = task_table.cluster_pull->getShardsAddresses().at(indexInCluster());
    const auto & first_replica = shard_replicas.at(0);
    return first_replica.readableString();
}
}

View File

@ -10,23 +10,34 @@ namespace DB
namespace
{
struct QueryTreeNodeHash
struct QueryTreeNodeWithHash
{
size_t operator()(const IQueryTreeNode * node) const
explicit QueryTreeNodeWithHash(const IQueryTreeNode * node_)
: node(node_)
, hash(node->getTreeHash().first)
{}
const IQueryTreeNode * node = nullptr;
size_t hash = 0;
};
struct QueryTreeNodeWithHashHash
{
size_t operator()(const QueryTreeNodeWithHash & node_with_hash) const
{
return node->getTreeHash().first;
return node_with_hash.hash;
}
};
struct QueryTreeNodeEqualTo
struct QueryTreeNodeWithHashEqualTo
{
size_t operator()(const IQueryTreeNode * lhs_node, const IQueryTreeNode * rhs_node) const
bool operator()(const QueryTreeNodeWithHash & lhs_node, const QueryTreeNodeWithHash & rhs_node) const
{
return lhs_node->isEqual(*rhs_node);
return lhs_node.hash == rhs_node.hash && lhs_node.node->isEqual(*rhs_node.node);
}
};
using QueryTreeNodeSet = std::unordered_set<const IQueryTreeNode *, QueryTreeNodeHash, QueryTreeNodeEqualTo>;
using QueryTreeNodeWithHashSet = std::unordered_set<QueryTreeNodeWithHash, QueryTreeNodeWithHashHash, QueryTreeNodeWithHashEqualTo>;
class OrderByLimitByDuplicateEliminationVisitor : public InDepthQueryTreeVisitor<OrderByLimitByDuplicateEliminationVisitor>
{
@ -82,7 +93,7 @@ public:
}
private:
QueryTreeNodeSet unique_expressions_nodes_set;
QueryTreeNodeWithHashSet unique_expressions_nodes_set;
};
}

View File

@ -2,6 +2,7 @@
#include <arpa/inet.h>
#include <sys/select.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
#include "ares.h"
#include "netdb.h"
@ -40,6 +41,8 @@ namespace DB
}
}
std::mutex CaresPTRResolver::mutex;
CaresPTRResolver::CaresPTRResolver(CaresPTRResolver::provider_token) : channel(nullptr)
{
/*
@ -73,6 +76,8 @@ namespace DB
std::unordered_set<std::string> CaresPTRResolver::resolve(const std::string & ip)
{
std::lock_guard guard(mutex);
std::unordered_set<std::string> ptr_records;
resolve(ip, ptr_records);
@ -83,6 +88,8 @@ namespace DB
std::unordered_set<std::string> CaresPTRResolver::resolve_v6(const std::string & ip)
{
std::lock_guard guard(mutex);
std::unordered_set<std::string> ptr_records;
resolve_v6(ip, ptr_records);
@ -110,23 +117,83 @@ namespace DB
void CaresPTRResolver::wait()
{
timeval * tvp, tv;
fd_set read_fds;
fd_set write_fds;
int nfds;
int sockets[ARES_GETSOCK_MAXNUM];
pollfd pollfd[ARES_GETSOCK_MAXNUM];
for (;;)
while (true)
{
FD_ZERO(&read_fds);
FD_ZERO(&write_fds);
nfds = ares_fds(channel, &read_fds,&write_fds);
if (nfds == 0)
auto readable_sockets = get_readable_sockets(sockets, pollfd);
auto timeout = calculate_timeout();
int number_of_fds_ready = 0;
if (!readable_sockets.empty())
{
number_of_fds_ready = poll(readable_sockets.data(), readable_sockets.size(), static_cast<int>(timeout));
}
if (number_of_fds_ready > 0)
{
process_readable_sockets(readable_sockets);
}
else
{
process_possible_timeout();
break;
}
}
}
std::span<pollfd> CaresPTRResolver::get_readable_sockets(int * sockets, pollfd * pollfd)
{
int sockets_bitmask = ares_getsock(channel, sockets, ARES_GETSOCK_MAXNUM);
int number_of_sockets_to_poll = 0;
for (int i = 0; i < ARES_GETSOCK_MAXNUM; i++, number_of_sockets_to_poll++)
{
pollfd[i].events = 0;
pollfd[i].revents = 0;
if (ARES_GETSOCK_READABLE(sockets_bitmask, i))
{
pollfd[i].fd = sockets[i];
pollfd[i].events = POLLIN;
}
else
{
break;
}
tvp = ares_timeout(channel, nullptr, &tv);
select(nfds, &read_fds, &write_fds, nullptr, tvp);
ares_process(channel, &read_fds, &write_fds);
}
return std::span<struct pollfd>(pollfd, number_of_sockets_to_poll);
}
/// Converts the timeval reported by ares_timeout() for this channel into milliseconds.
/// Returns 0 when ares_timeout() yields a null pointer.
int64_t CaresPTRResolver::calculate_timeout()
{
    timeval tv;
    auto * pending = ares_timeout(channel, nullptr, &tv);

    if (!pending)
        return 0;

    return pending->tv_sec * 1000 + pending->tv_usec / 1000;
}
/// Gives c-ares a chance to handle timeouts when no socket is being processed.
void CaresPTRResolver::process_possible_timeout()
{
/* Call ares_process_fd() unconditionally here with no sockets (ARES_SOCKET_BAD for both
the read and the write descriptor), even if we simply timed out above, as otherwise
the ares name resolve won't time out! */
ares_process_fd(channel, ARES_SOCKET_BAD, ARES_SOCKET_BAD);
}
/// Calls ares_process_fd() once per polled entry: with the entry's descriptor as the read
/// socket when POLLIN is set in revents, and with ARES_SOCKET_BAD otherwise.
/// The write socket is always ARES_SOCKET_BAD.
void CaresPTRResolver::process_readable_sockets(std::span<pollfd> readable_sockets)
{
    for (const auto & polled : readable_sockets)
    {
        if (polled.revents & POLLIN)
            ares_process_fd(channel, polled.fd, ARES_SOCKET_BAD);
        else
            ares_process_fd(channel, ARES_SOCKET_BAD, ARES_SOCKET_BAD);
    }
}
}

View File

@ -1,5 +1,8 @@
#pragma once
#include <span>
#include <poll.h>
#include <mutex>
#include "DNSPTRResolver.h"
using ares_channel = struct ares_channeldata *;
@ -20,7 +23,6 @@ namespace DB
* Allow only DNSPTRProvider to instantiate this class
* */
struct provider_token {};
public:
explicit CaresPTRResolver(provider_token);
~CaresPTRResolver() override;
@ -36,7 +38,17 @@ namespace DB
void resolve_v6(const std::string & ip, std::unordered_set<std::string> & response);
std::span<pollfd> get_readable_sockets(int * sockets, pollfd * pollfd);
int64_t calculate_timeout();
void process_possible_timeout();
void process_readable_sockets(std::span<pollfd> readable_sockets);
ares_channel channel;
static std::mutex mutex;
};
}

View File

@ -5,8 +5,10 @@ namespace DB
{
std::shared_ptr<DNSPTRResolver> DNSPTRResolverProvider::get()
{
return std::make_shared<CaresPTRResolver>(
static auto resolver = std::make_shared<CaresPTRResolver>(
CaresPTRResolver::provider_token {}
);
return resolver;
}
}

View File

@ -13,9 +13,9 @@ namespace DB
using Checksum = CityHash_v1_0_2::uint128;
CompressionCodecPtr getCompressionCodecForFile(const DataPartStoragePtr & data_part_storage, const String & relative_path)
CompressionCodecPtr getCompressionCodecForFile(const IDataPartStorage & data_part_storage, const String & relative_path)
{
auto read_buffer = data_part_storage->readFile(relative_path, {}, std::nullopt, std::nullopt);
auto read_buffer = data_part_storage.readFile(relative_path, {}, std::nullopt, std::nullopt);
read_buffer->ignore(sizeof(Checksum));
UInt8 header_size = ICompressionCodec::getHeaderSize();

View File

@ -11,6 +11,6 @@ namespace DB
/// clickhouse fashion (with checksums, headers for each block, etc). This
/// method should be used as fallback when we cannot deduce compression codec
/// from metadata.
CompressionCodecPtr getCompressionCodecForFile(const DataPartStoragePtr & data_part_storage, const String & relative_path);
CompressionCodecPtr getCompressionCodecForFile(const IDataPartStorage & data_part_storage, const String & relative_path);
}

View File

@ -1049,7 +1049,7 @@ INSTANTIATE_TEST_SUITE_P(RandomInt,
::testing::Combine(
DefaultCodecsToTest,
::testing::Values(
generateSeq<UInt8 >(G(RandomGenerator<UInt8>(0))),
generateSeq<UInt8 >(G(RandomGenerator<uint8_t>(0))),
generateSeq<UInt16>(G(RandomGenerator<UInt16>(0))),
generateSeq<UInt32>(G(RandomGenerator<UInt32>(0, 0, 1000'000'000))),
generateSeq<UInt64>(G(RandomGenerator<UInt64>(0, 0, 1000'000'000)))

View File

@ -69,7 +69,7 @@ static std::ostream & operator<<(std::ostream & ostr, const JSONPathAndValue & p
bool first = true;
for (const auto & part : path_and_value.path.getParts())
{
ostr << (first ? "{" : ", {") << part.key << ", " << part.is_nested << ", " << part.anonymous_array_level << "}";
ostr << (first ? "{" : ", {") << part.key << ", " << part.is_nested << ", " << static_cast<uint8_t>(part.anonymous_array_level) << "}";
first = false;
}

View File

@ -31,4 +31,33 @@ VolumePtr IStoragePolicy::getVolumeByName(const String & volume_name) const
return volume;
}
/// Throwing counterpart of tryGetVolumeIndexByDiskName(): returns the index it finds,
/// or throws UNKNOWN_DISK when it returns an empty optional.
size_t IStoragePolicy::getVolumeIndexByDiskName(const String & disk_name) const
{
    if (auto volume_index = tryGetVolumeIndexByDiskName(disk_name))
        return *volume_index;

    throw Exception(ErrorCodes::UNKNOWN_DISK,
        "No disk {} in policy {}", backQuote(disk_name), backQuote(getName()));
}
/// Returns the volume at the index reported by tryGetVolumeIndexByDiskName(),
/// or nullptr when no index is found for `disk_name`.
VolumePtr IStoragePolicy::tryGetVolumeByDiskName(const String & disk_name) const
{
    if (auto volume_index = tryGetVolumeIndexByDiskName(disk_name))
        return getVolume(*volume_index);

    return nullptr;
}
/// Throwing counterpart of tryGetVolumeByDiskName(): returns the volume it finds,
/// or throws UNKNOWN_DISK when it returns nullptr.
VolumePtr IStoragePolicy::getVolumeByDiskName(const String & disk_name) const
{
    if (auto volume = tryGetVolumeByDiskName(disk_name))
        return volume;

    throw Exception(ErrorCodes::UNKNOWN_DISK,
        "No disk {} in policy {}", backQuote(disk_name), backQuote(getName()));
}
}

View File

@ -4,6 +4,7 @@
#include <memory>
#include <vector>
#include <optional>
#include <base/types.h>
namespace DB
@ -55,12 +56,15 @@ public:
/// Get volume by index.
virtual VolumePtr getVolume(size_t index) const = 0;
virtual VolumePtr tryGetVolumeByName(const String & volume_name) const = 0;
virtual VolumePtr tryGetVolumeByDisk(const DiskPtr & disk_ptr) const = 0;
VolumePtr getVolumeByName(const String & volume_name) const;
/// Checks if storage policy can be replaced by another one.
virtual void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const = 0;
/// Find volume index, which contains disk
virtual size_t getVolumeIndexByDisk(const DiskPtr & disk_ptr) const = 0;
/// Finds a volume index, which contains disk
virtual std::optional<size_t> tryGetVolumeIndexByDiskName(const String & disk_name) const = 0;
size_t getVolumeIndexByDiskName(const String & disk_name) const;
/// Finds a volume which contains a specified disk.
VolumePtr tryGetVolumeByDiskName(const String & disk_name) const;
VolumePtr getVolumeByDiskName(const String & disk_name) const;
/// Check if we have any volume with stopped merges
virtual bool hasAnyVolumeWithDisabledMerges() const = 0;
virtual bool containsVolume(const String & volume_name) const = 0;

View File

@ -599,7 +599,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskObjectStorageTransaction::writeFile
auto write_operation = std::make_unique<WriteFileObjectStorageOperation>(object_storage, metadata_storage, object);
std::function<void(size_t count)> create_metadata_callback;
if (autocommit)
if (autocommit)
{
create_metadata_callback = [tx = shared_from_this(), mode, path, blob_name] (size_t count)
{

View File

@ -26,7 +26,6 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int UNKNOWN_DISK;
extern const int UNKNOWN_POLICY;
extern const int UNKNOWN_VOLUME;
extern const int LOGICAL_ERROR;
@ -311,22 +310,12 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol
}
size_t StoragePolicy::getVolumeIndexByDisk(const DiskPtr & disk_ptr) const
std::optional<size_t> StoragePolicy::tryGetVolumeIndexByDiskName(const String & disk_name) const
{
auto it = volume_index_by_disk_name.find(disk_ptr->getName());
auto it = volume_index_by_disk_name.find(disk_name);
if (it != volume_index_by_disk_name.end())
return it->second;
else
throw Exception("No disk " + backQuote(disk_ptr->getName()) + " in policy " + backQuote(name), ErrorCodes::UNKNOWN_DISK);
}
VolumePtr StoragePolicy::tryGetVolumeByDisk(const DiskPtr & disk_ptr) const
{
auto it = volume_index_by_disk_name.find(disk_ptr->getName());
if (it == volume_index_by_disk_name.end())
return nullptr;
return getVolume(it->second);
return {};
}

View File

@ -68,7 +68,7 @@ public:
ReservationPtr reserve(UInt64 bytes, size_t min_volume_index) const override;
/// Find volume index, which contains disk
size_t getVolumeIndexByDisk(const DiskPtr & disk_ptr) const override;
std::optional<size_t> tryGetVolumeIndexByDiskName(const String & disk_name) const override;
/// Reserves 0 bytes on disk with max available space
/// Do not use this function when it is possible to predict size.
@ -85,9 +85,6 @@ public:
VolumePtr tryGetVolumeByName(const String & volume_name) const override;
/// Finds a volume which contains a specified disk.
VolumePtr tryGetVolumeByDisk(const DiskPtr & disk_ptr) const override;
/// Checks if storage policy can be replaced by another one.
void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const override;

View File

@ -43,10 +43,34 @@ using FunctionCutToFirstSignificantSubdomainWithWWWRFC = FunctionStringToString<
REGISTER_FUNCTION(CutToFirstSignificantSubdomain)
{
factory.registerFunction<FunctionCutToFirstSignificantSubdomain>();
factory.registerFunction<FunctionCutToFirstSignificantSubdomainWithWWW>();
factory.registerFunction<FunctionCutToFirstSignificantSubdomainRFC>();
factory.registerFunction<FunctionCutToFirstSignificantSubdomainWithWWWRFC>();
factory.registerFunction<FunctionCutToFirstSignificantSubdomain>(
{
R"(Returns the part of the domain that includes top-level subdomains up to the "first significant subdomain" (see documentation of the `firstSignificantSubdomain`).)",
Documentation::Examples{
{"cutToFirstSignificantSubdomain1", "SELECT cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/')"},
{"cutToFirstSignificantSubdomain2", "SELECT cutToFirstSignificantSubdomain('www.tr')"},
{"cutToFirstSignificantSubdomain3", "SELECT cutToFirstSignificantSubdomain('tr')"},
},
Documentation::Categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainWithWWW>(
{
R"(Returns the part of the domain that includes top-level subdomains up to the "first significant subdomain", without stripping "www".)",
Documentation::Examples{},
Documentation::Categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainRFC>(
{
R"(Similar to `cutToFirstSignificantSubdomain` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainWithWWWRFC>(
{
R"(Similar to `cutToFirstSignificantSubdomainWithWWW` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
});
}
}

View File

@ -42,10 +42,41 @@ using FunctionCutToFirstSignificantSubdomainCustomWithWWWRFC = FunctionCutToFirs
REGISTER_FUNCTION(CutToFirstSignificantSubdomainCustom)
{
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustom>();
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustomWithWWW>();
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustomRFC>();
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustomWithWWWRFC>();
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustom>(
{
R"(
Returns the part of the domain that includes top-level subdomains up to the first significant subdomain. Accepts custom TLD list name.
Can be useful if you need fresh TLD list or you have custom.
)",
Documentation::Examples{
{"cutToFirstSignificantSubdomainCustom", "SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');"},
},
Documentation::Categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustomWithWWW>(
{
R"(
Returns the part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`.
Accepts custom TLD list name from config.
Can be useful if you need fresh TLD list or you have custom.
)",
Documentation::Examples{{"cutToFirstSignificantSubdomainCustomWithWWW", "SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')"}},
Documentation::Categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustomRFC>(
{
R"(Similar to `cutToFirstSignificantSubdomainCustom` but follows stricter rules according to RFC 3986.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustomWithWWWRFC>(
{
R"(Similar to `cutToFirstSignificantSubdomainCustomWithWWW` but follows stricter rules according to RFC 3986.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
});
}
}

View File

@ -14,8 +14,24 @@ using FunctionDomainRFC = FunctionStringToString<ExtractSubstringImpl<ExtractDom
REGISTER_FUNCTION(Domain)
{
factory.registerFunction<FunctionDomain>();
factory.registerFunction<FunctionDomainRFC>();
factory.registerFunction<FunctionDomain>(
{
R"(
Extracts the hostname from a URL.
The URL can be specified with or without a scheme.
If the argument can't be parsed as URL, the function returns an empty string.
)",
Documentation::Examples{{"domain", "SELECT domain('svn+ssh://some.svn-hosting.com:80/repo/trunk')"}},
Documentation::Categories{"URL"}
});
factory.registerFunction<FunctionDomainRFC>(
{
R"(Similar to `domain` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
});
}
}

View File

@ -14,8 +14,23 @@ using FunctionDomainWithoutWWWRFC = FunctionStringToString<ExtractSubstringImpl<
REGISTER_FUNCTION(DomainWithoutWWW)
{
factory.registerFunction<FunctionDomainWithoutWWW>();
factory.registerFunction<FunctionDomainWithoutWWWRFC>();
factory.registerFunction<FunctionDomainWithoutWWW>(
{
R"(
Extracts the hostname from a URL, removing the leading "www." if present.
The URL can be specified with or without a scheme.
If the argument can't be parsed as URL, the function returns an empty string.
)",
Documentation::Examples{{"domainWithoutWWW", "SELECT domainWithoutWWW('https://www.clickhouse.com')"}},
Documentation::Categories{"URL"}
});
factory.registerFunction<FunctionDomainWithoutWWWRFC>(
{
R"(Similar to `domainWithoutWWW` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
});
}
}

View File

@ -14,8 +14,28 @@ using FunctionFirstSignificantSubdomainRFC = FunctionStringToString<ExtractSubst
REGISTER_FUNCTION(FirstSignificantSubdomain)
{
factory.registerFunction<FunctionFirstSignificantSubdomain>();
factory.registerFunction<FunctionFirstSignificantSubdomainRFC>();
factory.registerFunction<FunctionFirstSignificantSubdomain>(
{
R"(
Returns the "first significant subdomain".
The first significant subdomain is a second-level domain if it is 'com', 'net', 'org', or 'co'.
Otherwise, it is a third-level domain.
For example, firstSignificantSubdomain('https://news.clickhouse.com/') = 'clickhouse', firstSignificantSubdomain ('https://news.clickhouse.com.tr/') = 'clickhouse'.
The list of "insignificant" second-level domains and other implementation details may change in the future.
)",
Documentation::Examples{{"firstSignificantSubdomain", "SELECT firstSignificantSubdomain('https://news.clickhouse.com/')"}},
Documentation::Categories{"URL"}
});
factory.registerFunction<FunctionFirstSignificantSubdomainRFC>(
{
R"(Returns the "first significant subdomain" according to RFC 1034.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
});
}
}

View File

@ -139,8 +139,18 @@ struct FunctionPortRFC : public FunctionPortImpl<true>
REGISTER_FUNCTION(Port)
{
factory.registerFunction<FunctionPort>();
factory.registerFunction<FunctionPortRFC>();
factory.registerFunction<FunctionPort>(
{
R"(Returns the port or `default_port` if there is no port in the URL (or in case of validation error).)",
Documentation::Examples{},
Documentation::Categories{"URL"}
});
factory.registerFunction<FunctionPortRFC>(
{
R"(Similar to `port`, but conforms to RFC 3986.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
});
}
}

View File

@ -53,8 +53,23 @@ using FunctionTopLevelDomainRFC = FunctionStringToString<ExtractSubstringImpl<Ex
REGISTER_FUNCTION(TopLevelDomain)
{
factory.registerFunction<FunctionTopLevelDomain>();
factory.registerFunction<FunctionTopLevelDomainRFC>();
factory.registerFunction<FunctionTopLevelDomain>(
{
R"(
Extracts the the top-level domain from a URL.
Returns an empty string if the argument cannot be parsed as a URL or does not contain a top-level domain.
)",
Documentation::Examples{{"topLevelDomain", "SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk')"}},
Documentation::Categories{"URL"}
});
factory.registerFunction<FunctionTopLevelDomainRFC>(
{
R"(Similar to topLevelDomain, but conforms to RFC 3986.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
});
}
}

View File

@ -213,7 +213,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue
{
/// And for simple MergeTree we can stop merges before acquiring the lock
auto merges_blocker = table->getActionLock(ActionLocks::PartsMerge);
auto table_lock = table->lockExclusively(context_->getCurrentQueryId(), context_->getSettingsRef().lock_acquire_timeout);
table_lock = table->lockExclusively(context_->getCurrentQueryId(), context_->getSettingsRef().lock_acquire_timeout);
}
auto metadata_snapshot = table->getInMemoryMetadataPtr();

View File

@ -207,8 +207,8 @@ bool PartLog::addNewParts(
elem.table_name = table_id.table_name;
elem.partition_id = part->info.partition_id;
elem.part_name = part->name;
elem.disk_name = part->data_part_storage->getDiskName();
elem.path_on_disk = part->data_part_storage->getFullPath();
elem.disk_name = part->getDataPartStorage().getDiskName();
elem.path_on_disk = part->getDataPartStorage().getFullPath();
elem.part_type = part->getType();
elem.bytes_compressed_on_disk = part->getBytesOnDisk();

View File

@ -1,7 +1,5 @@
#include <Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h>
#include <Columns/ColumnAggregateFunction.h>
#include <Common/AlignedBuffer.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h>
#include <DataTypes/DataTypeLowCardinality.h>
@ -18,70 +16,6 @@ AggregatingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition() = default;
AggregatingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition(ColumnsDefinition &&) noexcept = default;
AggregatingSortedAlgorithm::ColumnsDefinition::~ColumnsDefinition() = default;
/// Stores information for aggregation of AggregateFunction columns
struct AggregatingSortedAlgorithm::AggregateDescription
{
ColumnAggregateFunction * column = nullptr;
const size_t column_number = 0; /// Position in header.
AggregateDescription() = default;
explicit AggregateDescription(size_t col_number) : column_number(col_number) {}
};
/// Stores information for aggregation of SimpleAggregateFunction columns.
/// Owns one aggregation state kept in `state`; the state is created lazily by createState()
/// and released by destroyState() or by the destructor.
struct AggregatingSortedAlgorithm::SimpleAggregateDescription
{
/// An aggregate function 'anyLast', 'sum'...
AggregateFunctionPtr function;
/// Set in the constructor from function->getAddressOfAddFunction().
IAggregateFunction::AddFunc add_function = nullptr;
size_t column_number = 0;
IColumn * column = nullptr;
/// For LowCardinality, the column is converted to the nested type. nested_type is nullptr if no conversion is needed.
const DataTypePtr nested_type; /// Nested type for LowCardinality, if it is.
const DataTypePtr real_type; /// Type in header.
/// Memory for the aggregation state; reset in the constructor to function->sizeOfData() / function->alignOfData().
AlignedBuffer state;
/// True while `state` holds a state created by function->create() that has not been destroyed yet.
bool created = false;
SimpleAggregateDescription(
AggregateFunctionPtr function_, const size_t column_number_,
DataTypePtr nested_type_, DataTypePtr real_type_)
: function(std::move(function_)), column_number(column_number_)
, nested_type(std::move(nested_type_)), real_type(std::move(real_type_))
{
add_function = function->getAddressOfAddFunction();
state.reset(function->sizeOfData(), function->alignOfData());
}
/// Creates the aggregation state in `state` unless one already exists.
void createState()
{
if (created)
return;
function->create(state.data());
created = true;
}
/// Destroys the aggregation state if one exists; otherwise does nothing.
void destroyState()
{
if (!created)
return;
function->destroy(state.data());
created = false;
}
/// Explicitly destroy aggregation state if the stream is terminated
~SimpleAggregateDescription()
{
destroyState();
}
SimpleAggregateDescription() = default;
/// NOTE(review): the defaulted move copies `created`; moving from an object whose state is already
/// created would leave the source with created == true and a moved-from `function` — confirm this never happens.
SimpleAggregateDescription(SimpleAggregateDescription &&) = default;
SimpleAggregateDescription(const SimpleAggregateDescription &) = delete;
};
static AggregatingSortedAlgorithm::ColumnsDefinition defineColumns(
const Block & header, const SortDescription & description)
{
@ -191,6 +125,39 @@ static void postprocessChunk(Chunk & chunk, const AggregatingSortedAlgorithm::Co
}
/// Takes ownership of the aggregate function and the type descriptions, caches the function's
/// `add` entry point and prepares (but does not create) storage for one aggregation state.
AggregatingSortedAlgorithm::SimpleAggregateDescription::SimpleAggregateDescription(
    AggregateFunctionPtr function_, const size_t column_number_,
    DataTypePtr nested_type_, DataTypePtr real_type_)
    : function(std::move(function_))
    , add_function(function->getAddressOfAddFunction()) /// `function` is declared earlier, so it is already initialized here.
    , column_number(column_number_)
    , nested_type(std::move(nested_type_))
    , real_type(std::move(real_type_))
{
    const auto state_size = function->sizeOfData();
    const auto state_alignment = function->alignOfData();
    state.reset(state_size, state_alignment);
}
/// Creates the aggregation state inside `state`. Calling it again while a state exists is a no-op.
void AggregatingSortedAlgorithm::SimpleAggregateDescription::createState()
{
    if (!created)
    {
        function->create(state.data());
        created = true;
    }
}
/// Destroys the aggregation state held in `state`. Does nothing when no state has been created.
void AggregatingSortedAlgorithm::SimpleAggregateDescription::destroyState()
{
    if (created)
    {
        function->destroy(state.data());
        created = false;
    }
}
/// Explicitly destroy aggregation state if the stream is terminated
/// (destroyState() does nothing when no state has been created).
AggregatingSortedAlgorithm::SimpleAggregateDescription::~SimpleAggregateDescription()
{
destroyState();
}
AggregatingSortedAlgorithm::AggregatingMergedData::AggregatingMergedData(
MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def_)
: MergedData(std::move(columns_), false, max_block_size_), def(def_)

View File

@ -1,5 +1,7 @@
#pragma once
#include <Columns/ColumnAggregateFunction.h>
#include <Common/AlignedBuffer.h>
#include <Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h>
#include <Processors/Merges/Algorithms/MergedData.h>
#include <utility>
@ -23,8 +25,48 @@ public:
void consume(Input & input, size_t source_num) override;
Status merge() override;
struct SimpleAggregateDescription;
struct AggregateDescription;
/// Stores information for aggregation of SimpleAggregateFunction columns
struct SimpleAggregateDescription
{
/// An aggregate function 'anyLast', 'sum'...
AggregateFunctionPtr function;
IAggregateFunction::AddFunc add_function = nullptr;
size_t column_number = 0;
IColumn * column = nullptr;
/// For LowCardinality, convert is converted to nested type. nested_type is nullptr if no conversion needed.
const DataTypePtr nested_type; /// Nested type for LowCardinality, if it is.
const DataTypePtr real_type; /// Type in header.
AlignedBuffer state;
bool created = false;
SimpleAggregateDescription(
AggregateFunctionPtr function_, const size_t column_number_,
DataTypePtr nested_type_, DataTypePtr real_type_);
void createState();
void destroyState();
/// Explicitly destroy aggregation state if the stream is terminated
~SimpleAggregateDescription();
SimpleAggregateDescription() = default;
SimpleAggregateDescription(SimpleAggregateDescription &&) = default;
SimpleAggregateDescription(const SimpleAggregateDescription &) = delete;
};
/// Describes one AggregateFunction column that takes part in the merge.
struct AggregateDescription
{
    /// Not set by any constructor; remains nullptr until assigned by the user of this struct.
    ColumnAggregateFunction * column = nullptr;
    /// Position in header.
    const size_t column_number = 0;

    AggregateDescription() = default;

    explicit AggregateDescription(size_t col_number)
        : column_number(col_number)
    {
    }
};
/// This structure classifies each column into one of three types:
/// * columns which are not aggregate functions and do not need to be aggregated

View File

@ -23,10 +23,6 @@ namespace ErrorCodes
extern const int CORRUPTED_DATA;
}
SummingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition() = default;
SummingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition(ColumnsDefinition &&) noexcept = default;
SummingSortedAlgorithm::ColumnsDefinition::~ColumnsDefinition() = default;
/// Stores numbers of key-columns and value-columns.
struct SummingSortedAlgorithm::MapDescription
{
@ -777,4 +773,8 @@ IMergingAlgorithm::Status SummingSortedAlgorithm::merge()
return Status(merged_data.pull(), true);
}
/// The special members of ColumnsDefinition are defaulted here, in the source file, not in the header.
/// NOTE(review): presumably they sit at the end of the file so that nested types defined in this
/// translation unit (e.g. MapDescription) are complete at this point — confirm.
SummingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition() = default;
SummingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition(ColumnsDefinition &&) noexcept = default;
SummingSortedAlgorithm::ColumnsDefinition::~ColumnsDefinition() = default;
}

View File

@ -6,12 +6,12 @@
#include <IO/ReadBufferFromFileBase.h>
#include <IO/ReadHelpers.h>
#include <Common/logger_useful.h>
#include <Disks/IStoragePolicy.h>
#include <Backups/BackupEntryFromSmallFile.h>
#include <Backups/BackupEntryFromImmutableFile.h>
#include <Storages/MergeTree/localBackup.h>
#include <Disks/SingleDiskVolume.h>
#include <Interpreters/TransactionVersionMetadata.h>
#include <memory>
namespace DB
{
@ -29,6 +29,16 @@ DataPartStorageOnDisk::DataPartStorageOnDisk(VolumePtr volume_, std::string root
{
}
/// Constructs a storage that may reuse an already existing disk transaction.
/// When `transaction_` is not null the storage is marked as having a shared transaction.
DataPartStorageOnDisk::DataPartStorageOnDisk(
VolumePtr volume_, std::string root_path_, std::string part_dir_, DiskTransactionPtr transaction_)
: volume(std::move(volume_))
, root_path(std::move(root_path_))
, part_dir(std::move(part_dir_))
, transaction(std::move(transaction_))
/// Reads the member `transaction` (initialized just above), not the moved-from parameter `transaction_`.
, has_shared_transaction(transaction != nullptr)
{
}
std::string DataPartStorageOnDisk::getFullPath() const
{
return fs::path(volume->getDisk()->getPath()) / root_path / part_dir / "";
@ -49,6 +59,11 @@ std::string DataPartStorageOnDisk::getFullRootPath() const
return fs::path(volume->getDisk()->getPath()) / root_path / "";
}
/// Returns a mutable storage for the projection `name`, located in a subdirectory of this part.
/// The current transaction (possibly null) is passed on to the projection storage.
MutableDataPartStoragePtr DataPartStorageOnDisk::getProjection(const std::string & name)
{
    /// This part's relative path becomes the root path of the projection.
    std::string projection_root = fs::path(root_path) / part_dir;

    /// The constructor that accepts a transaction is private, hence `new` rather than std::make_shared.
    return std::shared_ptr<DataPartStorageOnDisk>(
        new DataPartStorageOnDisk(volume, std::move(projection_root), name, transaction));
}
DataPartStoragePtr DataPartStorageOnDisk::getProjection(const std::string & name) const
{
return std::make_shared<DataPartStorageOnDisk>(volume, std::string(fs::path(root_path) / part_dir), name);
@ -113,6 +128,7 @@ static UInt64 calculateTotalSizeOnDiskImpl(const DiskPtr & disk, const String &
{
if (disk->isFile(from))
return disk->getFileSize(from);
std::vector<std::string> files;
disk->listFiles(from, files);
UInt64 res = 0;
@ -135,75 +151,11 @@ std::unique_ptr<ReadBufferFromFileBase> DataPartStorageOnDisk::readFile(
return volume->getDisk()->readFile(fs::path(root_path) / part_dir / name, settings, read_hint, file_size);
}
static std::unique_ptr<ReadBufferFromFileBase> openForReading(const DiskPtr & disk, const String & path)
{
size_t file_size = disk->getFileSize(path);
return disk->readFile(path, ReadSettings().adjustBufferSize(file_size), file_size);
}
/// Fills `version` from the part's "txn_version.txt" file, handling a missing file and a
/// leftover "txn_version.txt.tmp" file. A leftover temporary file is logged (with its content)
/// to `log` and then removed.
void DataPartStorageOnDisk::loadVersionMetadata(VersionMetadata & version, Poco::Logger * log) const
{
std::string version_file_name = fs::path(root_path) / part_dir / "txn_version.txt";
String tmp_version_file_name = version_file_name + ".tmp";
DiskPtr disk = volume->getDisk();
/// Logs what the temporary file contained before deleting it.
auto remove_tmp_file = [&]()
{
auto last_modified = disk->getLastModified(tmp_version_file_name);
auto buf = openForReading(disk, tmp_version_file_name);
String content;
readStringUntilEOF(content, *buf);
LOG_WARNING(log, "Found file {} that was last modified on {}, has size {} and the following content: {}",
tmp_version_file_name, last_modified.epochTime(), content.size(), content);
disk->removeFile(tmp_version_file_name);
};
/// Normal case: the final file exists; a temporary file next to it is stale and is removed.
if (disk->exists(version_file_name))
{
auto buf = openForReading(disk, version_file_name);
version.read(*buf);
if (disk->exists(tmp_version_file_name))
remove_tmp_file();
return;
}
/// Four (?) cases are possible:
/// 1. Part was created without transactions.
/// 2. Version metadata file was not renamed from *.tmp on part creation.
/// 3. Version metadata were written to *.tmp file, but hard restart happened before fsync.
/// 4. Fsyncs in storeVersionMetadata() work incorrectly.
if (!disk->exists(tmp_version_file_name))
{
/// Case 1.
/// We do not have version metadata and transactions history for old parts,
/// so let's consider that such parts were created by some ancient transaction
/// and were committed with some prehistoric CSN.
/// NOTE It might be Case 3, but version metadata file is written on part creation before other files,
/// so it's not Case 3 if part is not broken.
version.setCreationTID(Tx::PrehistoricTID, nullptr);
version.creation_csn = Tx::PrehistoricCSN;
return;
}
/// Case 2.
/// Content of *.tmp file may be broken, just use fake TID.
/// Transaction was not committed if *.tmp file was not renamed, so we should complete rollback by removing part.
version.setCreationTID(Tx::DummyTID, nullptr);
version.creation_csn = Tx::RolledBackCSN;
remove_tmp_file();
}
/// Delegates to checksums.checkSizes() for this part's directory on this part's disk.
void DataPartStorageOnDisk::checkConsistency(const MergeTreeDataPartChecksums & checksums) const
{
    const auto disk = volume->getDisk();
    checksums.checkSizes(disk, getRelativePath());
}
/// Creates a builder that points at the same volume, root path and part directory as this storage.
DataPartStorageBuilderPtr DataPartStorageOnDisk::getBuilder() const
{
    DataPartStorageBuilderPtr builder = std::make_shared<DataPartStorageBuilderOnDisk>(volume, root_path, part_dir);
    return builder;
}
void DataPartStorageOnDisk::remove(
CanRemoveCallback && can_remove_callback,
const MergeTreeDataPartChecksums & checksums,
@ -273,7 +225,7 @@ void DataPartStorageOnDisk::remove(
try
{
disk->moveDirectory(from, to);
onRename(root_path, part_dir_without_slash);
part_dir = part_dir_without_slash;
}
catch (const Exception & e)
{
@ -488,11 +440,6 @@ bool DataPartStorageOnDisk::looksLikeBrokenDetachedPartHasTheSameContent(const S
return original_files_list == detached_files_list;
}
/// Replaces the stored part directory name; `root_path` and the volume are left unchanged.
/// Nothing is moved on disk here: only the `part_dir` string changes.
void DataPartStorageBuilderOnDisk::setRelativePath(const std::string & path)
{
part_dir = path;
}
std::string DataPartStorageOnDisk::getDiskName() const
{
return volume->getDisk()->getName();
@ -523,7 +470,7 @@ bool DataPartStorageOnDisk::isBroken() const
return volume->getDisk()->isBroken();
}
void DataPartStorageOnDisk::syncRevision(UInt64 revision)
void DataPartStorageOnDisk::syncRevision(UInt64 revision) const
{
volume->getDisk()->syncRevision(revision);
}
@ -543,11 +490,6 @@ std::string DataPartStorageOnDisk::getDiskPath() const
return volume->getDisk()->getPath();
}
/// Looks up this part's disk in `disks`; the result equals disks.end() when the disk is not in the set.
DataPartStorageOnDisk::DisksSet::const_iterator DataPartStorageOnDisk::isStoredOnDisk(const DisksSet & disks) const
{
    const auto own_disk = volume->getDisk();
    return disks.find(own_disk);
}
ReservationPtr DataPartStorageOnDisk::reserve(UInt64 bytes) const
{
auto res = volume->reserve(bytes);
@ -562,159 +504,6 @@ ReservationPtr DataPartStorageOnDisk::tryReserve(UInt64 bytes) const
return volume->reserve(bytes);
}
/// Asks `storage_policy` for the index of the volume that contains this part's disk.
size_t DataPartStorageOnDisk::getVolumeIndex(const IStoragePolicy & storage_policy) const
{
    const auto own_disk = volume->getDisk();
    return storage_policy.getVolumeIndexByDisk(own_disk);
}
/// Writes `checksums` to "checksums.txt" in the part directory.
/// The data is first written to "checksums.txt.tmp" and then moved over the final name.
/// On any failure the temporary file is removed (best effort) and the original exception is rethrown.
void DataPartStorageOnDisk::writeChecksums(const MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const
{
    const std::string path = fs::path(root_path) / part_dir / "checksums.txt";
    const std::string tmp_path = path + ".tmp";

    try
    {
        {
            auto out = volume->getDisk()->writeFile(tmp_path, 4096, WriteMode::Rewrite, settings);
            checksums.write(*out);
            /// Finalize explicitly, as writeColumns() and writeVersionMetadata() do, so that a failure
            /// while flushing is reported here, before the rename, instead of being left to the buffer's destructor.
            out->finalize();
        }

        volume->getDisk()->moveFile(tmp_path, path);
    }
    catch (...)
    {
        /// Cleanup must not hide the original error: failures here are only logged.
        try
        {
            if (volume->getDisk()->exists(tmp_path))
                volume->getDisk()->removeFile(tmp_path);
        }
        catch (...)
        {
            tryLogCurrentException("DataPartStorageOnDisk");
        }

        throw;
    }
}
/// Writes the text form of `columns` to "columns.txt" in the part directory via a ".tmp" file
/// that is moved over the final name. On failure the temporary file is removed (best effort)
/// and the original exception is rethrown.
void DataPartStorageOnDisk::writeColumns(const NamesAndTypesList & columns, const WriteSettings & settings) const
{
    const std::string final_path = fs::path(root_path) / part_dir / "columns.txt";
    const std::string tmp_path = final_path + ".tmp";

    try
    {
        auto out = volume->getDisk()->writeFile(tmp_path, 4096, WriteMode::Rewrite, settings);
        columns.writeText(*out);
        out->finalize();

        volume->getDisk()->moveFile(tmp_path, final_path);
    }
    catch (...)
    {
        /// Failures during cleanup are only logged so that the original error is the one propagated.
        try
        {
            if (volume->getDisk()->exists(tmp_path))
                volume->getDisk()->removeFile(tmp_path);
        }
        catch (...)
        {
            tryLogCurrentException("DataPartStorageOnDisk");
        }

        throw;
    }
}
/// Writes `version` to "txn_version.txt" in the part directory.
/// The content goes to "txn_version.txt.tmp" first (finalized and synced), then replaces the final file.
/// If `fsync_part_dir` is set, a directory sync guard for the part directory is held during the replacement.
/// On failure the temporary file is removed (best effort) and the original exception is rethrown.
void DataPartStorageOnDisk::writeVersionMetadata(const VersionMetadata & version, bool fsync_part_dir) const
{
std::string path = fs::path(root_path) / part_dir / "txn_version.txt";
try
{
{
/// TODO IDisk interface does not allow to open file with O_EXCL flag (for DiskLocal),
/// so we create empty file at first (expecting that createFile throws if file already exists)
/// and then overwrite it.
volume->getDisk()->createFile(path + ".tmp");
auto buf = volume->getDisk()->writeFile(path + ".tmp", 256);
version.write(*buf);
buf->finalize();
buf->sync();
}
/// The guard stays alive until the end of the try block, i.e. across replaceFile().
SyncGuardPtr sync_guard;
if (fsync_part_dir)
sync_guard = volume->getDisk()->getDirectorySyncGuard(getRelativePath());
volume->getDisk()->replaceFile(path + ".tmp", path);
}
catch (...)
{
/// Cleanup failures are only logged; the original exception is rethrown below.
try
{
if (volume->getDisk()->exists(path + ".tmp"))
volume->getDisk()->removeFile(path + ".tmp");
}
catch (...)
{
tryLogCurrentException("DataPartStorageOnDisk");
}
throw;
}
}
void DataPartStorageOnDisk::appendCSNToVersionMetadata(const VersionMetadata & version, VersionMetadata::WhichCSN which_csn) const
{
/// Small enough appends to file are usually atomic,
/// so we append new metadata instead of rewriting file to reduce number of fsyncs.
/// We don't need to do fsync when writing CSN, because in case of hard restart
/// we will be able to restore CSN from transaction log in Keeper.
std::string version_file_name = fs::path(root_path) / part_dir / "txn_version.txt";
DiskPtr disk = volume->getDisk();
auto out = disk->writeFile(version_file_name, 256, WriteMode::Append);
version.writeCSN(*out, which_csn);
out->finalize();
}
void DataPartStorageOnDisk::appendRemovalTIDToVersionMetadata(const VersionMetadata & version, bool clear) const
{
String version_file_name = fs::path(root_path) / part_dir / "txn_version.txt";
DiskPtr disk = volume->getDisk();
auto out = disk->writeFile(version_file_name, 256, WriteMode::Append);
version.writeRemovalTID(*out, clear);
out->finalize();
/// fsync is not required when we clearing removal TID, because after hard restart we will fix metadata
if (!clear)
out->sync();
}
/// Creates the "delete-on-destroy.txt" marker file in the part directory.
/// A Poco::Exception raised while creating it is logged to `log` and not propagated.
void DataPartStorageOnDisk::writeDeleteOnDestroyMarker(Poco::Logger * log) const
{
    const auto disk = volume->getDisk();
    const String marker_path = fs::path(root_path) / part_dir / "delete-on-destroy.txt";

    try
    {
        disk->createFile(marker_path);
    }
    catch (const Poco::Exception & e)
    {
        LOG_ERROR(log, "{} (while creating DeleteOnDestroy marker: {})", e.what(), backQuote(fullPath(disk, marker_path)));
    }
}
/// Removes the "delete-on-destroy.txt" marker from the part directory if it is there.
void DataPartStorageOnDisk::removeDeleteOnDestroyMarker() const
{
    const std::string marker_path = fs::path(root_path) / part_dir / "delete-on-destroy.txt";
    const auto disk = volume->getDisk();
    disk->removeFileIfExists(marker_path);
}
/// Removes "txn_version.txt" from the part directory if it is there.
void DataPartStorageOnDisk::removeVersionMetadata() const
{
    const std::string txn_file = fs::path(root_path) / part_dir / "txn_version.txt";
    const auto disk = volume->getDisk();
    disk->removeFileIfExists(txn_file);
}
String DataPartStorageOnDisk::getUniqueId() const
{
auto disk = volume->getDisk();
@ -724,16 +513,6 @@ String DataPartStorageOnDisk::getUniqueId() const
return disk->getUniqueId(fs::path(getRelativePath()) / "checksums.txt");
}
bool DataPartStorageOnDisk::shallParticipateInMerges(const IStoragePolicy & storage_policy) const
{
/// `IMergeTreeDataPart::volume` describes space where current part belongs, and holds
/// `SingleDiskVolume` object which does not contain up-to-date settings of corresponding volume.
/// Therefore we shall obtain volume from storage policy.
auto volume_ptr = storage_policy.getVolume(storage_policy.getVolumeIndexByDisk(volume->getDisk()));
return !volume_ptr->areMergesAvoided();
}
void DataPartStorageOnDisk::backup(
const MergeTreeDataPartChecksums & checksums,
const NameSet & files_without_checksums,
@ -798,7 +577,7 @@ void DataPartStorageOnDisk::backup(
}
}
DataPartStoragePtr DataPartStorageOnDisk::freeze(
MutableDataPartStoragePtr DataPartStorageOnDisk::freeze(
const std::string & to,
const std::string & dir_path,
bool make_source_readonly,
@ -822,7 +601,7 @@ DataPartStoragePtr DataPartStorageOnDisk::freeze(
return std::make_shared<DataPartStorageOnDisk>(single_disk_volume, to, dir_path);
}
DataPartStoragePtr DataPartStorageOnDisk::clone(
MutableDataPartStoragePtr DataPartStorageOnDisk::clonePart(
const std::string & to,
const std::string & dir_path,
const DiskPtr & disk,
@ -835,6 +614,7 @@ DataPartStoragePtr DataPartStorageOnDisk::clone(
LOG_WARNING(log, "Path {} already exists. Will remove it and clone again.", fullPath(disk, path_to_clone));
disk->removeRecursive(path_to_clone);
}
disk->createDirectories(to);
volume->getDisk()->copy(getRelativePath(), disk, to);
volume->getDisk()->removeFileIfExists(fs::path(path_to_clone) / "delete-on-destroy.txt");
@ -843,13 +623,7 @@ DataPartStoragePtr DataPartStorageOnDisk::clone(
return std::make_shared<DataPartStorageOnDisk>(single_disk_volume, to, dir_path);
}
/// Records a new location of the part: only the stored strings are updated, nothing is moved on disk here.
/// The arguments are references and may alias the members themselves (e.g. a caller passing `root_path`),
/// so the order of the two assignments should not be changed casually.
void DataPartStorageOnDisk::onRename(const std::string & new_root_path, const std::string & new_part_dir)
{
part_dir = new_part_dir;
root_path = new_root_path;
}
void DataPartStorageBuilderOnDisk::rename(
void DataPartStorageOnDisk::rename(
const std::string & new_root_path,
const std::string & new_part_dir,
Poco::Logger * log,
@ -870,7 +644,7 @@ void DataPartStorageBuilderOnDisk::rename(
"Part directory {} already exists and contains {} files. Removing it.",
fullPath(volume->getDisk(), to), files.size());
transaction->removeRecursive(to);
executeOperation([&](auto & disk) { disk.removeRecursive(to); });
}
else
{
@ -884,8 +658,12 @@ void DataPartStorageBuilderOnDisk::rename(
String from = getRelativePath();
/// Why?
transaction->setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr)));
transaction->moveDirectory(from, to);
executeOperation([&](auto & disk)
{
disk.setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr)));
disk.moveDirectory(from, to);
});
part_dir = new_part_dir;
root_path = new_root_path;
@ -907,7 +685,7 @@ void DataPartStorageOnDisk::changeRootPath(const std::string & from_root, const
--prefix_size;
if (prefix_size > root_path.size()
|| std::string_view(from_root).substr(0, prefix_size) != std::string_view(root_path).substr(0, prefix_size))
|| std::string_view(from_root).substr(0, prefix_size) != std::string_view(root_path).substr(0, prefix_size))
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot change part root to {} because it is not a prefix of current root {}",
@ -920,51 +698,80 @@ void DataPartStorageOnDisk::changeRootPath(const std::string & from_root, const
root_path = to_root.substr(0, dst_size) + root_path.substr(prefix_size);
}
/// Creates a builder for the part at root_path_/part_dir_ on `volume_` and opens a new disk transaction for it.
DataPartStorageBuilderOnDisk::DataPartStorageBuilderOnDisk(
VolumePtr volume_,
std::string root_path_,
std::string part_dir_)
: volume(std::move(volume_))
, root_path(std::move(root_path_))
, part_dir(std::move(part_dir_))
/// Uses the member `volume` (already initialized above), not the moved-from parameter `volume_`.
, transaction(volume->getDisk()->createTransaction())
{
}
/// Opens file `name` inside the part directory for rewriting through the builder's transaction
/// (with autocommit disabled).
std::unique_ptr<WriteBufferFromFileBase> DataPartStorageBuilderOnDisk::writeFile(
    const String & name,
    size_t buf_size,
    const WriteSettings & settings)
{
    const auto file_path = fs::path(root_path) / part_dir / name;
    return transaction->writeFile(file_path, buf_size, WriteMode::Rewrite, settings, /* autocommit = */ false);
}
/// Schedules removal of file `name` from the part directory in the builder's transaction.
void DataPartStorageBuilderOnDisk::removeFile(const String & name)
{
    const auto file_path = fs::path(root_path) / part_dir / name;
    transaction->removeFile(file_path);
}
/// Same as removeFile(), but uses the transaction's removeFileIfExists().
void DataPartStorageBuilderOnDisk::removeFileIfExists(const String & name)
{
    const auto file_path = fs::path(root_path) / part_dir / name;
    transaction->removeFileIfExists(file_path);
}
void DataPartStorageBuilderOnDisk::removeRecursive()
{
transaction->removeRecursive(fs::path(root_path) / part_dir);
}
void DataPartStorageBuilderOnDisk::removeSharedRecursive(bool keep_in_remote_fs)
{
transaction->removeSharedRecursive(fs::path(root_path) / part_dir, keep_in_remote_fs, {});
}
SyncGuardPtr DataPartStorageBuilderOnDisk::getDirectorySyncGuard() const
SyncGuardPtr DataPartStorageOnDisk::getDirectorySyncGuard() const
{
return volume->getDisk()->getDirectorySyncGuard(fs::path(root_path) / part_dir);
}
void DataPartStorageBuilderOnDisk::createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) const
/// Invokes `op` on the active transaction when there is one, otherwise directly on the disk.
/// `op` must therefore be callable with either type (the callers in this file pass generic lambdas).
template <typename Op>
void DataPartStorageOnDisk::executeOperation(Op && op)
{
    if (!transaction)
    {
        op(*volume->getDisk());
        return;
    }

    op(*transaction);
}
/// Opens file `name` inside the part directory for rewriting.
/// With an active transaction the write goes through it (autocommit disabled); otherwise straight to the disk.
std::unique_ptr<WriteBufferFromFileBase> DataPartStorageOnDisk::writeFile(
    const String & name,
    size_t buf_size,
    const WriteSettings & settings)
{
    const auto file_path = fs::path(root_path) / part_dir / name;

    if (!transaction)
        return volume->getDisk()->writeFile(file_path, buf_size, WriteMode::Rewrite, settings);

    return transaction->writeFile(file_path, buf_size, WriteMode::Rewrite, settings, /* autocommit = */ false);
}
/// Opens the part's "txn_version.txt" with the given write mode.
/// Always writes directly to the disk: the disk transaction, if any, is not used here.
std::unique_ptr<WriteBufferFromFileBase> DataPartStorageOnDisk::writeTransactionFile(WriteMode mode) const
{
    const auto txn_file = fs::path(root_path) / part_dir / "txn_version.txt";
    return volume->getDisk()->writeFile(txn_file, 256, mode);
}
/// Creates file `name` inside the part directory (via the transaction if one is active).
void DataPartStorageOnDisk::createFile(const String & name)
{
    const auto file_path = fs::path(root_path) / part_dir / name;
    executeOperation([&](auto & disk) { disk.createFile(file_path); });
}
void DataPartStorageOnDisk::moveFile(const String & from_name, const String & to_name)
{
executeOperation([&](auto & disk)
{
auto relative_path = fs::path(root_path) / part_dir;
disk.moveFile(relative_path / from_name, relative_path / to_name);
});
}
void DataPartStorageOnDisk::replaceFile(const String & from_name, const String & to_name)
{
executeOperation([&](auto & disk)
{
auto relative_path = fs::path(root_path) / part_dir;
disk.replaceFile(relative_path / from_name, relative_path / to_name);
});
}
/// Removes file `name` from the part directory (via the transaction if one is active).
void DataPartStorageOnDisk::removeFile(const String & name)
{
    const auto file_path = fs::path(root_path) / part_dir / name;
    executeOperation([&](auto & disk) { disk.removeFile(file_path); });
}
/// Same as removeFile(), but uses removeFileIfExists() of the transaction or disk.
void DataPartStorageOnDisk::removeFileIfExists(const String & name)
{
    const auto file_path = fs::path(root_path) / part_dir / name;
    executeOperation([&](auto & disk) { disk.removeFileIfExists(file_path); });
}
void DataPartStorageOnDisk::removeRecursive()
{
executeOperation([&](auto & disk) { disk.removeRecursive(fs::path(root_path) / part_dir); });
}
void DataPartStorageOnDisk::removeSharedRecursive(bool keep_in_remote_fs)
{
executeOperation([&](auto & disk) { disk.removeSharedRecursive(fs::path(root_path) / part_dir, keep_in_remote_fs, {}); });
}
void DataPartStorageOnDisk::createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to)
{
const auto * source_on_disk = typeid_cast<const DataPartStorageOnDisk *>(&source);
if (!source_on_disk)
@ -973,58 +780,43 @@ void DataPartStorageBuilderOnDisk::createHardLinkFrom(const IDataPartStorage & s
"Cannot create hardlink from different storage. Expected DataPartStorageOnDisk, got {}",
typeid(source).name());
transaction->createHardLink(
fs::path(source_on_disk->getRelativePath()) / from,
fs::path(root_path) / part_dir / to);
executeOperation([&](auto & disk)
{
disk.createHardLink(
fs::path(source_on_disk->getRelativePath()) / from,
fs::path(root_path) / part_dir / to);
});
}
bool DataPartStorageBuilderOnDisk::exists() const
void DataPartStorageOnDisk::createDirectories()
{
return volume->getDisk()->exists(fs::path(root_path) / part_dir);
executeOperation([&](auto & disk) { disk.createDirectories(fs::path(root_path) / part_dir); });
}
std::string DataPartStorageBuilderOnDisk::getFullPath() const
void DataPartStorageOnDisk::createProjection(const std::string & name)
{
return fs::path(volume->getDisk()->getPath()) / root_path / part_dir;
executeOperation([&](auto & disk) { disk.createDirectory(fs::path(root_path) / part_dir / name); });
}
std::string DataPartStorageBuilderOnDisk::getRelativePath() const
void DataPartStorageOnDisk::beginTransaction()
{
return fs::path(root_path) / part_dir;
if (transaction)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Uncommitted {}transaction already exists", has_shared_transaction ? "shared " : "");
transaction = volume->getDisk()->createTransaction();
}
void DataPartStorageBuilderOnDisk::createDirectories()
void DataPartStorageOnDisk::commitTransaction()
{
transaction->createDirectories(fs::path(root_path) / part_dir);
}
if (!transaction)
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no uncommitted transaction");
void DataPartStorageBuilderOnDisk::createProjection(const std::string & name)
{
transaction->createDirectory(fs::path(root_path) / part_dir / name);
}
if (has_shared_transaction)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot commit shared transaction");
ReservationPtr DataPartStorageBuilderOnDisk::reserve(UInt64 bytes)
{
auto res = volume->reserve(bytes);
if (!res)
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Cannot reserve {}, not enough space", ReadableSize(bytes));
return res;
}
DataPartStorageBuilderPtr DataPartStorageBuilderOnDisk::getProjection(const std::string & name) const
{
return std::make_shared<DataPartStorageBuilderOnDisk>(volume, std::string(fs::path(root_path) / part_dir), name);
}
DataPartStoragePtr DataPartStorageBuilderOnDisk::getStorage() const
{
return std::make_shared<DataPartStorageOnDisk>(volume, root_path, part_dir);
}
void DataPartStorageBuilderOnDisk::commit()
{
transaction->commit();
transaction.reset();
}
}

View File

@ -21,6 +21,7 @@ public:
std::string getPartDirectory() const override { return part_dir; }
std::string getFullRootPath() const override;
MutableDataPartStoragePtr getProjection(const std::string & name) override;
DataPartStoragePtr getProjection(const std::string & name) const override;
bool exists() const override;
@ -41,7 +42,6 @@ public:
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
void loadVersionMetadata(VersionMetadata & version, Poco::Logger * log) const override;
void checkConsistency(const MergeTreeDataPartChecksums & checksums) const override;
void remove(
@ -60,7 +60,6 @@ public:
std::optional<Strings> & original_files_list) const;
void setRelativePath(const std::string & path) override;
void onRename(const std::string & new_root_path, const std::string & new_part_dir) override;
std::string getDiskName() const override;
std::string getDiskType() const override;
@ -68,30 +67,14 @@ public:
bool supportZeroCopyReplication() const override;
bool supportParallelWrite() const override;
bool isBroken() const override;
void syncRevision(UInt64 revision) override;
void syncRevision(UInt64 revision) const override;
UInt64 getRevision() const override;
std::unordered_map<String, String> getSerializedMetadata(const std::vector<String> & paths) const override;
std::string getDiskPath() const override;
DisksSet::const_iterator isStoredOnDisk(const DisksSet & disks) const override;
ReservationPtr reserve(UInt64 bytes) const override;
ReservationPtr tryReserve(UInt64 bytes) const override;
size_t getVolumeIndex(const IStoragePolicy &) const override;
void writeChecksums(const MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const override;
void writeColumns(const NamesAndTypesList & columns, const WriteSettings & settings) const override;
void writeVersionMetadata(const VersionMetadata & version, bool fsync_part_dir) const override;
void appendCSNToVersionMetadata(const VersionMetadata & version, VersionMetadata::WhichCSN which_csn) const override;
void appendRemovalTIDToVersionMetadata(const VersionMetadata & version, bool clear) const override;
void writeDeleteOnDestroyMarker(Poco::Logger * log) const override;
void removeDeleteOnDestroyMarker() const override;
void removeVersionMetadata() const override;
String getUniqueId() const override;
bool shallParticipateInMerges(const IStoragePolicy &) const override;
void backup(
const MergeTreeDataPartChecksums & checksums,
const NameSet & files_without_checksums,
@ -100,7 +83,7 @@ public:
bool make_temporary_hard_links,
TemporaryFilesOnDisks * temp_dirs) const override;
DataPartStoragePtr freeze(
MutableDataPartStoragePtr freeze(
const std::string & to,
const std::string & dir_path,
bool make_source_readonly,
@ -108,7 +91,7 @@ public:
bool copy_instead_of_hardlink,
const NameSet & files_to_copy_instead_of_hardlinks) const override;
DataPartStoragePtr clone(
MutableDataPartStoragePtr clonePart(
const std::string & to,
const std::string & dir_path,
const DiskPtr & disk,
@ -116,11 +99,51 @@ public:
void changeRootPath(const std::string & from_root, const std::string & to_root) override;
DataPartStorageBuilderPtr getBuilder() const override;
void createDirectories() override;
void createProjection(const std::string & name) override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & name,
size_t buf_size,
const WriteSettings & settings) override;
std::unique_ptr<WriteBufferFromFileBase> writeTransactionFile(WriteMode mode) const override;
void createFile(const String & name) override;
void moveFile(const String & from_name, const String & to_name) override;
void replaceFile(const String & from_name, const String & to_name) override;
void removeFile(const String & name) override;
void removeFileIfExists(const String & name) override;
void removeRecursive() override;
void removeSharedRecursive(bool keep_in_remote_fs) override;
SyncGuardPtr getDirectorySyncGuard() const override;
void createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) override;
void rename(
const std::string & new_root_path,
const std::string & new_part_dir,
Poco::Logger * log,
bool remove_new_dir_if_exists,
bool fsync_part_dir) override;
void beginTransaction() override;
void commitTransaction() override;
bool hasActiveTransaction() const override { return transaction != nullptr; }
private:
VolumePtr volume;
std::string root_path;
std::string part_dir;
DiskTransactionPtr transaction;
bool has_shared_transaction = false;
DataPartStorageOnDisk(VolumePtr volume_, std::string root_path_, std::string part_dir_, DiskTransactionPtr transaction_);
template <typename Op>
void executeOperation(Op && op);
void clearDirectory(
const std::string & dir,
@ -134,56 +157,4 @@ private:
bool is_projection) const;
};
/// Builder for a data part stored on a disk. Modifying operations (create, write, remove, rename,
/// hard links) are issued through a disk transaction that is opened in the constructor and
/// applied by commit().
class DataPartStorageBuilderOnDisk final : public IDataPartStorageBuilder
{
public:
/// Opens a new transaction on the volume's disk.
DataPartStorageBuilderOnDisk(VolumePtr volume_, std::string root_path_, std::string part_dir_);
/// Changes only the stored part directory name.
void setRelativePath(const std::string & path) override;
/// Checks the part directory directly on the disk, not through the transaction.
bool exists() const override;
void createDirectories() override;
/// Creates the directory `name` inside the part directory.
void createProjection(const std::string & name) override;
std::string getPartDirectory() const override { return part_dir; }
/// Disk path + root_path + part_dir.
std::string getFullPath() const override;
/// root_path + part_dir.
std::string getRelativePath() const override;
/// Opens a file of the part for rewriting through the transaction.
std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & name,
size_t buf_size,
const WriteSettings & settings) override;
void removeFile(const String & name) override;
void removeFileIfExists(const String & name) override;
/// Removes the whole part directory.
void removeRecursive() override;
void removeSharedRecursive(bool keep_in_remote_fs) override;
/// Obtained from the disk for the part directory.
SyncGuardPtr getDirectorySyncGuard() const override;
/// `source` must be a DataPartStorageOnDisk, otherwise an exception is thrown.
void createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) const override;
/// Throws NOT_ENOUGH_SPACE if the volume cannot reserve `bytes`.
ReservationPtr reserve(UInt64 bytes) override;
/// Returns a new builder (with its own transaction) rooted at this part's relative path.
DataPartStorageBuilderPtr getProjection(const std::string & name) const override;
/// Returns a DataPartStorageOnDisk for the same volume, root path and part directory.
DataPartStoragePtr getStorage() const override;
void rename(
const std::string & new_root_path,
const std::string & new_part_dir,
Poco::Logger * log,
bool remove_new_dir_if_exists,
bool fsync_part_dir) override;
/// Commits the transaction and resets it; the builder has no transaction afterwards.
void commit() override;
private:
VolumePtr volume;
std::string root_path;
std::string part_dir;
/// Declared after `volume` because the constructor initializes it from `volume`.
DiskTransactionPtr transaction;
};
}

View File

@ -13,9 +13,9 @@
#include <Storages/MergeTree/MergedBlockOutputStream.h>
#include <Storages/MergeTree/ReplicatedFetchList.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/DataPartStorageOnDisk.h>
#include <Common/CurrentMetrics.h>
#include <Common/NetException.h>
#include <Storages/MergeTree/DataPartStorageOnDisk.h>
#include <Disks/IO/createReadBufferFromFileBase.h>
#include <base/scope_guard.h>
#include <Poco/Net/HTTPRequest.h>
@ -147,12 +147,13 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write
CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedSend};
if (part->data_part_storage->isStoredOnRemoteDisk())
if (part->getDataPartStorage().isStoredOnRemoteDisk())
{
UInt64 revision = parse<UInt64>(params.get("disk_revision", "0"));
if (revision)
part->data_part_storage->syncRevision(revision);
revision = part->data_part_storage->getRevision();
part->getDataPartStorage().syncRevision(revision);
revision = part->getDataPartStorage().getRevision();
if (revision)
response.addCookie({"disk_revision", toString(revision)});
}
@ -184,8 +185,8 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write
!isInMemoryPart(part) &&
client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY)
{
auto disk_type = part->data_part_storage->getDiskType();
if (part->data_part_storage->supportZeroCopyReplication() && std::find(capability.begin(), capability.end(), disk_type) != capability.end())
auto disk_type = part->getDataPartStorage().getDiskType();
if (part->getDataPartStorage().supportZeroCopyReplication() && std::find(capability.begin(), capability.end(), disk_type) != capability.end())
{
/// Send metadata if the receiver's capability covers the source disk type.
response.addCookie({"remote_fs_metadata", disk_type});
@ -307,12 +308,12 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk(
{
String file_name = it.first;
UInt64 size = part->data_part_storage->getFileSize(file_name);
UInt64 size = part->getDataPartStorage().getFileSize(file_name);
writeStringBinary(it.first, out);
writeBinary(size, out);
auto file_in = part->data_part_storage->readFile(file_name, {}, std::nullopt, std::nullopt);
auto file_in = part->getDataPartStorage().readFile(file_name, {}, std::nullopt, std::nullopt);
HashingWriteBuffer hashing_out(out);
copyDataWithThrottler(*file_in, hashing_out, blocker.getCounter(), data.getSendsThrottler());
@ -323,7 +324,7 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk(
throw Exception(
ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART,
"Unexpected size of file {}, expected {} got {}",
std::string(fs::path(part->data_part_storage->getRelativePath()) / file_name),
std::string(fs::path(part->getDataPartStorage().getRelativePath()) / file_name),
hashing_out.count(), size);
writePODBinary(hashing_out.getHash(), out);
@ -342,9 +343,9 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDiskRemoteMeta(
bool send_part_id,
const std::map<String, std::shared_ptr<IMergeTreeDataPart>> & projections)
{
const auto * data_part_storage_on_disk = dynamic_cast<const DataPartStorageOnDisk *>(part->data_part_storage.get());
const auto * data_part_storage_on_disk = dynamic_cast<const DataPartStorageOnDisk *>(&part->getDataPartStorage());
if (!data_part_storage_on_disk)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Storage '{}' doesn't support zero-copy replication", part->data_part_storage->getDiskName());
throw Exception(ErrorCodes::LOGICAL_ERROR, "Storage '{}' doesn't support zero-copy replication", part->getDataPartStorage().getDiskName());
if (!data_part_storage_on_disk->supportZeroCopyReplication())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Disk '{}' doesn't support zero-copy replication", data_part_storage_on_disk->getDiskName());
@ -365,7 +366,7 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDiskRemoteMeta(
std::vector<std::string> paths;
paths.reserve(checksums.files.size());
for (const auto & it : checksums.files)
paths.push_back(fs::path(part->data_part_storage->getRelativePath()) / it.first);
paths.push_back(fs::path(part->getDataPartStorage().getRelativePath()) / it.first);
/// Serialized metadatas with zero ref counts.
auto metadatas = data_part_storage_on_disk->getSerializedMetadata(paths);
@ -399,7 +400,7 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDiskRemoteMeta(
for (const auto & it : checksums.files)
{
const String & file_name = it.first;
String file_path_prefix = fs::path(part->data_part_storage->getRelativePath()) / file_name;
String file_path_prefix = fs::path(part->getDataPartStorage().getRelativePath()) / file_name;
/// Just some additional checks
String metadata_file_path = fs::path(data_part_storage_on_disk->getDiskPath()) / file_path_prefix;
@ -728,13 +729,9 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory(
data.getRelativeDataPath(),
part_name);
auto data_part_storage_builder = std::make_shared<DataPartStorageBuilderOnDisk>(
volume,
data.getRelativeDataPath(),
part_name);
MergeTreeData::MutableDataPartPtr new_data_part =
std::make_shared<MergeTreeDataPartInMemory>(data, part_name, data_part_storage);
new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr);
for (auto i = 0ul; i < projections; ++i)
@ -750,7 +747,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory(
throttler->add(block.bytes());
auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj");
auto projection_part_storage_builder = data_part_storage_builder->getProjection(projection_name + ".proj");
MergeTreePartInfo new_part_info("all", 0, 0, 0);
MergeTreeData::MutableDataPartPtr new_projection_part =
@ -764,7 +760,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory(
MergedBlockOutputStream part_out(
new_projection_part,
projection_part_storage_builder,
metadata_snapshot->projections.get(projection_name).metadata,
block.getNamesAndTypesList(),
{},
@ -792,7 +787,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory(
new_data_part->partition.create(metadata_snapshot, block, 0, context);
MergedBlockOutputStream part_out(
new_data_part, data_part_storage_builder, metadata_snapshot, block.getNamesAndTypesList(), {},
new_data_part, metadata_snapshot, block.getNamesAndTypesList(), {},
CompressionCodecFactory::instance().get("NONE", {}), NO_TRANSACTION_PTR);
part_out.write(block);
@ -804,7 +799,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory(
void Fetcher::downloadBasePartOrProjectionPartToDiskRemoteMeta(
const String & replica_path,
DataPartStorageBuilderPtr & data_part_storage_builder,
const MutableDataPartStoragePtr & data_part_storage,
PooledReadWriteBufferFromHTTP & in,
MergeTreeData::DataPart::Checksums & checksums,
ThrottlerPtr throttler) const
@ -820,7 +815,7 @@ void Fetcher::downloadBasePartOrProjectionPartToDiskRemoteMeta(
readStringBinary(file_name, in);
readBinary(file_size, in);
String metadata_file = fs::path(data_part_storage_builder->getFullPath()) / file_name;
String metadata_file = fs::path(data_part_storage->getFullPath()) / file_name;
{
auto file_out = std::make_unique<WriteBufferFromFile>(metadata_file, DBMS_DEFAULT_BUFFER_SIZE, -1, 0666, nullptr, 0);
@ -834,8 +829,8 @@ void Fetcher::downloadBasePartOrProjectionPartToDiskRemoteMeta(
/// NOTE The is_cancelled flag also makes sense to check every time you read over the network,
/// performing a poll with a not very large timeout.
/// And now we check it only between read chunks (in the `copyData` function).
data_part_storage_builder->removeSharedRecursive(true);
data_part_storage_builder->commit();
data_part_storage->removeSharedRecursive(true);
data_part_storage->commitTransaction();
throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED);
}
@ -861,7 +856,7 @@ void Fetcher::downloadBasePartOrProjectionPartToDiskRemoteMeta(
void Fetcher::downloadBaseOrProjectionPartToDisk(
const String & replica_path,
DataPartStorageBuilderPtr & data_part_storage_builder,
const MutableDataPartStoragePtr & data_part_storage,
bool sync,
PooledReadWriteBufferFromHTTP & in,
MergeTreeData::DataPart::Checksums & checksums,
@ -880,14 +875,14 @@ void Fetcher::downloadBaseOrProjectionPartToDisk(
/// File must be inside "absolute_part_path" directory.
/// Otherwise malicious ClickHouse replica may force us to write to arbitrary path.
String absolute_file_path = fs::weakly_canonical(fs::path(data_part_storage_builder->getRelativePath()) / file_name);
if (!startsWith(absolute_file_path, fs::weakly_canonical(data_part_storage_builder->getRelativePath()).string()))
String absolute_file_path = fs::weakly_canonical(fs::path(data_part_storage->getRelativePath()) / file_name);
if (!startsWith(absolute_file_path, fs::weakly_canonical(data_part_storage->getRelativePath()).string()))
throw Exception(ErrorCodes::INSECURE_PATH,
"File path ({}) doesn't appear to be inside part path ({}). "
"This may happen if we are trying to download part from malicious replica or logical error.",
absolute_file_path, data_part_storage_builder->getRelativePath());
absolute_file_path, data_part_storage->getRelativePath());
auto file_out = data_part_storage_builder->writeFile(file_name, std::min<UInt64>(file_size, DBMS_DEFAULT_BUFFER_SIZE), {});
auto file_out = data_part_storage->writeFile(file_name, std::min<UInt64>(file_size, DBMS_DEFAULT_BUFFER_SIZE), {});
HashingWriteBuffer hashing_out(*file_out);
copyDataWithThrottler(in, hashing_out, file_size, blocker.getCounter(), throttler);
@ -896,7 +891,7 @@ void Fetcher::downloadBaseOrProjectionPartToDisk(
/// NOTE The is_cancelled flag also makes sense to check every time you read over the network,
/// performing a poll with a not very large timeout.
/// And now we check it only between read chunks (in the `copyData` function).
data_part_storage_builder->removeRecursive();
data_part_storage->removeRecursive();
throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED);
}
@ -906,7 +901,7 @@ void Fetcher::downloadBaseOrProjectionPartToDisk(
if (expected_hash != hashing_out.getHash())
throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH,
"Checksum mismatch for file {} transferred from {}",
(fs::path(data_part_storage_builder->getFullPath()) / file_name).string(),
(fs::path(data_part_storage->getFullPath()) / file_name).string(),
replica_path);
if (file_name != "checksums.txt" &&
@ -951,15 +946,12 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk(
part_relative_path,
part_dir);
DataPartStorageBuilderPtr data_part_storage_builder = std::make_shared<DataPartStorageBuilderOnDisk>(
volume,
part_relative_path,
part_dir);
data_part_storage->beginTransaction();
if (data_part_storage_builder->exists())
if (data_part_storage->exists())
{
LOG_WARNING(log, "Directory {} already exists, probably result of a failed fetch. Will remove it before fetching part.",
data_part_storage_builder->getFullPath());
data_part_storage->getFullPath());
/// Even if it's a temporary part it could be downloaded with zero copy replication and this function
/// is executed as a callback.
@ -967,10 +959,10 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk(
/// We don't control the amount of refs for temporary parts, so we cannot decide whether we can remove blobs
/// or not. So we are not doing it.
bool keep_shared = disk->supportZeroCopyReplication() && data_settings->allow_remote_fs_zero_copy_replication;
data_part_storage_builder->removeSharedRecursive(keep_shared);
data_part_storage->removeSharedRecursive(keep_shared);
}
data_part_storage_builder->createDirectories();
data_part_storage->createDirectories();
SyncGuardPtr sync_guard;
if (data.getSettings()->fsync_part_directory)
@ -985,19 +977,18 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk(
MergeTreeData::DataPart::Checksums projection_checksum;
auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj");
auto projection_part_storage_builder = data_part_storage_builder->getProjection(projection_name + ".proj");
projection_part_storage_builder->createDirectories();
projection_part_storage->createDirectories();
downloadBaseOrProjectionPartToDisk(
replica_path, projection_part_storage_builder, sync, in, projection_checksum, throttler);
replica_path, projection_part_storage, sync, in, projection_checksum, throttler);
checksums.addFile(
projection_name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128());
}
// Download the base part
downloadBaseOrProjectionPartToDisk(replica_path, data_part_storage_builder, sync, in, checksums, throttler);
downloadBaseOrProjectionPartToDisk(replica_path, data_part_storage, sync, in, checksums, throttler);
assertEOF(in);
data_part_storage->commitTransaction();
MergeTreeData::MutableDataPartPtr new_data_part = data.createPart(part_name, data_part_storage);
new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr);
new_data_part->is_temp = true;
@ -1043,17 +1034,14 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta(
part_relative_path,
part_dir);
DataPartStorageBuilderPtr data_part_storage_builder = std::make_shared<DataPartStorageBuilderOnDisk>(
volume,
part_relative_path,
part_dir);
data_part_storage->beginTransaction();
if (data_part_storage->exists())
throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Directory {} already exists.", data_part_storage->getFullPath());
CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedFetch};
volume->getDisk()->createDirectories(data_part_storage->getFullPath());
data_part_storage->createDirectories();
for (auto i = 0ul; i < projections; ++i)
{
@ -1062,24 +1050,22 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta(
MergeTreeData::DataPart::Checksums projection_checksum;
auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj");
auto projection_part_storage_builder = data_part_storage_builder->getProjection(projection_name + ".proj");
projection_part_storage_builder->createDirectories();
projection_part_storage->createDirectories();
downloadBasePartOrProjectionPartToDiskRemoteMeta(
replica_path, projection_part_storage_builder, in, projection_checksum, throttler);
replica_path, projection_part_storage, in, projection_checksum, throttler);
checksums.addFile(
projection_name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128());
}
downloadBasePartOrProjectionPartToDiskRemoteMeta(
replica_path, data_part_storage_builder, in, checksums, throttler);
replica_path, data_part_storage, in, checksums, throttler);
assertEOF(in);
MergeTreeData::MutableDataPartPtr new_data_part;
try
{
data_part_storage_builder->commit();
data_part_storage->commitTransaction();
new_data_part = data.createPart(part_name, data_part_storage);
new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr);

View File

@ -94,7 +94,7 @@ public:
private:
void downloadBaseOrProjectionPartToDisk(
const String & replica_path,
DataPartStorageBuilderPtr & data_part_storage_builder,
const MutableDataPartStoragePtr & data_part_storage,
bool sync,
PooledReadWriteBufferFromHTTP & in,
MergeTreeData::DataPart::Checksums & checksums,
@ -102,12 +102,11 @@ private:
void downloadBasePartOrProjectionPartToDiskRemoteMeta(
const String & replica_path,
DataPartStorageBuilderPtr & data_part_storage_builder,
const MutableDataPartStoragePtr & data_part_storage,
PooledReadWriteBufferFromHTTP & in,
MergeTreeData::DataPart::Checksums & checksums,
ThrottlerPtr throttler) const;
MergeTreeData::MutableDataPartPtr downloadPartToDisk(
const String & part_name,
const String & replica_path,

View File

@ -4,6 +4,9 @@
#include <Core/NamesAndTypes.h>
#include <Interpreters/TransactionVersionMetadata.h>
#include <Storages/MergeTree/MergeTreeDataPartState.h>
#include <Disks/WriteMode.h>
#include <boost/core/noncopyable.hpp>
#include <memory>
#include <optional>
namespace DB
@ -18,6 +21,7 @@ struct CanRemoveDescription
NameSet files_not_to_remove;
};
using CanRemoveCallback = std::function<CanRemoveDescription()>;
class IDataPartStorageIterator
@ -61,13 +65,10 @@ struct WriteSettings;
class TemporaryFileOnDisk;
class IDataPartStorageBuilder;
using DataPartStorageBuilderPtr = std::shared_ptr<IDataPartStorageBuilder>;
/// This is an abstraction of storage for data part files.
/// Ideally, it is assumed to contain read-only methods from IDisk.
/// It is not fulfilled now, but let's try our best.
class IDataPartStorage
class IDataPartStorage : public boost::noncopyable
{
public:
virtual ~IDataPartStorage() = default;
@ -81,16 +82,19 @@ public:
/// virtual std::string getRelativeRootPath() const = 0;
/// Get a storage for projection.
virtual std::shared_ptr<IDataPartStorage> getProjection(const std::string & name) const = 0;
virtual std::shared_ptr<IDataPartStorage> getProjection(const std::string & name) = 0;
virtual std::shared_ptr<const IDataPartStorage> getProjection(const std::string & name) const = 0;
/// Part directory exists.
virtual bool exists() const = 0;
/// File inside part directory exists. Specified path is relative to the part path.
virtual bool exists(const std::string & name) const = 0;
virtual bool isDirectory(const std::string & name) const = 0;
/// Modification time for part directory.
virtual Poco::Timestamp getLastModified() const = 0;
/// Iterate part directory. Iteration in subdirectory is not needed yet.
virtual DataPartStorageIteratorPtr iterate() const = 0;
@ -107,7 +111,6 @@ public:
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const = 0;
virtual void loadVersionMetadata(VersionMetadata & version, Poco::Logger * log) const = 0;
virtual void checkConsistency(const MergeTreeDataPartChecksums & checksums) const = 0;
struct ProjectionChecksums
@ -129,12 +132,12 @@ public:
/// Get a name like 'prefix_partdir_tryN' which does not exist in a root dir.
/// TODO: remove it.
virtual std::optional<String> getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const = 0;
virtual std::optional<String> getRelativePathForPrefix(
Poco::Logger * log, const String & prefix, bool detached, bool broken) const = 0;
/// Reset part directory, used for im-memory parts.
/// Reset part directory, used for in-memory parts.
/// TODO: remove it.
virtual void setRelativePath(const std::string & path) = 0;
virtual void onRename(const std::string & new_root_path, const std::string & new_part_dir) = 0;
/// Some methods from IDisk. Needed to avoid getting internal IDisk interface.
virtual std::string getDiskName() const = 0;
@ -143,41 +146,26 @@ public:
virtual bool supportZeroCopyReplication() const { return false; }
virtual bool supportParallelWrite() const = 0;
virtual bool isBroken() const = 0;
virtual void syncRevision(UInt64 revision) = 0;
/// TODO: remove or at least remove const.
virtual void syncRevision(UInt64 revision) const = 0;
virtual UInt64 getRevision() const = 0;
virtual std::unordered_map<String, String> getSerializedMetadata(const std::vector<String> & paths) const = 0;
/// Get a path for internal disk if relevant. It is used mainly for logging.
virtual std::string getDiskPath() const = 0;
/// Check if data part is stored on one of the disks in the specified set.
using DisksSet = std::unordered_set<DiskPtr>;
virtual DisksSet::const_iterator isStoredOnDisk(const DisksSet & disks) const { return disks.end(); }
/// Reserve space on the same disk.
/// Probably we should try to remove it later.
virtual ReservationPtr reserve(UInt64 /*bytes*/) const { return nullptr; }
virtual ReservationPtr tryReserve(UInt64 /*bytes*/) const { return nullptr; }
virtual size_t getVolumeIndex(const IStoragePolicy &) const { return 0; }
/// Some methods which change data part internals possibly after creation.
/// Probably we should try to remove it later.
virtual void writeChecksums(const MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const = 0;
virtual void writeColumns(const NamesAndTypesList & columns, const WriteSettings & settings) const = 0;
virtual void writeVersionMetadata(const VersionMetadata & version, bool fsync_part_dir) const = 0;
virtual void appendCSNToVersionMetadata(const VersionMetadata & version, VersionMetadata::WhichCSN which_csn) const = 0;
virtual void appendRemovalTIDToVersionMetadata(const VersionMetadata & version, bool clear) const = 0;
virtual void writeDeleteOnDestroyMarker(Poco::Logger * log) const = 0;
virtual void removeDeleteOnDestroyMarker() const = 0;
virtual void removeVersionMetadata() const = 0;
/// TODO: remove constness
virtual ReservationPtr reserve(UInt64 /*bytes*/) const { return nullptr; }
virtual ReservationPtr tryReserve(UInt64 /*bytes*/) const { return nullptr; }
/// A leak of abstraction.
/// Return some unique string for file.
/// Required to distinguish different copies of the same part on remote FS.
virtual String getUniqueId() const = 0;
/// A leak of abstraction
virtual bool shallParticipateInMerges(const IStoragePolicy &) const { return true; }
/// Create a backup of a data part.
/// This method adds a new entry to backup_entries.
/// Also creates a new tmp_dir for internal disk (if disk is mentioned the first time).
@ -205,7 +193,7 @@ public:
const NameSet & files_to_copy_instead_of_hardlinks) const = 0;
/// Make a full copy of a data part into 'to/dir_path' (possibly to a different disk).
virtual std::shared_ptr<IDataPartStorage> clone(
virtual std::shared_ptr<IDataPartStorage> clonePart(
const std::string & to,
const std::string & dir_path,
const DiskPtr & disk,
@ -215,33 +203,22 @@ public:
/// Right now, this is needed for rename table query.
virtual void changeRootPath(const std::string & from_root, const std::string & to_root) = 0;
/// Leak of abstraction as well. We should use builder as one-time object which allow
/// us to build parts, while storage should be read-only method to access part properties
/// related to disk. However our code is really tricky and sometimes we need ad-hoc builders.
virtual DataPartStorageBuilderPtr getBuilder() const = 0;
};
using DataPartStoragePtr = std::shared_ptr<IDataPartStorage>;
/// This interface is needed to write data part.
class IDataPartStorageBuilder
{
public:
virtual ~IDataPartStorageBuilder() = default;
/// Reset part directory, used for im-memory parts
virtual void setRelativePath(const std::string & path) = 0;
virtual std::string getPartDirectory() const = 0;
virtual std::string getFullPath() const = 0;
virtual std::string getRelativePath() const = 0;
virtual bool exists() const = 0;
virtual void createDirectories() = 0;
virtual void createProjection(const std::string & name) = 0;
virtual std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & name, size_t buf_size, const WriteSettings & settings) = 0;
virtual std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & name,
size_t buf_size,
const WriteSettings & settings) = 0;
/// A special const method to write transaction file.
/// It's const, because file with transaction metadata
/// can be modified after part creation.
virtual std::unique_ptr<WriteBufferFromFileBase> writeTransactionFile(WriteMode mode) const = 0;
virtual void createFile(const String & name) = 0;
virtual void moveFile(const String & from_name, const String & to_name) = 0;
virtual void replaceFile(const String & from_name, const String & to_name) = 0;
virtual void removeFile(const String & name) = 0;
virtual void removeFileIfExists(const String & name) = 0;
@ -250,20 +227,12 @@ public:
virtual SyncGuardPtr getDirectorySyncGuard() const { return nullptr; }
virtual void createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) const = 0;
virtual ReservationPtr reserve(UInt64 /*bytes*/) { return nullptr; }
virtual std::shared_ptr<IDataPartStorageBuilder> getProjection(const std::string & name) const = 0;
virtual DataPartStoragePtr getStorage() const = 0;
virtual void createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) = 0;
/// Rename part.
/// Ideally, new_root_path should be the same as current root (but it is not true).
/// Examples are: 'all_1_2_1' -> 'detached/all_1_2_1'
/// 'moving/tmp_all_1_2_1' -> 'all_1_2_1'
///
/// To notify storage also call onRename for it with first two args
virtual void rename(
const std::string & new_root_path,
const std::string & new_part_dir,
@ -271,7 +240,35 @@ public:
bool remove_new_dir_if_exists,
bool fsync_part_dir) = 0;
virtual void commit() = 0;
/// Starts a transaction of mutable operations.
virtual void beginTransaction() = 0;
/// Commits a transaction of mutable operations.
virtual void commitTransaction() = 0;
virtual bool hasActiveTransaction() const = 0;
};
using DataPartStoragePtr = std::shared_ptr<const IDataPartStorage>;
using MutableDataPartStoragePtr = std::shared_ptr<IDataPartStorage>;
/// A holder that encapsulates data part storage and
/// gives access to const storage from const methods
/// and to mutable storage from non-const methods.
/// Owns a shared pointer to a mutable IDataPartStorage and exposes it with
/// constness that follows the constness of the holder itself.
class DataPartStorageHolder : public boost::noncopyable
{
public:
/// Takes the pointer by value and moves it into the member.
/// NOTE(review): no null check is done here, while the reference accessors below
/// dereference the pointer unconditionally - presumably callers always pass non-null; confirm.
explicit DataPartStorageHolder(MutableDataPartStoragePtr storage_)
: storage(std::move(storage_))
{
}
/// Reference accessors: the non-const overload gives mutable access, the const overload gives read-only access.
IDataPartStorage & getDataPartStorage() { return *storage; }
const IDataPartStorage & getDataPartStorage() const { return *storage; }
/// Pointer accessors: each returns a copy of the shared pointer; the const overload
/// returns DataPartStoragePtr, so the pointee cannot be modified through it.
MutableDataPartStoragePtr getDataPartStoragePtr() { return storage; }
DataPartStoragePtr getDataPartStoragePtr() const { return storage; }
private:
MutableDataPartStoragePtr storage;
};
}

View File

@ -1,4 +1,5 @@
#include "IMergeTreeDataPart.h"
#include "Storages/MergeTree/IDataPartStorage.h"
#include <optional>
#include <boost/algorithm/string/join.hpp>
@ -101,7 +102,7 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Par
}
IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::store(
const MergeTreeData & data, const DataPartStorageBuilderPtr & data_part_storage_builder, Checksums & out_checksums) const
const MergeTreeData & data, IDataPartStorage & part_storage, Checksums & out_checksums) const
{
auto metadata_snapshot = data.getInMemoryMetadataPtr();
const auto & partition_key = metadata_snapshot->getPartitionKey();
@ -109,20 +110,20 @@ IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::s
auto minmax_column_names = data.getMinMaxColumnsNames(partition_key);
auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key);
return store(minmax_column_names, minmax_column_types, data_part_storage_builder, out_checksums);
return store(minmax_column_names, minmax_column_types, part_storage, out_checksums);
}
IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::store(
const Names & column_names,
const DataTypes & data_types,
const DataPartStorageBuilderPtr & data_part_storage_builder,
IDataPartStorage & part_storage,
Checksums & out_checksums) const
{
if (!initialized)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Attempt to store uninitialized MinMax index for part {}. This is a bug",
data_part_storage_builder->getFullPath());
part_storage.getFullPath());
WrittenFiles written_files;
@ -131,7 +132,7 @@ IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::s
String file_name = "minmax_" + escapeForFileName(column_names[i]) + ".idx";
auto serialization = data_types.at(i)->getDefaultSerialization();
auto out = data_part_storage_builder->writeFile(file_name, DBMS_DEFAULT_BUFFER_SIZE, {});
auto out = part_storage.writeFile(file_name, DBMS_DEFAULT_BUFFER_SIZE, {});
HashingWriteBuffer out_hashing(*out);
serialization->serializeBinary(hyperrectangle[i].left, out_hashing);
serialization->serializeBinary(hyperrectangle[i].right, out_hashing);
@ -301,13 +302,13 @@ static void decrementTypeMetric(MergeTreeDataPartType type)
IMergeTreeDataPart::IMergeTreeDataPart(
const MergeTreeData & storage_,
const String & name_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
Type part_type_,
const IMergeTreeDataPart * parent_part_)
: storage(storage_)
: DataPartStorageHolder(data_part_storage_)
, storage(storage_)
, name(name_)
, info(MergeTreePartInfo::fromPartName(name_, storage.format_version))
, data_part_storage(parent_part_ ? parent_part_->data_part_storage : data_part_storage_)
, index_granularity_info(storage_, part_type_)
, part_type(part_type_)
, parent_part(parent_part_)
@ -315,6 +316,7 @@ IMergeTreeDataPart::IMergeTreeDataPart(
{
if (parent_part)
state = MergeTreeDataPartState::Active;
incrementStateMetric(state);
incrementTypeMetric(part_type);
@ -328,13 +330,13 @@ IMergeTreeDataPart::IMergeTreeDataPart(
const MergeTreeData & storage_,
const String & name_,
const MergeTreePartInfo & info_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
Type part_type_,
const IMergeTreeDataPart * parent_part_)
: storage(storage_)
: DataPartStorageHolder(data_part_storage_)
, storage(storage_)
, name(name_)
, info(info_)
, data_part_storage(data_part_storage_)
, index_granularity_info(storage_, part_type_)
, part_type(part_type_)
, parent_part(parent_part_)
@ -342,6 +344,7 @@ IMergeTreeDataPart::IMergeTreeDataPart(
{
if (parent_part)
state = MergeTreeDataPartState::Active;
incrementStateMetric(state);
incrementTypeMetric(part_type);
@ -505,17 +508,17 @@ void IMergeTreeDataPart::removeIfNeeded()
std::string path;
try
{
path = data_part_storage->getRelativePath();
path = getDataPartStorage().getRelativePath();
if (!data_part_storage->exists()) // path
if (!getDataPartStorage().exists()) // path
return;
if (is_temp)
{
String file_name = fileName(data_part_storage->getPartDirectory());
String file_name = fileName(getDataPartStorage().getPartDirectory());
if (file_name.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", data_part_storage->getPartDirectory(), name);
throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", getDataPartStorage().getPartDirectory(), name);
if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj"))
{
@ -620,7 +623,7 @@ String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(bool with_subc
}
if (!minimum_size_column)
throw Exception("Could not find a column of minimum size in MergeTree, part " + data_part_storage->getFullPath(), ErrorCodes::LOGICAL_ERROR);
throw Exception("Could not find a column of minimum size in MergeTree, part " + getDataPartStorage().getFullPath(), ErrorCodes::LOGICAL_ERROR);
return *minimum_size_column;
}
@ -698,9 +701,9 @@ void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool ch
for (const auto & projection : metadata_snapshot->projections)
{
String path = /*getRelativePath() + */ projection.name + ".proj";
if (data_part_storage->exists(path))
if (getDataPartStorage().exists(path))
{
auto projection_part_storage = data_part_storage->getProjection(projection.name + ".proj");
auto projection_part_storage = getDataPartStorage().getProjection(projection.name + ".proj");
auto part = storage.createPart(projection.name, {"all", 0, 0, 0}, projection_part_storage, this);
part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency);
projection_parts.emplace(projection.name, std::move(part));
@ -741,8 +744,8 @@ void IMergeTreeDataPart::loadIndex()
loaded_index[i]->reserve(index_granularity.getMarksCount());
}
String index_name = "primary" + getIndexExtensionFromFilesystem(data_part_storage).value();
String index_path = fs::path(data_part_storage->getRelativePath()) / index_name;
String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage()).value();
String index_path = fs::path(getDataPartStorage().getRelativePath()) / index_name;
auto index_file = metadata_manager->read(index_name);
size_t marks_count = index_granularity.getMarksCount();
@ -781,7 +784,7 @@ void IMergeTreeDataPart::appendFilesOfIndex(Strings & files) const
if (metadata_snapshot->hasPrimaryKey())
{
String index_name = "primary" + getIndexExtensionFromFilesystem(data_part_storage).value();
String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage()).value();
files.push_back(index_name);
}
}
@ -793,10 +796,10 @@ NameSet IMergeTreeDataPart::getFileNamesWithoutChecksums() const
NameSet result = {"checksums.txt", "columns.txt"};
if (data_part_storage->exists(DEFAULT_COMPRESSION_CODEC_FILE_NAME))
if (getDataPartStorage().exists(DEFAULT_COMPRESSION_CODEC_FILE_NAME))
result.emplace(DEFAULT_COMPRESSION_CODEC_FILE_NAME);
if (data_part_storage->exists(TXN_VERSION_METADATA_FILE_NAME))
if (getDataPartStorage().exists(TXN_VERSION_METADATA_FILE_NAME))
result.emplace(TXN_VERSION_METADATA_FILE_NAME);
return result;
@ -811,7 +814,7 @@ void IMergeTreeDataPart::loadDefaultCompressionCodec()
return;
}
String path = fs::path(data_part_storage->getRelativePath()) / DEFAULT_COMPRESSION_CODEC_FILE_NAME;
String path = fs::path(getDataPartStorage().getRelativePath()) / DEFAULT_COMPRESSION_CODEC_FILE_NAME;
bool exists = metadata_manager->exists(DEFAULT_COMPRESSION_CODEC_FILE_NAME);
if (!exists)
{
@ -851,6 +854,120 @@ void IMergeTreeDataPart::loadDefaultCompressionCodec()
}
}
/// Writes a metadata file of the part via a temporary "<filename>.tmp" file.
/// `writer` is called with the output buffer and must serialize the content into it.
/// After the buffer is finalized, the temporary file is moved to `filename`.
/// On any exception the temporary file is removed (best effort) and the exception is rethrown.
template <typename Writer>
void IMergeTreeDataPart::writeMetadata(const String & filename, const WriteSettings & settings, Writer && writer)
{
    auto & part_storage = getDataPartStorage();
    const auto temporary_name = filename + ".tmp";

    /// Cleanup must never hide the original error, so its own failures are only logged.
    auto drop_temporary_file = [&]
    {
        try
        {
            if (part_storage.exists(temporary_name))
                part_storage.removeFile(temporary_name);
        }
        catch (...)
        {
            tryLogCurrentException("DataPartStorageOnDisk");
        }
    };

    try
    {
        {
            /// The buffer lives in its own scope so that it is destroyed before the file is moved.
            auto buffer = part_storage.writeFile(temporary_name, 4096, settings);
            writer(*buffer);
            buffer->finalize();
        }

        part_storage.moveFile(temporary_name, filename);
    }
    catch (...)
    {
        drop_temporary_file();
        throw;
    }
}
/// Serializes `checksums_` into the "checksums.txt" file of the part (see writeMetadata).
void IMergeTreeDataPart::writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings)
{
    auto serialize_checksums = [&checksums_](auto & out)
    {
        checksums_.write(out);
    };

    writeMetadata("checksums.txt", settings, serialize_checksums);
}
/// Serializes `columns_` in text form into the "columns.txt" file of the part (see writeMetadata).
void IMergeTreeDataPart::writeColumns(const NamesAndTypesList & columns_, const WriteSettings & settings)
{
    auto serialize_columns = [&columns_](auto & out)
    {
        columns_.writeText(out);
    };

    writeMetadata("columns.txt", settings, serialize_columns);
}
/// Writes `version_` into "txn_version.txt" of this part.
/// The content is first written to "txn_version.txt.tmp", finalized and synced,
/// and only then the temporary file replaces the final one.
/// If `fsync_part_dir` is true, a directory sync guard is obtained before the replacement
/// and held until the end of the try block.
/// On any exception the temporary file is removed (best effort) and the exception is rethrown.
void IMergeTreeDataPart::writeVersionMetadata(const VersionMetadata & version_, bool fsync_part_dir) const
{
static constexpr auto filename = "txn_version.txt";
static constexpr auto tmp_filename = "txn_version.txt.tmp";
/// The method is const, but it has to modify files of the part, hence the const_cast.
auto & data_part_storage = const_cast<IDataPartStorage &>(getDataPartStorage());
try
{
{
/// TODO IDisk interface does not allow to open file with O_EXCL flag (for DiskLocal),
/// so we create empty file at first (expecting that createFile throws if file already exists)
/// and then overwrite it.
data_part_storage.createFile(tmp_filename);
auto write_settings = storage.getContext()->getWriteSettings();
auto buf = data_part_storage.writeFile(tmp_filename, 256, write_settings);
version_.write(*buf);
buf->finalize();
buf->sync();
/// The buffer is destroyed at the end of this scope, before the file is replaced.
}
/// Stays empty unless syncing of the part directory was requested.
SyncGuardPtr sync_guard;
if (fsync_part_dir)
sync_guard = data_part_storage.getDirectorySyncGuard();
data_part_storage.replaceFile(tmp_filename, filename);
}
catch (...)
{
/// Do not leave the temporary file behind. A failure of the cleanup itself is only logged,
/// so that the original exception is the one that gets rethrown below.
try
{
if (data_part_storage.exists(tmp_filename))
data_part_storage.removeFile(tmp_filename);
}
catch (...)
{
tryLogCurrentException("DataPartStorageOnDisk");
}
throw;
}
}
/// Creates the "delete-on-destroy.txt" marker file in the storage of this part.
/// A Poco::Exception thrown while creating the file is logged as an error and not rethrown;
/// exceptions of other types are not handled here.
void IMergeTreeDataPart::writeDeleteOnDestroyMarker()
{
static constexpr auto marker_path = "delete-on-destroy.txt";
try
{
getDataPartStorage().createFile(marker_path);
}
catch (Poco::Exception & e)
{
/// The logged message includes the full path of the marker that could not be created.
LOG_ERROR(storage.log, "{} (while creating DeleteOnDestroy marker: {})",
e.what(), (fs::path(getDataPartStorage().getFullPath()) / marker_path).string());
}
}
/// Removes the "delete-on-destroy.txt" marker file from the storage of this part, if it exists.
void IMergeTreeDataPart::removeDeleteOnDestroyMarker()
{
    auto & part_storage = getDataPartStorage();
    part_storage.removeFileIfExists("delete-on-destroy.txt");
}
/// Removes the "txn_version.txt" file from the storage of this part, if it exists.
void IMergeTreeDataPart::removeVersionMetadata()
{
    auto & part_storage = getDataPartStorage();
    part_storage.removeFileIfExists("txn_version.txt");
}
void IMergeTreeDataPart::appendFilesOfDefaultCompressionCodec(Strings & files)
{
files.push_back(DEFAULT_COMPRESSION_CODEC_FILE_NAME);
@ -880,7 +997,7 @@ CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const
String candidate_path = /*fs::path(getRelativePath()) */ (ISerialization::getFileNameForStream(part_column, substream_path) + ".bin");
/// We can have existing, but empty .bin files. Example: LowCardinality(Nullable(...)) columns and column_name.dict.null.bin file.
if (data_part_storage->exists(candidate_path) && data_part_storage->getFileSize(candidate_path) != 0)
if (getDataPartStorage().exists(candidate_path) && getDataPartStorage().getFileSize(candidate_path) != 0)
path_to_data_file = candidate_path;
}
});
@ -891,7 +1008,7 @@ CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const
continue;
}
result = getCompressionCodecForFile(data_part_storage, path_to_data_file);
result = getCompressionCodecForFile(getDataPartStorage(), path_to_data_file);
break;
}
}
@ -936,7 +1053,7 @@ void IMergeTreeDataPart::loadPartitionAndMinMaxIndex()
String calculated_partition_id = partition.getID(metadata_snapshot->getPartitionKey().sample_block);
if (calculated_partition_id != info.partition_id)
throw Exception(
"While loading part " + data_part_storage->getFullPath() + ": calculated partition ID: " + calculated_partition_id
"While loading part " + getDataPartStorage().getFullPath() + ": calculated partition ID: " + calculated_partition_id
+ " differs from partition ID in part name: " + info.partition_id,
ErrorCodes::CORRUPTED_DATA);
}
@ -965,7 +1082,7 @@ void IMergeTreeDataPart::loadChecksums(bool require)
bytes_on_disk = checksums.getTotalSizeOnDisk();
}
else
bytes_on_disk = data_part_storage->calculateTotalSizeOnDisk();
bytes_on_disk = getDataPartStorage().calculateTotalSizeOnDisk();
}
else
{
@ -977,7 +1094,7 @@ void IMergeTreeDataPart::loadChecksums(bool require)
LOG_WARNING(storage.log, "Checksums for part {} not found. Will calculate them from data on disk.", name);
checksums = checkDataPart(shared_from_this(), false);
data_part_storage->writeChecksums(checksums, {});
writeChecksums(checksums, {});
bytes_on_disk = checksums.getTotalSizeOnDisk();
}
@ -990,8 +1107,6 @@ void IMergeTreeDataPart::appendFilesOfChecksums(Strings & files)
void IMergeTreeDataPart::loadRowsCount()
{
//String path = fs::path(getRelativePath()) / "count.txt";
auto read_rows_count = [&]()
{
auto buf = metadata_manager->read("count.txt");
@ -1062,7 +1177,7 @@ void IMergeTreeDataPart::loadRowsCount()
}
else
{
if (data_part_storage->exists("count.txt"))
if (getDataPartStorage().exists("count.txt"))
{
read_rows_count();
return;
@ -1161,7 +1276,7 @@ void IMergeTreeDataPart::appendFilesOfUUID(Strings & files)
void IMergeTreeDataPart::loadColumns(bool require)
{
String path = fs::path(data_part_storage->getRelativePath()) / "columns.txt";
String path = fs::path(getDataPartStorage().getRelativePath()) / "columns.txt";
auto metadata_snapshot = storage.getInMemoryMetadataPtr();
if (parent_part)
metadata_snapshot = metadata_snapshot->projections.get(name).metadata;
@ -1172,18 +1287,18 @@ void IMergeTreeDataPart::loadColumns(bool require)
{
/// We can get list of columns only from columns.txt in compact parts.
if (require || part_type == Type::Compact)
throw Exception("No columns.txt in part " + name + ", expected path " + path + " on drive " + data_part_storage->getDiskName(),
throw Exception("No columns.txt in part " + name + ", expected path " + path + " on drive " + getDataPartStorage().getDiskName(),
ErrorCodes::NO_FILE_IN_DATA_PART);
/// If there is no file with a list of columns, write it down.
for (const NameAndTypePair & column : metadata_snapshot->getColumns().getAllPhysical())
if (data_part_storage->exists(getFileNameForColumn(column) + ".bin"))
if (getDataPartStorage().exists(getFileNameForColumn(column) + ".bin"))
loaded_columns.push_back(column);
if (columns.empty())
throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);
data_part_storage->writeColumns(loaded_columns, {});
writeColumns(loaded_columns, {});
}
else
{
@ -1227,7 +1342,7 @@ void IMergeTreeDataPart::assertHasVersionMetadata(MergeTreeTransaction * txn) co
name, storage.getStorageID().getNameForLogs(), version.creation_tid, txn ? txn->dumpDescription() : "<none>");
assert(!txn || storage.supportsTransactions());
assert(!txn || data_part_storage->exists(TXN_VERSION_METADATA_FILE_NAME));
assert(!txn || getDataPartStorage().exists(TXN_VERSION_METADATA_FILE_NAME));
}
void IMergeTreeDataPart::storeVersionMetadata(bool force) const
@ -1242,7 +1357,7 @@ void IMergeTreeDataPart::storeVersionMetadata(bool force) const
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Transactions are not supported for in-memory parts (table: {}, part: {})",
storage.getStorageID().getNameForLogs(), name);
data_part_storage->writeVersionMetadata(version, storage.getSettings()->fsync_part_directory);
writeVersionMetadata(version, storage.getSettings()->fsync_part_directory);
}
void IMergeTreeDataPart::appendCSNToVersionMetadata(VersionMetadata::WhichCSN which_csn) const
@ -1254,7 +1369,14 @@ void IMergeTreeDataPart::appendCSNToVersionMetadata(VersionMetadata::WhichCSN wh
chassert(!(which_csn == VersionMetadata::WhichCSN::REMOVAL && version.removal_csn == 0));
chassert(isStoredOnDisk());
data_part_storage->appendCSNToVersionMetadata(version, which_csn);
/// Small enough appends to file are usually atomic,
/// so we append new metadata instead of rewriting file to reduce number of fsyncs.
/// We don't need to do fsync when writing CSN, because in case of hard restart
/// we will be able to restore CSN from transaction log in Keeper.
auto out = getDataPartStorage().writeTransactionFile(WriteMode::Append);
version.writeCSN(*out, which_csn);
out->finalize();
}
void IMergeTreeDataPart::appendRemovalTIDToVersionMetadata(bool clear) const
@ -1277,13 +1399,74 @@ void IMergeTreeDataPart::appendRemovalTIDToVersionMetadata(bool clear) const
else
LOG_TEST(storage.log, "Appending removal TID for {} (creation: {}, removal {})", name, version.creation_tid, version.removal_tid);
data_part_storage->appendRemovalTIDToVersionMetadata(version, clear);
auto out = getDataPartStorage().writeTransactionFile(WriteMode::Append);
version.writeRemovalTID(*out, clear);
out->finalize();
/// fsync is not required when we are clearing removal TID, because after hard restart we will fix metadata
if (!clear)
out->sync();
}
static std::unique_ptr<ReadBufferFromFileBase> openForReading(const IDataPartStorage & part_storage, const String & filename)
{
size_t file_size = part_storage.getFileSize(filename);
return part_storage.readFile(filename, ReadSettings().adjustBufferSize(file_size), file_size, file_size);
}
void IMergeTreeDataPart::loadVersionMetadata() const
try
{
data_part_storage->loadVersionMetadata(version, storage.log);
static constexpr auto version_file_name = "txn_version.txt";
static constexpr auto tmp_version_file_name = "txn_version.txt.tmp";
auto & data_part_storage = const_cast<IDataPartStorage &>(getDataPartStorage());
auto remove_tmp_file = [&]()
{
auto last_modified = data_part_storage.getLastModified();
auto buf = openForReading(data_part_storage, tmp_version_file_name);
String content;
readStringUntilEOF(content, *buf);
LOG_WARNING(storage.log, "Found file {} that was last modified on {}, has size {} and the following content: {}",
tmp_version_file_name, last_modified.epochTime(), content.size(), content);
data_part_storage.removeFile(tmp_version_file_name);
};
if (data_part_storage.exists(version_file_name))
{
auto buf = openForReading(data_part_storage, version_file_name);
version.read(*buf);
if (data_part_storage.exists(tmp_version_file_name))
remove_tmp_file();
return;
}
/// Four (?) cases are possible:
/// 1. Part was created without transactions.
/// 2. Version metadata file was not renamed from *.tmp on part creation.
/// 3. Version metadata were written to *.tmp file, but hard restart happened before fsync.
/// 4. Fsyncs in storeVersionMetadata() work incorrectly.
if (!data_part_storage.exists(tmp_version_file_name))
{
/// Case 1.
/// We do not have version metadata and transactions history for old parts,
/// so let's consider that such parts were created by some ancient transaction
/// and were committed with some prehistoric CSN.
/// NOTE It might be Case 3, but version metadata file is written on part creation before other files,
/// so it's not Case 3 if part is not broken.
version.setCreationTID(Tx::PrehistoricTID, nullptr);
version.creation_csn = Tx::PrehistoricCSN;
return;
}
/// Case 2.
/// Content of *.tmp file may be broken, just use fake TID.
/// Transaction was not committed if *.tmp file was not renamed, so we should complete rollback by removing part.
version.setCreationTID(Tx::DummyTID, nullptr);
version.creation_csn = Tx::RolledBackCSN;
remove_tmp_file();
}
catch (Exception & e)
{
@ -1320,15 +1503,15 @@ bool IMergeTreeDataPart::assertHasValidVersionMetadata() const
if (state == MergeTreeDataPartState::Temporary)
return true;
if (!data_part_storage->exists())
if (!getDataPartStorage().exists())
return true;
String content;
String version_file_name = TXN_VERSION_METADATA_FILE_NAME;
try
{
size_t file_size = data_part_storage->getFileSize(TXN_VERSION_METADATA_FILE_NAME);
auto buf = data_part_storage->readFile(TXN_VERSION_METADATA_FILE_NAME, ReadSettings().adjustBufferSize(file_size), file_size, std::nullopt);
size_t file_size = getDataPartStorage().getFileSize(TXN_VERSION_METADATA_FILE_NAME);
auto buf = getDataPartStorage().readFile(TXN_VERSION_METADATA_FILE_NAME, ReadSettings().adjustBufferSize(file_size), file_size, std::nullopt);
readStringUntilEOF(content, *buf);
ReadBufferFromString str_buf{content};
@ -1362,10 +1545,11 @@ void IMergeTreeDataPart::appendFilesOfColumns(Strings & files)
bool IMergeTreeDataPart::shallParticipateInMerges(const StoragePolicyPtr & storage_policy) const
{
return data_part_storage->shallParticipateInMerges(*storage_policy);
auto disk_name = getDataPartStorage().getDiskName();
return !storage_policy->getVolumeByDiskName(disk_name)->areMergesAvoided();
}
void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_new_dir_if_exists, DataPartStorageBuilderPtr builder) const
void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_new_dir_if_exists)
try
{
assertOnDisk();
@ -1376,22 +1560,21 @@ try
if (parent_part)
{
/// For projections, move is only possible inside parent part dir.
relative_path = parent_part->data_part_storage->getRelativePath();
relative_path = parent_part->getDataPartStorage().getRelativePath();
}
String from = data_part_storage->getRelativePath();
auto old_projection_root_path = getDataPartStorage().getRelativePath();
auto to = fs::path(relative_path) / new_relative_path;
metadata_manager->deleteAll(true);
metadata_manager->assertAllDeleted(true);
builder->rename(to.parent_path(), to.filename(), storage.log, remove_new_dir_if_exists, fsync_dir);
data_part_storage->onRename(to.parent_path(), to.filename());
getDataPartStorage().rename(to.parent_path(), to.filename(), storage.log, remove_new_dir_if_exists, fsync_dir);
metadata_manager->updateAll(true);
for (const auto & [p_name, part] : projection_parts)
{
part->data_part_storage = data_part_storage->getProjection(p_name + ".proj");
}
auto new_projection_root_path = to.string();
for (const auto & [_, part] : projection_parts)
part->getDataPartStorage().changeRootPath(old_projection_root_path, new_projection_root_path);
}
catch (...)
{
@ -1432,14 +1615,14 @@ void IMergeTreeDataPart::initializePartMetadataManager()
void IMergeTreeDataPart::initializeIndexGranularityInfo()
{
auto mrk_ext = MergeTreeIndexGranularityInfo::getMarksExtensionFromFilesystem(data_part_storage);
auto mrk_ext = MergeTreeIndexGranularityInfo::getMarksExtensionFromFilesystem(getDataPartStorage());
if (mrk_ext)
index_granularity_info = MergeTreeIndexGranularityInfo(storage, MarkType{*mrk_ext});
else
index_granularity_info = MergeTreeIndexGranularityInfo(storage, part_type);
}
void IMergeTreeDataPart::remove() const
void IMergeTreeDataPart::remove()
{
assert(assertHasValidVersionMetadata());
part_is_probably_removed_from_disk = true;
@ -1456,7 +1639,6 @@ void IMergeTreeDataPart::remove() const
return CanRemoveDescription{.can_remove_anything = can_remove, .files_not_to_remove = files_not_to_remove };
};
if (!isStoredOnDisk())
return;
@ -1475,7 +1657,7 @@ void IMergeTreeDataPart::remove() const
projection_checksums.emplace_back(IDataPartStorage::ProjectionChecksums{.name = p_name, .checksums = projection_part->checksums});
}
data_part_storage->remove(std::move(can_remove_callback), checksums, projection_checksums, is_temp, getState(), storage.log);
getDataPartStorage().remove(std::move(can_remove_callback), checksums, projection_checksums, is_temp, getState(), storage.log);
}
std::optional<String> IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool detached, bool broken) const
@ -1492,7 +1674,7 @@ std::optional<String> IMergeTreeDataPart::getRelativePathForPrefix(const String
if (detached && parent_part)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot detach projection");
return data_part_storage->getRelativePathForPrefix(storage.log, prefix, detached, broken);
return getDataPartStorage().getRelativePathForPrefix(storage.log, prefix, detached, broken);
}
std::optional<String> IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix, bool broken) const
@ -1507,11 +1689,11 @@ std::optional<String> IMergeTreeDataPart::getRelativePathForDetachedPart(const S
return {};
}
void IMergeTreeDataPart::renameToDetached(const String & prefix, DataPartStorageBuilderPtr builder) const
void IMergeTreeDataPart::renameToDetached(const String & prefix)
{
auto path_to_detach = getRelativePathForDetachedPart(prefix, /* broken */ false);
assert(path_to_detach);
renameTo(path_to_detach.value(), true, builder);
renameTo(path_to_detach.value(), true);
part_is_probably_removed_from_disk = true;
}
@ -1530,7 +1712,7 @@ void IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const Storag
if (!maybe_path_in_detached)
return;
data_part_storage->freeze(
getDataPartStorage().freeze(
storage.relative_data_path,
*maybe_path_in_detached,
/*make_source_readonly*/ true,
@ -1539,17 +1721,17 @@ void IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const Storag
{});
}
DataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const
MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const
{
assertOnDisk();
if (disk->getName() == data_part_storage->getDiskName())
throw Exception("Can not clone data part " + name + " to same disk " + data_part_storage->getDiskName(), ErrorCodes::LOGICAL_ERROR);
if (disk->getName() == getDataPartStorage().getDiskName())
throw Exception("Can not clone data part " + name + " to same disk " + getDataPartStorage().getDiskName(), ErrorCodes::LOGICAL_ERROR);
if (directory_name.empty())
throw Exception("Can not clone data part " + name + " to empty directory.", ErrorCodes::LOGICAL_ERROR);
String path_to_clone = fs::path(storage.relative_data_path) / directory_name / "";
return data_part_storage->clone(path_to_clone, data_part_storage->getPartDirectory(), disk, storage.log);
return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, storage.log);
}
void IMergeTreeDataPart::checkConsistencyBase() const
@ -1590,26 +1772,26 @@ void IMergeTreeDataPart::checkConsistencyBase() const
}
}
data_part_storage->checkConsistency(checksums);
getDataPartStorage().checkConsistency(checksums);
}
else
{
auto check_file_not_empty = [this](const String & file_path)
{
UInt64 file_size;
if (!data_part_storage->exists(file_path) || (file_size = data_part_storage->getFileSize(file_path)) == 0)
if (!getDataPartStorage().exists(file_path) || (file_size = getDataPartStorage().getFileSize(file_path)) == 0)
throw Exception(
ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART,
"Part {} is broken: {} is empty",
data_part_storage->getFullPath(),
std::string(fs::path(data_part_storage->getFullPath()) / file_path));
getDataPartStorage().getFullPath(),
std::string(fs::path(getDataPartStorage().getFullPath()) / file_path));
return file_size;
};
/// Check that the primary key index is not empty.
if (!pk.column_names.empty())
{
String index_name = "primary" + getIndexExtensionFromFilesystem(data_part_storage).value();
String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage()).value();
check_file_not_empty(index_name);
}
@ -1753,7 +1935,7 @@ bool IMergeTreeDataPart::checkAllTTLCalculated(const StorageMetadataPtr & metada
String IMergeTreeDataPart::getUniqueId() const
{
return data_part_storage->getUniqueId();
return getDataPartStorage().getUniqueId();
}
String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const
@ -1792,11 +1974,11 @@ IMergeTreeDataPart::uint128 IMergeTreeDataPart::getActualChecksumByFile(const St
return it->second.file_hash;
}
if (!data_part_storage->exists(file_name))
if (!getDataPartStorage().exists(file_name))
{
return {};
}
std::unique_ptr<ReadBufferFromFileBase> in_file = data_part_storage->readFile(file_name, {}, std::nullopt, std::nullopt);
std::unique_ptr<ReadBufferFromFileBase> in_file = getDataPartStorage().readFile(file_name, {}, std::nullopt, std::nullopt);
HashingReadBuffer in_hash(*in_file);
String value;
@ -1824,11 +2006,11 @@ bool isInMemoryPart(const MergeTreeDataPartPtr & data_part)
return (data_part && data_part->getType() == MergeTreeDataPartType::InMemory);
}
std::optional<std::string> getIndexExtensionFromFilesystem(const DataPartStoragePtr & data_part_storage)
std::optional<std::string> getIndexExtensionFromFilesystem(const IDataPartStorage & data_part_storage)
{
if (data_part_storage->exists())
if (data_part_storage.exists())
{
for (auto it = data_part_storage->iterate(); it->isValid(); it->next())
for (auto it = data_part_storage.iterate(); it->isValid(); it->next())
{
const auto & extension = fs::path(it->name()).extension();
if (extension == getIndexExtension(false)

View File

@ -1,5 +1,6 @@
#pragma once
#include "IO/WriteSettings.h"
#include <Core/Block.h>
#include <base/types.h>
#include <Core/NamesAndTypes.h>
@ -46,7 +47,7 @@ class UncompressedCache;
class MergeTreeTransaction;
/// Description of the data part.
class IMergeTreeDataPart : public std::enable_shared_from_this<IMergeTreeDataPart>
class IMergeTreeDataPart : public std::enable_shared_from_this<IMergeTreeDataPart>, public DataPartStorageHolder
{
public:
static constexpr auto DATA_FILE_EXTENSION = ".bin";
@ -67,19 +68,18 @@ public:
using uint128 = IPartMetadataManager::uint128;
IMergeTreeDataPart(
const MergeTreeData & storage_,
const String & name_,
const MergeTreePartInfo & info_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
Type part_type_,
const IMergeTreeDataPart * parent_part_);
IMergeTreeDataPart(
const MergeTreeData & storage_,
const String & name_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
Type part_type_,
const IMergeTreeDataPart * parent_part_);
@ -94,13 +94,12 @@ public:
const ReadBufferFromFileBase::ProfileCallback & profile_callback_) const = 0;
virtual MergeTreeWriterPtr getWriter(
DataPartStorageBuilderPtr data_part_storage_builder,
const NamesAndTypesList & columns_list,
const StorageMetadataPtr & metadata_snapshot,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity) const = 0;
const MergeTreeIndexGranularity & computed_index_granularity) = 0;
virtual bool isStoredOnDisk() const = 0;
@ -152,7 +151,7 @@ public:
/// Throws an exception if part is not stored in on-disk format.
void assertOnDisk() const;
void remove() const;
void remove();
/// Initialize columns (from columns.txt if exists, or create from column files if not).
/// Load checksums from checksums.txt if exists. Load index if required.
@ -200,10 +199,6 @@ public:
/// processed by multiple shards.
UUID uuid = UUIDHelpers::Nil;
/// This is an object which encapsulates all the operations with disk.
/// Contains a path to stored data.
DataPartStoragePtr data_part_storage;
MergeTreeIndexGranularityInfo index_granularity_info;
size_t rows_count = 0;
@ -289,8 +284,8 @@ public:
using WrittenFiles = std::vector<std::unique_ptr<WriteBufferFromFileBase>>;
[[nodiscard]] WrittenFiles store(const MergeTreeData & data, const DataPartStorageBuilderPtr & data_part_storage_builder, Checksums & checksums) const;
[[nodiscard]] WrittenFiles store(const Names & column_names, const DataTypes & data_types, const DataPartStorageBuilderPtr & data_part_storage_builder, Checksums & checksums) const;
[[nodiscard]] WrittenFiles store(const MergeTreeData & data, IDataPartStorage & part_storage, Checksums & checksums) const;
[[nodiscard]] WrittenFiles store(const Names & column_names, const DataTypes & data_types, IDataPartStorage & part_storage, Checksums & checksums) const;
void update(const Block & block, const Names & column_names);
void merge(const MinMaxIndex & other);
@ -321,17 +316,17 @@ public:
size_t getFileSizeOrZero(const String & file_name) const;
/// Moves a part to detached/ directory and adds prefix to its name
void renameToDetached(const String & prefix, DataPartStorageBuilderPtr builder) const;
void renameToDetached(const String & prefix);
/// Makes checks and move part to new directory
/// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly
virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists, DataPartStorageBuilderPtr builder) const;
virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists);
/// Makes clone of a part in detached/ directory via hard links
virtual void makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const;
/// Makes full clone of part in specified subdirectory (relative to storage data directory, e.g. "detached") on another disk
DataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const;
MutableDataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const;
/// Checks that .bin and .mrk files exist.
///
@ -445,6 +440,12 @@ public:
/// True if there is a lightweight delete mask file in the part.
bool hasLightweightDelete() const { return columns.contains(LightweightDeleteDescription::FILTER_COLUMN.name); }
void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings);
void writeDeleteOnDestroyMarker();
void removeDeleteOnDestroyMarker();
void removeVersionMetadata();
protected:
/// Total size of all columns, calculated once in calcuateColumnSizesOnDisk
@ -566,6 +567,12 @@ private:
/// any specific compression.
void loadDefaultCompressionCodec();
void writeColumns(const NamesAndTypesList & columns_, const WriteSettings & settings);
void writeVersionMetadata(const VersionMetadata & version_, bool fsync_part_dir) const;
template <typename Writer>
void writeMetadata(const String & filename, const WriteSettings & settings, Writer && writer);
static void appendFilesOfDefaultCompressionCodec(Strings & files);
/// Found column without specific compression and return codec
@ -585,7 +592,7 @@ bool isCompactPart(const MergeTreeDataPartPtr & data_part);
bool isWidePart(const MergeTreeDataPartPtr & data_part);
bool isInMemoryPart(const MergeTreeDataPartPtr & data_part);
inline String getIndexExtension(bool is_compressed_primary_key) { return is_compressed_primary_key ? ".cidx" : ".idx"; }
std::optional<String> getIndexExtensionFromFilesystem(const DataPartStoragePtr & data_part_storage);
std::optional<String> getIndexExtensionFromFilesystem(const IDataPartStorage & data_part_storage);
bool isCompressedFromIndexExtension(const String & index_extension);
}

View File

@ -7,7 +7,8 @@ namespace DB
{
class IDataPartStorage;
using DataPartStoragePtr = std::shared_ptr<IDataPartStorage>;
using DataPartStoragePtr = std::shared_ptr<const IDataPartStorage>;
class MergeTreeIndexGranularity;
struct MergeTreeDataPartChecksums;
struct MergeTreeIndexGranularityInfo;
@ -36,7 +37,7 @@ public:
virtual bool isProjectionPart() const = 0;
virtual const DataPartStoragePtr & getDataPartStorage() const = 0;
virtual DataPartStoragePtr getDataPartStorage() const = 0;
virtual const NamesAndTypesList & getColumns() const = 0;

View File

@ -38,14 +38,12 @@ Block permuteBlockIfNeeded(const Block & block, const IColumn::Permutation * per
}
IMergeTreeDataPartWriter::IMergeTreeDataPartWriter(
const MergeTreeData::DataPartPtr & data_part_,
DataPartStorageBuilderPtr data_part_storage_builder_,
const MergeTreeMutableDataPartPtr & data_part_,
const NamesAndTypesList & columns_list_,
const StorageMetadataPtr & metadata_snapshot_,
const MergeTreeWriterSettings & settings_,
const MergeTreeIndexGranularity & index_granularity_)
: data_part(data_part_)
, data_part_storage_builder(std::move(data_part_storage_builder_))
, storage(data_part_->storage)
, metadata_snapshot(metadata_snapshot_)
, columns_list(columns_list_)

View File

@ -22,8 +22,7 @@ class IMergeTreeDataPartWriter : private boost::noncopyable
{
public:
IMergeTreeDataPartWriter(
const MergeTreeData::DataPartPtr & data_part_,
DataPartStorageBuilderPtr data_part_storage_builder_,
const MergeTreeMutableDataPartPtr & data_part_,
const NamesAndTypesList & columns_list_,
const StorageMetadataPtr & metadata_snapshot_,
const MergeTreeWriterSettings & settings_,
@ -42,8 +41,7 @@ public:
protected:
const MergeTreeData::DataPartPtr data_part;
DataPartStorageBuilderPtr data_part_storage_builder;
const MergeTreeMutableDataPartPtr data_part;
const MergeTreeData & storage;
const StorageMetadataPtr metadata_snapshot;
const NamesAndTypesList columns_list;

View File

@ -6,14 +6,13 @@ namespace DB
{
IMergedBlockOutputStream::IMergedBlockOutputStream(
DataPartStorageBuilderPtr data_part_storage_builder_,
const MergeTreeDataPartPtr & data_part,
const MergeTreeMutableDataPartPtr & data_part,
const StorageMetadataPtr & metadata_snapshot_,
const NamesAndTypesList & columns_list,
bool reset_columns_)
: storage(data_part->storage)
, metadata_snapshot(metadata_snapshot_)
, data_part_storage_builder(std::move(data_part_storage_builder_))
, data_part_storage(data_part->getDataPartStoragePtr())
, reset_columns(reset_columns_)
{
if (reset_columns)

View File

@ -1,5 +1,6 @@
#pragma once
#include "Storages/MergeTree/IDataPartStorage.h"
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
@ -12,8 +13,7 @@ class IMergedBlockOutputStream
{
public:
IMergedBlockOutputStream(
DataPartStorageBuilderPtr data_part_storage_builder_,
const MergeTreeDataPartPtr & data_part,
const MergeTreeMutableDataPartPtr & data_part,
const StorageMetadataPtr & metadata_snapshot_,
const NamesAndTypesList & columns_list,
bool reset_columns_);
@ -42,7 +42,7 @@ protected:
const MergeTreeData & storage;
StorageMetadataPtr metadata_snapshot;
DataPartStorageBuilderPtr data_part_storage_builder;
MutableDataPartStoragePtr data_part_storage;
IMergeTreeDataPart::MergeTreeWriterPtr writer;
bool reset_columns = false;

View File

@ -12,7 +12,8 @@ public:
explicit LoadedMergeTreeDataPartInfoForReader(MergeTreeData::DataPartPtr data_part_)
: IMergeTreeDataPartInfoForReader(data_part_->storage.getContext())
, data_part(data_part_)
{}
{
}
bool isCompactPart() const override { return DB::isCompactPart(data_part); }
@ -22,7 +23,7 @@ public:
bool isProjectionPart() const override { return data_part->isProjectionPart(); }
const DataPartStoragePtr & getDataPartStorage() const override { return data_part->data_part_storage; }
DataPartStoragePtr getDataPartStorage() const override { return data_part->getDataPartStoragePtr(); }
const NamesAndTypesList & getColumns() const override { return data_part->getColumns(); }

View File

@ -160,7 +160,9 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
for (auto & part_ptr : parts)
{
ttl_infos.update(part_ptr->ttl_infos);
max_volume_index = std::max(max_volume_index, part_ptr->data_part_storage->getVolumeIndex(*storage.getStoragePolicy()));
auto disk_name = part_ptr->getDataPartStorage().getDiskName();
size_t volume_index = storage.getStoragePolicy()->getVolumeIndexByDiskName(disk_name);
max_volume_index = std::max(max_volume_index, volume_index);
}
/// It will live until the whole task is destroyed
@ -294,12 +296,10 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWriter write_part_log)
{
part = merge_task->getFuture().get();
auto builder = merge_task->getBuilder();
/// Task is not needed
merge_task.reset();
storage.merger_mutator.renameMergedTemporaryPart(part, parts, NO_TRANSACTION_PTR, *transaction_ptr, builder);
storage.merger_mutator.renameMergedTemporaryPart(part, parts, NO_TRANSACTION_PTR, *transaction_ptr);
try
{

View File

@ -65,7 +65,7 @@ MergeListElement::MergeListElement(
for (const auto & source_part : future_part->parts)
{
source_part_names.emplace_back(source_part->name);
source_part_paths.emplace_back(source_part->data_part_storage->getFullPath());
source_part_paths.emplace_back(source_part->getDataPartStorage().getFullPath());
total_size_bytes_compressed += source_part->getBytesOnDisk();
total_size_marks += source_part->getMarksCount();

View File

@ -115,10 +115,9 @@ void MergePlainMergeTreeTask::prepare()
void MergePlainMergeTreeTask::finish()
{
new_part = merge_task->getFuture().get();
auto builder = merge_task->getBuilder();
MergeTreeData::Transaction transaction(storage, txn.get());
storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, txn, transaction, builder);
storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, txn, transaction);
transaction.commit();
write_part_log({});

View File

@ -1,3 +1,4 @@
#include "Storages/MergeTree/IDataPartStorage.h"
#include <Storages/MergeTree/MergeTask.h>
#include <memory>
@ -125,23 +126,26 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
ctx->disk = global_ctx->space_reservation->getDisk();
String local_tmp_part_basename = local_tmp_prefix + global_ctx->future_part->name + local_tmp_suffix;
MutableDataPartStoragePtr data_part_storage;
if (global_ctx->parent_path_storage_builder)
if (global_ctx->parent_part)
{
global_ctx->data_part_storage_builder = global_ctx->parent_path_storage_builder->getProjection(local_tmp_part_basename);
data_part_storage = global_ctx->parent_part->getDataPartStorage().getProjection(local_tmp_part_basename);
}
else
{
auto local_single_disk_volume = std::make_shared<SingleDiskVolume>("volume_" + global_ctx->future_part->name, ctx->disk, 0);
global_ctx->data_part_storage_builder = std::make_shared<DataPartStorageBuilderOnDisk>(
data_part_storage = std::make_shared<DataPartStorageOnDisk>(
local_single_disk_volume,
global_ctx->data->relative_data_path,
local_tmp_part_basename);
data_part_storage->beginTransaction();
}
if (global_ctx->data_part_storage_builder->exists())
throw Exception("Directory " + global_ctx->data_part_storage_builder->getFullPath() + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS);
if (data_part_storage->exists())
throw Exception("Directory " + data_part_storage->getFullPath() + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS);
if (!global_ctx->parent_part)
global_ctx->temporary_directory_lock = global_ctx->data->getTemporaryPartDirectoryHolder(local_tmp_part_basename);
@ -163,8 +167,6 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
global_ctx->merging_columns,
global_ctx->merging_column_names);
auto data_part_storage = global_ctx->data_part_storage_builder->getStorage();
global_ctx->new_data_part = global_ctx->data->createPart(
global_ctx->future_part->name,
global_ctx->future_part->type,
@ -302,7 +304,6 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
global_ctx->to = std::make_shared<MergedBlockOutputStream>(
global_ctx->new_data_part,
global_ctx->data_part_storage_builder,
global_ctx->metadata_snapshot,
global_ctx->merging_columns,
MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()),
@ -501,7 +502,6 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const
ctx->executor = std::make_unique<PullingPipelineExecutor>(ctx->column_parts_pipeline);
ctx->column_to = std::make_unique<MergedColumnOnlyOutputStream>(
global_ctx->data_part_storage_builder,
global_ctx->new_data_part,
global_ctx->metadata_snapshot,
ctx->executor->getHeader(),
@ -654,7 +654,6 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c
global_ctx->deduplicate_by_columns,
projection_merging_params,
global_ctx->new_data_part.get(),
global_ctx->data_part_storage_builder.get(),
".proj",
NO_TRANSACTION_PTR,
global_ctx->data,

View File

@ -59,8 +59,7 @@ public:
bool deduplicate_,
Names deduplicate_by_columns_,
MergeTreeData::MergingParams merging_params_,
const IMergeTreeDataPart * parent_part_,
const IDataPartStorageBuilder * parent_path_storage_builder_,
IMergeTreeDataPart * parent_part_,
String suffix_,
MergeTreeTransactionPtr txn,
MergeTreeData * data_,
@ -82,7 +81,6 @@ public:
global_ctx->deduplicate = std::move(deduplicate_);
global_ctx->deduplicate_by_columns = std::move(deduplicate_by_columns_);
global_ctx->parent_part = std::move(parent_part_);
global_ctx->parent_path_storage_builder = std::move(parent_path_storage_builder_);
global_ctx->data = std::move(data_);
global_ctx->mutator = std::move(mutator_);
global_ctx->merges_blocker = std::move(merges_blocker_);
@ -102,11 +100,6 @@ public:
return global_ctx->promise.get_future();
}
DataPartStorageBuilderPtr getBuilder()
{
return global_ctx->data_part_storage_builder;
}
bool execute();
private:
@ -141,8 +134,7 @@ private:
StorageMetadataPtr metadata_snapshot{nullptr};
FutureMergedMutatedPartPtr future_part{nullptr};
/// This will be either nullptr or new_data_part, so raw pointer is ok.
const IMergeTreeDataPart * parent_part{nullptr};
const IDataPartStorageBuilder * parent_path_storage_builder{nullptr};
IMergeTreeDataPart * parent_part{nullptr};
ContextPtr context{nullptr};
time_t time_of_merge{0};
ReservationSharedPtr space_reservation{nullptr};
@ -168,7 +160,6 @@ private:
std::unique_ptr<PullingPipelineExecutor> merging_executor;
MergeTreeData::MutableDataPartPtr new_data_part{nullptr};
DataPartStorageBuilderPtr data_part_storage_builder;
/// If lightweight delete mask is present then some input rows are filtered out right after reading.
std::shared_ptr<std::atomic<size_t>> input_rows_filtered{std::make_shared<std::atomic<size_t>>(0)};

View File

@ -607,7 +607,7 @@ Block MergeTreeBaseSelectProcessor::transformHeader(
if (!row_level_column.type->canBeUsedInBooleanContext())
{
throw Exception("Invalid type for filter in PREWHERE: " + row_level_column.type->getName(),
ErrorCodes::LOGICAL_ERROR);
ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
}
block.erase(prewhere_info->row_level_column_name);
@ -620,7 +620,7 @@ Block MergeTreeBaseSelectProcessor::transformHeader(
if (!prewhere_column.type->canBeUsedInBooleanContext())
{
throw Exception("Invalid type for filter in PREWHERE: " + prewhere_column.type->getName(),
ErrorCodes::LOGICAL_ERROR);
ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
}
if (prewhere_info->remove_prewhere_column)
@ -628,13 +628,13 @@ Block MergeTreeBaseSelectProcessor::transformHeader(
else
{
WhichDataType which(removeNullable(recursiveRemoveLowCardinality(prewhere_column.type)));
if (which.isInt() || which.isUInt())
if (which.isNativeInt() || which.isNativeUInt())
prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1u)->convertToFullColumnIfConst();
else if (which.isFloat())
prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1.0f)->convertToFullColumnIfConst();
else
throw Exception("Illegal type " + prewhere_column.type->getName() + " of column for filter.",
ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Illegal type {} of column for filter", prewhere_column.type->getName());
}
}

View File

@ -943,8 +943,8 @@ Int64 MergeTreeData::getMaxBlockNumber() const
}
void MergeTreeData::loadDataPartsFromDisk(
DataPartsVector & broken_parts_to_detach,
DataPartsVector & duplicate_parts_to_remove,
MutableDataPartsVector & broken_parts_to_detach,
MutableDataPartsVector & duplicate_parts_to_remove,
ThreadPool & pool,
size_t num_parts,
std::queue<std::vector<std::pair<String, DiskPtr>>> & parts_queue,
@ -1082,7 +1082,6 @@ void MergeTreeData::loadDataPartsFromDisk(
if (size_of_part.has_value())
part_size_str = formatReadableSizeWithBinarySuffix(*size_of_part);
LOG_ERROR(log,
"Detaching broken part {}{} (size: {}). "
"If it happened after update, it is likely because of backward incompatibility. "
@ -1200,8 +1199,7 @@ void MergeTreeData::loadDataPartsFromDisk(
void MergeTreeData::loadDataPartsFromWAL(
DataPartsVector & /* broken_parts_to_detach */,
DataPartsVector & duplicate_parts_to_remove,
MutableDataPartsVector & duplicate_parts_to_remove,
MutableDataPartsVector & parts_from_wal)
{
for (auto & part : parts_from_wal)
@ -1215,7 +1213,7 @@ void MergeTreeData::loadDataPartsFromWAL(
{
if ((*it)->checksums.getTotalChecksumHex() == part->checksums.getTotalChecksumHex())
{
LOG_ERROR(log, "Remove duplicate part {}", part->data_part_storage->getFullPath());
LOG_ERROR(log, "Remove duplicate part {}", part->getDataPartStorage().getFullPath());
duplicate_parts_to_remove.push_back(part);
}
else
@ -1329,8 +1327,8 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
auto part_lock = lockParts();
data_parts_indexes.clear();
DataPartsVector broken_parts_to_detach;
DataPartsVector duplicate_parts_to_remove;
MutableDataPartsVector broken_parts_to_detach;
MutableDataPartsVector duplicate_parts_to_remove;
if (num_parts > 0)
loadDataPartsFromDisk(
@ -1384,7 +1382,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
parts_from_wal.insert(
parts_from_wal.end(), std::make_move_iterator(disk_wal_parts.begin()), std::make_move_iterator(disk_wal_parts.end()));
loadDataPartsFromWAL(broken_parts_to_detach, duplicate_parts_to_remove, parts_from_wal);
loadDataPartsFromWAL(duplicate_parts_to_remove, parts_from_wal);
num_parts += parts_from_wal.size();
}
@ -1397,11 +1395,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
}
for (auto & part : broken_parts_to_detach)
{
auto builder = part->data_part_storage->getBuilder();
part->renameToDetached("broken-on-start", builder); /// detached parts must not have '_' in prefixes
builder->commit();
}
part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes
for (auto & part : duplicate_parts_to_remove)
part->remove();
@ -1689,6 +1683,15 @@ scope_guard MergeTreeData::getTemporaryPartDirectoryHolder(const String & part_d
return [this, part_dir_name]() { temporary_parts.remove(part_dir_name); };
}
/// Returns a mutable pointer to `part` so that the caller can perform a destructive
/// operation on it (callers in this file use it for remove(), renameToDetached()
/// and writeDeleteOnDestroyMarker()).
///
/// Throws LOGICAL_ERROR unless the part is in the Deleting or DeleteOnDestroy state,
/// so constness is only cast away for parts that are already scheduled for removal.
MergeTreeData::MutableDataPartPtr MergeTreeData::preparePartForRemoval(const DataPartPtr & part)
{
    /// Read the state exactly once: the value that fails the check is the value reported
    /// in the exception, instead of a second getState() call that may observe another state.
    const auto state = part->getState();
    if (state != DataPartState::Deleting && state != DataPartState::DeleteOnDestroy)
        throw Exception(ErrorCodes::LOGICAL_ERROR,
            "Cannot remove part {}, because it has state: {}", part->name, magic_enum::enum_name(state));

    return std::const_pointer_cast<IMergeTreeDataPart>(part);
}
MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force)
{
@ -1864,7 +1867,7 @@ void MergeTreeData::flushAllInMemoryPartsIfNeeded()
{
if (auto part_in_memory = asInMemoryPart(part))
{
part_in_memory->flushToDisk(part_in_memory->data_part_storage->getPartDirectory(), metadata_snapshot);
part_in_memory->flushToDisk(part_in_memory->getDataPartStorage().getPartDirectory(), metadata_snapshot);
}
}
}
@ -1948,7 +1951,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t
if (thread_group)
CurrentThread::attachToIfDetached(thread_group);
part->remove();
preparePartForRemoval(part)->remove();
if (part_names_succeed)
{
std::lock_guard lock(part_names_mutex);
@ -1964,7 +1967,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t
LOG_DEBUG(log, "Removing {} parts from filesystem: {}", parts_to_remove.size(), fmt::join(parts_to_remove, ", "));
for (const DataPartPtr & part : parts_to_remove)
{
part->remove();
preparePartForRemoval(part)->remove();
if (part_names_succeed)
part_names_succeed->insert(part->name);
}
@ -2144,11 +2147,14 @@ void MergeTreeData::rename(const String & new_table_path, const StorageID & new_
if (!getStorageID().hasUUID())
getContext()->dropCaches();
/// TODO: remove const_cast
for (const auto & part : data_parts_by_info)
part->data_part_storage->changeRootPath(relative_data_path, new_table_path);
{
auto & part_mutable = const_cast<IMergeTreeDataPart &>(*part);
part_mutable.getDataPartStorage().changeRootPath(relative_data_path, new_table_path);
}
relative_data_path = new_table_path;
renameInMemory(new_table_id);
}
@ -2166,7 +2172,12 @@ void MergeTreeData::dropAllData()
auto lock = lockParts();
DataPartsVector all_parts(data_parts_by_info.begin(), data_parts_by_info.end());
DataPartsVector all_parts;
for (auto it = data_parts_by_info.begin(); it != data_parts_by_info.end(); ++it)
{
modifyPartState(it, DataPartState::Deleting);
all_parts.push_back(*it);
}
{
std::lock_guard wal_lock(write_ahead_log_mutex);
@ -2179,7 +2190,6 @@ void MergeTreeData::dropAllData()
if (!getStorageID().hasUUID())
getContext()->dropCaches();
/// Removing each data part before the recursive removal of the directory speeds up removal, because fewer syscalls are needed.
NameSet part_names_failed;
try
@ -2726,7 +2736,7 @@ MergeTreeDataPartType MergeTreeData::choosePartTypeOnDisk(size_t bytes_uncompres
MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(const String & name,
MergeTreeDataPartType type, const MergeTreePartInfo & part_info,
const DataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const
const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const
{
if (type == MergeTreeDataPartType::Compact)
return std::make_shared<MergeTreeDataPartCompact>(*this, name, part_info, data_part_storage, parent_part);
@ -2739,17 +2749,17 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(const String & name,
}
MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(
const String & name, const DataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const
const String & name, const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const
{
return createPart(name, MergeTreePartInfo::fromPartName(name, format_version), data_part_storage, parent_part);
}
MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(
const String & name, const MergeTreePartInfo & part_info,
const DataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const
const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const
{
MergeTreeDataPartType type;
auto mrk_ext = MergeTreeIndexGranularityInfo::getMarksExtensionFromFilesystem(data_part_storage);
auto mrk_ext = MergeTreeIndexGranularityInfo::getMarksExtensionFromFilesystem(*data_part_storage);
if (mrk_ext)
{
@ -2943,12 +2953,11 @@ MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace(
bool MergeTreeData::renameTempPartAndAdd(
MutableDataPartPtr & part,
Transaction & out_transaction,
DataPartStorageBuilderPtr builder,
DataPartsLock & lock)
{
DataPartsVector covered_parts;
if (!renameTempPartAndReplaceImpl(part, out_transaction, lock, builder, &covered_parts))
if (!renameTempPartAndReplaceImpl(part, out_transaction, lock, &covered_parts))
return false;
if (!covered_parts.empty())
@ -2982,32 +2991,31 @@ void MergeTreeData::checkPartCanBeAddedToTable(MutableDataPartPtr & part, DataPa
}
}
void MergeTreeData::preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, DataPartStorageBuilderPtr builder)
void MergeTreeData::preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction)
{
part->is_temp = false;
part->setState(DataPartState::PreActive);
assert([&]()
{
String dir_name = fs::path(part->data_part_storage->getRelativePath()).filename();
String dir_name = fs::path(part->getDataPartStorage().getRelativePath()).filename();
bool may_be_cleaned_up = dir_name.starts_with("tmp_") || dir_name.starts_with("tmp-fetch_");
return !may_be_cleaned_up || temporary_parts.contains(dir_name);
}());
part->renameTo(part->name, true, builder);
part->renameTo(part->name, true);
data_parts_indexes.insert(part);
out_transaction.addPart(part, builder);
out_transaction.addPart(part);
}
bool MergeTreeData::renameTempPartAndReplaceImpl(
MutableDataPartPtr & part,
Transaction & out_transaction,
DataPartsLock & lock,
DataPartStorageBuilderPtr builder,
DataPartsVector * out_covered_parts)
{
LOG_TRACE(log, "Renaming temporary part {} to {}.", part->data_part_storage->getPartDirectory(), part->name);
LOG_TRACE(log, "Renaming temporary part {} to {}.", part->getDataPartStorage().getPartDirectory(), part->name);
if (&out_transaction.data != this)
throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.",
@ -3029,7 +3037,7 @@ bool MergeTreeData::renameTempPartAndReplaceImpl(
/// All checks are passed. Now we can rename the part on disk.
/// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts
preparePartForCommit(part, out_transaction, builder);
preparePartForCommit(part, out_transaction);
if (out_covered_parts)
{
@ -3045,21 +3053,19 @@ bool MergeTreeData::renameTempPartAndReplaceImpl(
MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplaceUnlocked(
MutableDataPartPtr & part,
Transaction & out_transaction,
DataPartStorageBuilderPtr builder,
DataPartsLock & lock)
{
DataPartsVector covered_parts;
renameTempPartAndReplaceImpl(part, out_transaction, lock, builder, &covered_parts);
renameTempPartAndReplaceImpl(part, out_transaction, lock, &covered_parts);
return covered_parts;
}
MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
MutableDataPartPtr & part,
Transaction & out_transaction,
DataPartStorageBuilderPtr builder)
Transaction & out_transaction)
{
auto part_lock = lockParts();
return renameTempPartAndReplaceUnlocked(part, out_transaction, builder, part_lock);
return renameTempPartAndReplaceUnlocked(part, out_transaction, part_lock);
}
void MergeTreeData::removePartsFromWorkingSet(MergeTreeTransaction * txn, const MergeTreeData::DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock)
@ -3240,9 +3246,9 @@ void MergeTreeData::outdateBrokenPartAndCloneToDetached(const DataPartPtr & part
{
auto metadata_snapshot = getInMemoryMetadataPtr();
if (prefix.empty())
LOG_INFO(log, "Cloning part {} to {} and making it obsolete.", part_to_detach->data_part_storage->getPartDirectory(), part_to_detach->name);
LOG_INFO(log, "Cloning part {} to {} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name);
else
LOG_INFO(log, "Cloning part {} to {}_{} and making it obsolete.", part_to_detach->data_part_storage->getPartDirectory(), prefix, part_to_detach->name);
LOG_INFO(log, "Cloning part {} to {}_{} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), prefix, part_to_detach->name);
part_to_detach->makeCloneInDetached(prefix, metadata_snapshot);
@ -3254,9 +3260,9 @@ void MergeTreeData::outdateBrokenPartAndCloneToDetached(const DataPartPtr & part
void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeTreeData::DataPartPtr & part_to_detach, const String & prefix, bool restore_covered)
{
if (prefix.empty())
LOG_INFO(log, "Renaming {} to {} and forgetting it.", part_to_detach->data_part_storage->getPartDirectory(), part_to_detach->name);
LOG_INFO(log, "Renaming {} to {} and forgetting it.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name);
else
LOG_INFO(log, "Renaming {} to {}_{} and forgetting it.", part_to_detach->data_part_storage->getPartDirectory(), prefix, part_to_detach->name);
LOG_INFO(log, "Renaming {} to {}_{} and forgetting it.", part_to_detach->getDataPartStorage().getPartDirectory(), prefix, part_to_detach->name);
auto lock = lockParts();
bool removed_active_part = false;
@ -3279,11 +3285,7 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT
}
modifyPartState(it_part, DataPartState::Deleting);
auto builder = part->data_part_storage->getBuilder();
part->renameToDetached(prefix, builder);
builder->commit();
preparePartForRemoval(part)->renameToDetached(prefix);
data_parts_indexes.erase(it_part);
if (restore_covered && part->info.level == 0)
@ -3437,7 +3439,7 @@ void MergeTreeData::tryRemovePartImmediately(DataPartPtr && part)
try
{
part_to_delete->remove();
preparePartForRemoval(part_to_delete)->remove();
}
catch (...)
{
@ -3647,9 +3649,9 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy)
/// when allow_remote_fs_zero_copy_replication turned on and off again
original_active_part->force_keep_shared_data = false;
if (original_active_part->data_part_storage->supportZeroCopyReplication() &&
part_copy->data_part_storage->supportZeroCopyReplication() &&
original_active_part->data_part_storage->getUniqueId() == part_copy->data_part_storage->getUniqueId())
if (original_active_part->getDataPartStorage().supportZeroCopyReplication() &&
part_copy->getDataPartStorage().supportZeroCopyReplication() &&
original_active_part->getDataPartStorage().getUniqueId() == part_copy->getDataPartStorage().getUniqueId())
{
/// May be when several volumes use the same S3/HDFS storage
original_active_part->force_keep_shared_data = true;
@ -3669,7 +3671,7 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy)
/// All other locks are taken in StorageReplicatedMergeTree
lockSharedData(*part_copy);
original_active_part->data_part_storage->writeDeleteOnDestroyMarker(log);
preparePartForRemoval(original_active_part)->writeDeleteOnDestroyMarker();
return;
}
}
@ -3803,9 +3805,9 @@ MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_na
static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part)
{
part->loadColumnsChecksumsIndexes(false, true);
part->modification_time = part->data_part_storage->getLastModified().epochTime();
part->data_part_storage->removeDeleteOnDestroyMarker();
part->data_part_storage->removeVersionMetadata();
part->modification_time = part->getDataPartStorage().getLastModified().epochTime();
part->removeDeleteOnDestroyMarker();
part->removeVersionMetadata();
}
void MergeTreeData::calculateColumnAndSecondaryIndexSizesImpl()
@ -3965,7 +3967,7 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String &
auto disk = getStoragePolicy()->getDiskByName(name);
std::erase_if(parts, [&](auto part_ptr)
{
return part_ptr->data_part_storage->getDiskName() == disk->getName();
return part_ptr->getDataPartStorage().getDiskName() == disk->getName();
});
if (parts.empty())
@ -4015,7 +4017,7 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String
{
for (const auto & disk : volume->getDisks())
{
if (part_ptr->data_part_storage->getDiskName() == disk->getName())
if (part_ptr->getDataPartStorage().getDiskName() == disk->getName())
{
return true;
}
@ -4212,7 +4214,7 @@ BackupEntries MergeTreeData::backupParts(const DataPartsVector & data_parts, con
make_temporary_hard_links = false;
hold_storage_and_part_ptrs = true;
}
else if (supportsReplication() && part->data_part_storage->supportZeroCopyReplication() && getSettings()->allow_remote_fs_zero_copy_replication)
else if (supportsReplication() && part->getDataPartStorage().supportZeroCopyReplication() && getSettings()->allow_remote_fs_zero_copy_replication)
{
/// Hard links don't work correctly with zero copy replication.
make_temporary_hard_links = false;
@ -4224,7 +4226,7 @@ BackupEntries MergeTreeData::backupParts(const DataPartsVector & data_parts, con
table_lock = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout);
BackupEntries backup_entries_from_part;
part->data_part_storage->backup(
part->getDataPartStorage().backup(
part->checksums,
part->getFileNamesWithoutChecksums(),
data_path_in_backup,
@ -4235,7 +4237,7 @@ BackupEntries MergeTreeData::backupParts(const DataPartsVector & data_parts, con
auto projection_parts = part->getProjectionParts();
for (const auto & [projection_name, projection_part] : projection_parts)
{
projection_part->data_part_storage->backup(
projection_part->getDataPartStorage().backup(
projection_part->checksums,
projection_part->getFileNamesWithoutChecksums(),
fs::path{data_path_in_backup} / part->name,
@ -4911,22 +4913,16 @@ ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size, SpacePtr space)
return checkAndReturnReservation(expected_size, std::move(reservation));
}
ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size, const DataPartStoragePtr & data_part_storage)
ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size, const IDataPartStorage & data_part_storage)
{
expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
return data_part_storage->reserve(expected_size);
return data_part_storage.reserve(expected_size);
}
ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size, const DataPartStorageBuilderPtr & data_part_storage_builder)
ReservationPtr MergeTreeData::tryReserveSpace(UInt64 expected_size, const IDataPartStorage & data_part_storage)
{
expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
return data_part_storage_builder->reserve(expected_size);
}
ReservationPtr MergeTreeData::tryReserveSpace(UInt64 expected_size, const DataPartStoragePtr & data_part_storage)
{
expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
return data_part_storage->tryReserve(expected_size);
return data_part_storage.tryReserve(expected_size);
}
ReservationPtr MergeTreeData::tryReserveSpace(UInt64 expected_size, SpacePtr space)
@ -5063,7 +5059,7 @@ bool MergeTreeData::shouldPerformTTLMoveOnInsert(const SpacePtr & move_destinati
if (move_destination->isDisk())
{
auto disk = std::static_pointer_cast<IDisk>(move_destination);
if (auto volume = getStoragePolicy()->tryGetVolumeByDisk(disk))
if (auto volume = getStoragePolicy()->tryGetVolumeByDiskName(disk->getName()))
return volume->perform_ttl_move_on_insert;
}
return false;
@ -5075,11 +5071,11 @@ bool MergeTreeData::isPartInTTLDestination(const TTLDescription & ttl, const IMe
if (ttl.destination_type == DataDestinationType::VOLUME)
{
for (const auto & disk : policy->getVolumeByName(ttl.destination_name)->getDisks())
if (disk->getName() == part.data_part_storage->getDiskName())
if (disk->getName() == part.getDataPartStorage().getDiskName())
return true;
}
else if (ttl.destination_type == DataDestinationType::DISK)
return policy->getDiskByName(ttl.destination_name)->getName() == part.data_part_storage->getDiskName();
return policy->getDiskByName(ttl.destination_name)->getName() == part.getDataPartStorage().getDiskName();
return false;
}
@ -5151,7 +5147,7 @@ void MergeTreeData::Transaction::rollbackPartsToTemporaryState()
WriteBufferFromOwnString buf;
buf << " Rollbacking parts state to temporary and removing from working set:";
for (const auto & part : precommitted_parts)
buf << " " << part->data_part_storage->getPartDirectory();
buf << " " << part->getDataPartStorage().getPartDirectory();
buf << ".";
LOG_DEBUG(data.log, "Undoing transaction.{}", buf.str());
@ -5162,12 +5158,11 @@ void MergeTreeData::Transaction::rollbackPartsToTemporaryState()
clear();
}
void MergeTreeData::Transaction::addPart(MutableDataPartPtr & part, DataPartStorageBuilderPtr builder)
void MergeTreeData::Transaction::addPart(MutableDataPartPtr & part)
{
precommitted_parts.insert(part);
if (asInMemoryPart(part))
has_in_memory_parts = true;
part_builders.push_back(builder);
}
void MergeTreeData::Transaction::rollback()
@ -5177,7 +5172,7 @@ void MergeTreeData::Transaction::rollback()
WriteBufferFromOwnString buf;
buf << " Removing parts:";
for (const auto & part : precommitted_parts)
buf << " " << part->data_part_storage->getPartDirectory();
buf << " " << part->getDataPartStorage().getPartDirectory();
buf << ".";
LOG_DEBUG(data.log, "Undoing transaction.{}", buf.str());
@ -5205,8 +5200,9 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData:
auto parts_lock = acquired_parts_lock ? MergeTreeData::DataPartsLock() : data.lockParts();
auto * owing_parts_lock = acquired_parts_lock ? acquired_parts_lock : &parts_lock;
for (auto & builder : part_builders)
builder->commit();
for (const auto & part : precommitted_parts)
if (part->getDataPartStorage().hasActiveTransaction())
part->getDataPartStorage().commitTransaction();
bool commit_to_wal = has_in_memory_parts && settings->in_memory_parts_enable_wal;
if (txn || commit_to_wal)
@ -5215,7 +5211,7 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData:
if (commit_to_wal)
wal = data.getWriteAheadLog();
for (const DataPartPtr & part : precommitted_parts)
for (const auto & part : precommitted_parts)
{
if (txn)
{
@ -5240,7 +5236,7 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData:
size_t reduce_rows = 0;
size_t reduce_parts = 0;
for (const DataPartPtr & part : precommitted_parts)
for (const auto & part : precommitted_parts)
{
DataPartPtr covering_part;
DataPartsVector covered_parts = data.getActivePartsToReplace(part->info, part->name, covering_part, *owing_parts_lock);
@ -6232,7 +6228,7 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::cloneAn
bool does_storage_policy_allow_same_disk = false;
for (const DiskPtr & disk : getStoragePolicy()->getDisks())
{
if (disk->getName() == src_part->data_part_storage->getDiskName())
if (disk->getName() == src_part->getDataPartStorage().getDiskName())
{
does_storage_policy_allow_same_disk = true;
break;
@ -6242,7 +6238,7 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::cloneAn
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Could not clone and load part {} because disk does not belong to storage policy",
quoteString(src_part->data_part_storage->getFullPath()));
quoteString(src_part->getDataPartStorage().getFullPath()));
String dst_part_name = src_part->getNewName(dst_part_info);
assert(!tmp_part_prefix.empty());
@ -6250,9 +6246,8 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::cloneAn
auto temporary_directory_lock = getTemporaryPartDirectoryHolder(tmp_dst_part_name);
/// Why is it needed if we only hardlink files?
auto reservation = src_part->data_part_storage->reserve(src_part->getBytesOnDisk());
auto src_part_storage = src_part->data_part_storage;
auto reservation = src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk());
auto src_part_storage = src_part->getDataPartStoragePtr();
/// If source part is in memory, flush it to disk and clone it already in on-disk format
if (auto src_part_in_memory = asInMemoryPart(src_part))
@ -6279,7 +6274,7 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::cloneAn
hardlinked_files->source_part_name = src_part->name;
hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID();
for (auto it = src_part->data_part_storage->iterate(); it->isValid(); it->next())
for (auto it = src_part->getDataPartStorage().iterate(); it->isValid(); it->next())
{
if (!files_to_copy_instead_of_hardlinks.contains(it->name())
&& it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME
@ -6338,14 +6333,14 @@ Strings MergeTreeData::getDataPaths() const
void MergeTreeData::reportBrokenPart(MergeTreeData::DataPartPtr & data_part) const
{
if (data_part->data_part_storage && data_part->data_part_storage->isBroken())
if (data_part->getDataPartStorage().isBroken())
{
auto parts = getDataPartsForInternalUsage();
LOG_WARNING(log, "Scanning parts to recover on broken disk {}@{}.", data_part->data_part_storage->getDiskName(), data_part->data_part_storage->getDiskPath());
LOG_WARNING(log, "Scanning parts to recover on broken disk {}@{}.", data_part->getDataPartStorage().getDiskName(), data_part->getDataPartStorage().getDiskPath());
for (const auto & part : parts)
{
if (part->data_part_storage && part->data_part_storage->getDiskName() == data_part->data_part_storage->getDiskName())
if (part->getDataPartStorage().getDiskName() == data_part->getDataPartStorage().getDiskName())
broken_part_callback(part->name);
}
}
@ -6436,7 +6431,7 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(
LOG_DEBUG(log, "Freezing part {} snapshot will be placed at {}", part->name, backup_path);
auto data_part_storage = part->data_part_storage;
auto data_part_storage = part->getDataPartStoragePtr();
String src_part_path = data_part_storage->getRelativePath();
String backup_part_path = fs::path(backup_path) / relative_data_path;
if (auto part_in_memory = asInMemoryPart(part))
@ -6450,12 +6445,12 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(
// Store metadata for replicated table.
// Do nothing for non-replicated.
createAndStoreFreezeMetadata(disk, part, fs::path(backup_part_path) / part->data_part_storage->getPartDirectory());
createAndStoreFreezeMetadata(disk, part, fs::path(backup_part_path) / part->getDataPartStorage().getPartDirectory());
};
auto new_storage = data_part_storage->freeze(
backup_part_path,
part->data_part_storage->getPartDirectory(),
part->getDataPartStorage().getPartDirectory(),
/*make_source_readonly*/ true,
callback,
/*copy_instead_of_hardlink*/ false,
@ -6577,8 +6572,8 @@ try
if (result_part)
{
part_log_elem.disk_name = result_part->data_part_storage->getDiskName();
part_log_elem.path_on_disk = result_part->data_part_storage->getFullPath();
part_log_elem.disk_name = result_part->getDataPartStorage().getDiskName();
part_log_elem.path_on_disk = result_part->getDataPartStorage().getFullPath();
part_log_elem.bytes_compressed_on_disk = result_part->getBytesOnDisk();
part_log_elem.rows = result_part->rows_count;
part_log_elem.part_type = result_part->getType();
@ -6734,7 +6729,7 @@ bool MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagge
for (const auto & moving_part : moving_tagger->parts_to_move)
{
Stopwatch stopwatch;
DataPartPtr cloned_part;
MutableDataPartPtr cloned_part;
auto write_part_log = [&](const ExecutionStatus & execution_status)
{
@ -6997,7 +6992,7 @@ ReservationPtr MergeTreeData::balancedReservation(
if (part->isStoredOnDisk() && part->getBytesOnDisk() >= min_bytes_to_rebalance_partition_over_jbod
&& part_info.partition_id == part->info.partition_id)
{
auto name = part->data_part_storage->getDiskName();
auto name = part->getDataPartStorage().getDiskName();
auto it = disk_occupation.find(name);
if (it != disk_occupation.end())
{

View File

@ -214,6 +214,7 @@ public:
};
using DataParts = std::set<DataPartPtr, LessDataPart>;
using MutableDataParts = std::set<MutableDataPartPtr, LessDataPart>;
using DataPartsVector = std::vector<DataPartPtr>;
using DataPartsLock = std::unique_lock<std::mutex>;
@ -225,15 +226,15 @@ public:
/// After this method setColumns must be called
MutableDataPartPtr createPart(const String & name,
MergeTreeDataPartType type, const MergeTreePartInfo & part_info,
const DataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const;
const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const;
/// Create part, that already exists on filesystem.
/// After this method, 'loadColumnsChecksumsIndexes' must be called.
MutableDataPartPtr createPart(const String & name,
const DataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const;
const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const;
MutableDataPartPtr createPart(const String & name, const MergeTreePartInfo & part_info,
const DataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const;
const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const;
/// Auxiliary object to add a set of parts into the working set in two steps:
/// * First, as PreActive parts (the parts are ready, but not yet in the active set).
@ -247,7 +248,7 @@ public:
DataPartsVector commit(MergeTreeData::DataPartsLock * acquired_parts_lock = nullptr);
void addPart(MutableDataPartPtr & part, DataPartStorageBuilderPtr builder);
void addPart(MutableDataPartPtr & part);
void rollback();
@ -275,9 +276,8 @@ public:
MergeTreeData & data;
MergeTreeTransaction * txn;
DataParts precommitted_parts;
std::vector<DataPartStorageBuilderPtr> part_builders;
DataParts locked_parts;
MutableDataParts precommitted_parts;
MutableDataParts locked_parts;
bool has_in_memory_parts = false;
void clear();
@ -414,9 +414,8 @@ public:
SelectQueryInfo & info) const override;
ReservationPtr reserveSpace(UInt64 expected_size, VolumePtr & volume) const;
static ReservationPtr tryReserveSpace(UInt64 expected_size, const DataPartStoragePtr & data_part_storage);
static ReservationPtr reserveSpace(UInt64 expected_size, const DataPartStoragePtr & data_part_storage);
static ReservationPtr reserveSpace(UInt64 expected_size, const DataPartStorageBuilderPtr & data_part_storage_builder);
static ReservationPtr tryReserveSpace(UInt64 expected_size, const IDataPartStorage & data_part_storage);
static ReservationPtr reserveSpace(UInt64 expected_size, const IDataPartStorage & data_part_storage);
static bool partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right);
@ -555,21 +554,18 @@ public:
bool renameTempPartAndAdd(
MutableDataPartPtr & part,
Transaction & transaction,
DataPartStorageBuilderPtr builder,
DataPartsLock & lock);
/// The same as renameTempPartAndAdd but the block range of the part can contain existing parts.
/// Returns all parts covered by the added part (in ascending order).
DataPartsVector renameTempPartAndReplace(
MutableDataPartPtr & part,
Transaction & out_transaction,
DataPartStorageBuilderPtr builder);
Transaction & out_transaction);
/// Unlocked version of the previous one. Useful when adding multiple parts under a single lock.
DataPartsVector renameTempPartAndReplaceUnlocked(
MutableDataPartPtr & part,
Transaction & out_transaction,
DataPartStorageBuilderPtr builder,
DataPartsLock & lock);
/// Remove parts from working set immediately (without wait for background
@ -979,7 +975,7 @@ public:
/// Fetch part only if some replica has it on shared storage like S3
/// Overridden in StorageReplicatedMergeTree
virtual DataPartStoragePtr tryToFetchIfShared(const IMergeTreeDataPart &, const DiskPtr &, const String &) { return nullptr; }
virtual MutableDataPartStoragePtr tryToFetchIfShared(const IMergeTreeDataPart &, const DiskPtr &, const String &) { return nullptr; }
/// Check shared data usage on other replicas for a detached/frozen part
/// Remove local files and remote files if needed
@ -1264,13 +1260,12 @@ protected:
static void incrementMergedPartsProfileEvent(MergeTreeDataPartType type);
private:
/// Checks that the candidate part doesn't break invariants: it is in the correct partition and doesn't exist already
void checkPartCanBeAddedToTable(MutableDataPartPtr & part, DataPartsLock & lock) const;
/// Prepares the part to be committed in memory: fills some fields inside the part, adds it to data_parts_indexes
/// in precommitted state and adds it to the transaction
void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, DataPartStorageBuilderPtr builder);
void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction);
/// Low-level method for preparing parts for commit (in-memory).
/// FIXME Merge MergeTreeTransaction and Transaction
@ -1278,7 +1273,6 @@ private:
MutableDataPartPtr & part,
Transaction & out_transaction,
DataPartsLock & lock,
DataPartStorageBuilderPtr builder,
DataPartsVector * out_covered_parts);
/// RAII Wrapper for atomic work with currently moving parts
@ -1334,8 +1328,8 @@ private:
virtual std::unique_ptr<MergeTreeSettings> getDefaultSettings() const = 0;
void loadDataPartsFromDisk(
DataPartsVector & broken_parts_to_detach,
DataPartsVector & duplicate_parts_to_remove,
MutableDataPartsVector & broken_parts_to_detach,
MutableDataPartsVector & duplicate_parts_to_remove,
ThreadPool & pool,
size_t num_parts,
std::queue<std::vector<std::pair<String, DiskPtr>>> & parts_queue,
@ -1343,8 +1337,7 @@ private:
const MergeTreeSettingsPtr & settings);
void loadDataPartsFromWAL(
DataPartsVector & broken_parts_to_detach,
DataPartsVector & duplicate_parts_to_remove,
MutableDataPartsVector & duplicate_parts_to_remove,
MutableDataPartsVector & parts_from_wal);
/// Create zero-copy exclusive lock for part and disk. Useful for coordination of
@ -1356,6 +1349,8 @@ private:
/// Otherwise, in non-parallel case will break and return.
void clearPartsFromFilesystemImpl(const DataPartsVector & parts, NameSet * part_names_succeed);
static MutableDataPartPtr preparePartForRemoval(const DataPartPtr & part);
TemporaryParts temporary_parts;
};

View File

@ -483,8 +483,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart(
const Names & deduplicate_by_columns,
const MergeTreeData::MergingParams & merging_params,
const MergeTreeTransactionPtr & txn,
const IMergeTreeDataPart * parent_part,
const IDataPartStorageBuilder * parent_path_storage_builder,
IMergeTreeDataPart * parent_part,
const String & suffix)
{
return std::make_shared<MergeTask>(
@ -499,7 +498,6 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart(
deduplicate_by_columns,
merging_params,
parent_part,
parent_path_storage_builder,
suffix,
txn,
&data,
@ -541,8 +539,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart
MergeTreeData::MutableDataPartPtr & new_data_part,
const MergeTreeData::DataPartsVector & parts,
const MergeTreeTransactionPtr & txn,
MergeTreeData::Transaction & out_transaction,
DataPartStorageBuilderPtr builder)
MergeTreeData::Transaction & out_transaction)
{
/// Some of the source parts were possibly created in a transaction, so a non-transactional merge may break isolation.
if (data.transactions_enabled.load(std::memory_order_relaxed) && !txn)
@ -550,7 +547,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart
"but transactions were enabled for this table");
/// Rename new part, add to the set and remove original parts.
auto replaced_parts = data.renameTempPartAndReplace(new_data_part, out_transaction, builder);
auto replaced_parts = data.renameTempPartAndReplace(new_data_part, out_transaction);
/// Let's check that all original parts, and only those, have been deleted.
if (replaced_parts.size() != parts.size())

View File

@ -113,8 +113,7 @@ public:
const Names & deduplicate_by_columns,
const MergeTreeData::MergingParams & merging_params,
const MergeTreeTransactionPtr & txn,
const IMergeTreeDataPart * parent_part = nullptr,
const IDataPartStorageBuilder * parent_path_storage_builder = nullptr,
IMergeTreeDataPart * parent_part = nullptr,
const String & suffix = "");
/// Mutate a single data part with the specified commands. Will create and return a temporary part.
@ -133,8 +132,7 @@ public:
MergeTreeData::MutableDataPartPtr & new_data_part,
const MergeTreeData::DataPartsVector & parts,
const MergeTreeTransactionPtr & txn,
MergeTreeData::Transaction & out_transaction,
DataPartStorageBuilderPtr builder);
MergeTreeData::Transaction & out_transaction);
/// The approximate amount of disk space needed for merge or mutation. With a surplus.

View File

@ -22,7 +22,7 @@ namespace ErrorCodes
MergeTreeDataPartCompact::MergeTreeDataPartCompact(
MergeTreeData & storage_,
const String & name_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
const IMergeTreeDataPart * parent_part_)
: IMergeTreeDataPart(storage_, name_, data_part_storage_, Type::Compact, parent_part_)
{
@ -32,7 +32,7 @@ MergeTreeDataPartCompact::MergeTreeDataPartCompact(
const MergeTreeData & storage_,
const String & name_,
const MergeTreePartInfo & info_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
const IMergeTreeDataPart * parent_part_)
: IMergeTreeDataPart(storage_, name_, info_, data_part_storage_, Type::Compact, parent_part_)
{
@ -58,13 +58,12 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader(
}
IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter(
DataPartStorageBuilderPtr data_part_storage_builder,
const NamesAndTypesList & columns_list,
const StorageMetadataPtr & metadata_snapshot,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity) const
const MergeTreeIndexGranularity & computed_index_granularity)
{
NamesAndTypesList ordered_columns_list;
std::copy_if(columns_list.begin(), columns_list.end(), std::back_inserter(ordered_columns_list),
@ -75,7 +74,7 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter(
{ return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); });
return std::make_unique<MergeTreeDataPartWriterCompact>(
shared_from_this(), std::move(data_part_storage_builder), ordered_columns_list, metadata_snapshot,
shared_from_this(), ordered_columns_list, metadata_snapshot,
indices_to_recalc, getMarksFileExtension(),
default_codec_, writer_settings, computed_index_granularity);
}
@ -97,21 +96,21 @@ void MergeTreeDataPartCompact::calculateEachColumnSizes(ColumnSizeByName & /*eac
void MergeTreeDataPartCompact::loadIndexGranularityImpl(
MergeTreeIndexGranularity & index_granularity_, const MergeTreeIndexGranularityInfo & index_granularity_info_,
size_t columns_count, const DataPartStoragePtr & data_part_storage_)
size_t columns_count, const IDataPartStorage & data_part_storage_)
{
if (!index_granularity_info_.mark_type.adaptive)
throw Exception("MergeTreeDataPartCompact cannot be created with non-adaptive granulary.", ErrorCodes::NOT_IMPLEMENTED);
auto marks_file_path = index_granularity_info_.getMarksFilePath("data");
if (!data_part_storage_->exists(marks_file_path))
if (!data_part_storage_.exists(marks_file_path))
throw Exception(
ErrorCodes::NO_FILE_IN_DATA_PART,
"Marks file '{}' doesn't exist",
std::string(fs::path(data_part_storage_->getFullPath()) / marks_file_path));
std::string(fs::path(data_part_storage_.getFullPath()) / marks_file_path));
size_t marks_file_size = data_part_storage_->getFileSize(marks_file_path);
size_t marks_file_size = data_part_storage_.getFileSize(marks_file_path);
std::unique_ptr<ReadBufferFromFileBase> buffer = data_part_storage_->readFile(
std::unique_ptr<ReadBufferFromFileBase> buffer = data_part_storage_.readFile(
marks_file_path, ReadSettings().adjustBufferSize(marks_file_size), marks_file_size, std::nullopt);
std::unique_ptr<ReadBuffer> marks_reader;
@ -140,7 +139,7 @@ void MergeTreeDataPartCompact::loadIndexGranularity()
if (columns.empty())
throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);
loadIndexGranularityImpl(index_granularity, index_granularity_info, columns.size(), data_part_storage);
loadIndexGranularityImpl(index_granularity, index_granularity_info, columns.size(), getDataPartStorage());
}
bool MergeTreeDataPartCompact::hasColumnFiles(const NameAndTypePair & column) const
@ -171,12 +170,12 @@ void MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata) cons
throw Exception(
ErrorCodes::NO_FILE_IN_DATA_PART,
"No marks file checksum for column in part {}",
data_part_storage->getFullPath());
getDataPartStorage().getFullPath());
if (!checksums.files.contains(DATA_FILE_NAME_WITH_EXTENSION))
throw Exception(
ErrorCodes::NO_FILE_IN_DATA_PART,
"No data file checksum for in part {}",
data_part_storage->getFullPath());
getDataPartStorage().getFullPath());
}
}
else
@ -184,33 +183,33 @@ void MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata) cons
{
/// count.txt should be present even in non custom-partitioned parts
std::string file_path = "count.txt";
if (!data_part_storage->exists(file_path) || data_part_storage->getFileSize(file_path) == 0)
if (!getDataPartStorage().exists(file_path) || getDataPartStorage().getFileSize(file_path) == 0)
throw Exception(
ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART,
"Part {} is broken: {} is empty",
data_part_storage->getRelativePath(),
std::string(fs::path(data_part_storage->getFullPath()) / file_path));
getDataPartStorage().getRelativePath(),
std::string(fs::path(getDataPartStorage().getFullPath()) / file_path));
}
/// Check that marks are nonempty and have the consistent size with columns number.
if (data_part_storage->exists(mrk_file_name))
if (getDataPartStorage().exists(mrk_file_name))
{
UInt64 file_size = data_part_storage->getFileSize(mrk_file_name);
UInt64 file_size = getDataPartStorage().getFileSize(mrk_file_name);
if (!file_size)
throw Exception(
ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART,
"Part {} is broken: {} is empty.",
data_part_storage->getRelativePath(),
std::string(fs::path(data_part_storage->getFullPath()) / mrk_file_name));
getDataPartStorage().getRelativePath(),
std::string(fs::path(getDataPartStorage().getFullPath()) / mrk_file_name));
UInt64 expected_file_size = index_granularity_info.getMarkSizeInBytes(columns.size()) * index_granularity.getMarksCount();
if (expected_file_size != file_size)
throw Exception(
ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART,
"Part {} is broken: bad size of marks file '{}': {}, must be: {}",
data_part_storage->getRelativePath(),
std::string(fs::path(data_part_storage->getFullPath()) / mrk_file_name),
getDataPartStorage().getRelativePath(),
std::string(fs::path(getDataPartStorage().getFullPath()) / mrk_file_name),
std::to_string(file_size), std::to_string(expected_file_size));
}
}
@ -218,12 +217,12 @@ void MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata) cons
/// Tells whether this compact part is stored on a remote disk.
/// The answer is taken directly from the part's storage object (isStoredOnRemoteDisk()).
/// NOTE(review): the two consecutive return lines appear to be the pre-change and post-change
/// versions of the same statement as rendered by this diff view — confirm against the real file.
bool MergeTreeDataPartCompact::isStoredOnRemoteDisk() const
{
return data_part_storage->isStoredOnRemoteDisk();
return getDataPartStorage().isStoredOnRemoteDisk();
}
/// Tells whether this compact part's storage supports zero-copy replication.
/// The answer is taken directly from the part's storage object (supportZeroCopyReplication()).
/// NOTE(review): the two consecutive return lines appear to be the pre-change and post-change
/// versions of the same statement as rendered by this diff view — confirm against the real file.
bool MergeTreeDataPartCompact::isStoredOnRemoteDiskWithZeroCopySupport() const
{
return data_part_storage->supportZeroCopyReplication();
return getDataPartStorage().supportZeroCopyReplication();
}
MergeTreeDataPartCompact::~MergeTreeDataPartCompact()

View File

@ -25,13 +25,13 @@ public:
const MergeTreeData & storage_,
const String & name_,
const MergeTreePartInfo & info_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
const IMergeTreeDataPart * parent_part_ = nullptr);
MergeTreeDataPartCompact(
MergeTreeData & storage_,
const String & name_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
const IMergeTreeDataPart * parent_part_ = nullptr);
MergeTreeReaderPtr getReader(
@ -45,13 +45,12 @@ public:
const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override;
MergeTreeWriterPtr getWriter(
DataPartStorageBuilderPtr data_part_storage_builder,
const NamesAndTypesList & columns_list,
const StorageMetadataPtr & metadata_snapshot,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity) const override;
const MergeTreeIndexGranularity & computed_index_granularity) override;
bool isStoredOnDisk() const override { return true; }
@ -68,7 +67,7 @@ public:
protected:
static void loadIndexGranularityImpl(
MergeTreeIndexGranularity & index_granularity_, const MergeTreeIndexGranularityInfo & index_granularity_info_,
size_t columns_count, const DataPartStoragePtr & data_part_storage_);
size_t columns_count, const IDataPartStorage & data_part_storage_);
private:
void checkConsistency(bool require_part_metadata) const override;

View File

@ -1,10 +1,12 @@
#include "MergeTreeDataPartInMemory.h"
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
#include <Storages/MergeTree/MergeTreeReaderInMemory.h>
#include <Storages/MergeTree/MergedBlockOutputStream.h>
#include <Storages/MergeTree/MergeTreeDataPartWriterInMemory.h>
#include <Storages/MergeTree/IMergeTreeReader.h>
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
#include <Storages/MergeTree/DataPartStorageOnDisk.h>
#include <DataTypes/NestedUtils.h>
#include <Disks/createVolume.h>
#include <Interpreters/Context.h>
#include <Poco/Logger.h>
#include <Common/logger_useful.h>
@ -21,7 +23,7 @@ namespace ErrorCodes
MergeTreeDataPartInMemory::MergeTreeDataPartInMemory(
MergeTreeData & storage_,
const String & name_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
const IMergeTreeDataPart * parent_part_)
: IMergeTreeDataPart(storage_, name_, data_part_storage_, Type::InMemory, parent_part_)
{
@ -32,7 +34,7 @@ MergeTreeDataPartInMemory::MergeTreeDataPartInMemory(
const MergeTreeData & storage_,
const String & name_,
const MergeTreePartInfo & info_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
const IMergeTreeDataPart * parent_part_)
: IMergeTreeDataPart(storage_, name_, info_, data_part_storage_, Type::InMemory, parent_part_)
{
@ -56,27 +58,33 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader(
}
/// Creates a MergeTreeDataPartWriterInMemory bound to this part.
/// Only columns_list, metadata_snapshot and writer_settings are forwarded to the writer;
/// the indices, codec and index-granularity parameters are left unnamed and are not used here.
/// The part itself is passed as a shared pointer obtained from shared_from_this() and
/// downcast to MergeTreeDataPartInMemory.
/// NOTE(review): near-duplicate adjacent lines (signature tail, `auto ptr = ...`) and the
/// `data_part_storage_builder` parameter/assignment appear to be the pre-change and post-change
/// sides of this diff view shown together — confirm against the real file.
IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter(
DataPartStorageBuilderPtr data_part_storage_builder_,
const NamesAndTypesList & columns_list,
const StorageMetadataPtr & metadata_snapshot,
const std::vector<MergeTreeIndexPtr> & /* indices_to_recalc */,
const CompressionCodecPtr & /* default_codec */,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & /* computed_index_granularity */) const
const MergeTreeIndexGranularity & /* computed_index_granularity */)
{
data_part_storage_builder = data_part_storage_builder_;
auto ptr = std::static_pointer_cast<const MergeTreeDataPartInMemory>(shared_from_this());
auto ptr = std::static_pointer_cast<MergeTreeDataPartInMemory>(shared_from_this());
return std::make_unique<MergeTreeDataPartWriterInMemory>(
ptr, columns_list, metadata_snapshot, writer_settings);
}
DataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const
MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const
{
auto current_full_path = data_part_storage_builder->getFullPath();
data_part_storage_builder->setRelativePath(new_relative_path);
auto reservation = storage.reserveSpace(block.bytes(), getDataPartStorage());
VolumePtr volume = storage.getStoragePolicy()->getVolume(0);
VolumePtr data_part_volume = createVolumeFromReservation(reservation, volume);
auto new_data_part_storage = std::make_shared<DataPartStorageOnDisk>(
data_part_volume,
storage.getRelativeDataPath(),
new_relative_path);
new_data_part_storage->beginTransaction();
auto current_full_path = getDataPartStorage().getFullPath();
auto new_type = storage.choosePartTypeOnDisk(block.bytes(), rows_count);
auto new_data_part_storage = data_part_storage_builder->getStorage();
auto new_data_part = storage.createPart(name, new_type, info, new_data_part_storage);
new_data_part->uuid = uuid;
@ -84,50 +92,50 @@ DataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & new_rel
new_data_part->partition.value = partition.value;
new_data_part->minmax_idx = minmax_idx;
if (data_part_storage_builder->exists())
if (new_data_part_storage->exists())
{
throw Exception(
ErrorCodes::DIRECTORY_ALREADY_EXISTS,
"Could not flush part {}. Part in {} already exists",
quoteString(current_full_path),
data_part_storage_builder->getFullPath());
new_data_part_storage->getFullPath());
}
data_part_storage_builder->createDirectories();
new_data_part_storage->createDirectories();
auto compression_codec = storage.getContext()->chooseCompressionCodec(0, 0);
auto indices = MergeTreeIndexFactory::instance().getMany(metadata_snapshot->getSecondaryIndices());
MergedBlockOutputStream out(new_data_part, data_part_storage_builder, metadata_snapshot, columns, indices, compression_codec, NO_TRANSACTION_PTR);
MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, indices, compression_codec, NO_TRANSACTION_PTR);
out.write(block);
const auto & projections = metadata_snapshot->getProjections();
for (const auto & [projection_name, projection] : projection_parts)
{
if (projections.has(projection_name))
{
auto projection_part_storage_builder = data_part_storage_builder->getProjection(projection_name + ".proj");
if (projection_part_storage_builder->exists())
auto projection_part_storage = new_data_part_storage->getProjection(projection_name + ".proj");
if (projection_part_storage->exists())
{
throw Exception(
ErrorCodes::DIRECTORY_ALREADY_EXISTS,
"Could not flush projection part {}. Projection part in {} already exists",
projection_name,
projection_part_storage_builder->getFullPath());
projection_part_storage->getFullPath());
}
auto projection_part = asInMemoryPart(projection);
auto projection_type = storage.choosePartTypeOnDisk(projection_part->block.bytes(), rows_count);
MergeTreePartInfo projection_info("all", 0, 0, 0);
auto projection_data_part
= storage.createPart(projection_name, projection_type, projection_info, projection_part_storage_builder->getStorage(), parent_part);
= storage.createPart(projection_name, projection_type, projection_info, projection_part_storage, parent_part);
projection_data_part->is_temp = false; // clean up will be done on parent part
projection_data_part->setColumns(projection->getColumns(), {});
projection_part_storage_builder->createDirectories();
projection_part_storage->createDirectories();
const auto & desc = projections.get(name);
auto projection_compression_codec = storage.getContext()->chooseCompressionCodec(0, 0);
auto projection_indices = MergeTreeIndexFactory::instance().getMany(desc.metadata->getSecondaryIndices());
MergedBlockOutputStream projection_out(
projection_data_part, projection_part_storage_builder, desc.metadata, projection_part->columns, projection_indices,
projection_data_part, desc.metadata, projection_part->columns, projection_indices,
projection_compression_codec, NO_TRANSACTION_PTR);
projection_out.write(projection_part->block);
@ -137,6 +145,7 @@ DataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & new_rel
}
out.finalizePart(new_data_part, false);
new_data_part_storage->commitTransaction();
return new_data_part_storage;
}
@ -146,12 +155,9 @@ void MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const
flushToDisk(detached_path, metadata_snapshot);
}
/// "Renames" an in-memory part: the only visible action is updating the relative path
/// recorded in the part's storage object via setRelativePath(new_relative_path).
/// The remove_new_dir_if_exists flag is accepted but ignored (its name is commented out).
/// NOTE(review): the two signature lines and the old `data_part_storage` /
/// `data_part_storage_builder` statements next to the `getDataPartStorage()` call appear to be
/// the pre-change and post-change sides of this diff view — confirm against the real file.
void MergeTreeDataPartInMemory::renameTo(const String & new_relative_path, bool /* remove_new_dir_if_exists */, DataPartStorageBuilderPtr) const
void MergeTreeDataPartInMemory::renameTo(const String & new_relative_path, bool /* remove_new_dir_if_exists */)
{
data_part_storage->setRelativePath(new_relative_path);
if (data_part_storage_builder)
data_part_storage_builder->setRelativePath(new_relative_path);
getDataPartStorage().setRelativePath(new_relative_path);
}
void MergeTreeDataPartInMemory::calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const

View File

@ -14,13 +14,13 @@ public:
const MergeTreeData & storage_,
const String & name_,
const MergeTreePartInfo & info_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
const IMergeTreeDataPart * parent_part_ = nullptr);
MergeTreeDataPartInMemory(
MergeTreeData & storage_,
const String & name_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
const IMergeTreeDataPart * parent_part_ = nullptr);
MergeTreeReaderPtr getReader(
@ -34,29 +34,27 @@ public:
const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override;
MergeTreeWriterPtr getWriter(
DataPartStorageBuilderPtr data_part_storage_builder_,
const NamesAndTypesList & columns_list,
const StorageMetadataPtr & metadata_snapshot,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity) const override;
const MergeTreeIndexGranularity & computed_index_granularity) override;
bool isStoredOnDisk() const override { return false; }
bool isStoredOnRemoteDisk() const override { return false; }
bool isStoredOnRemoteDiskWithZeroCopySupport() const override { return false; }
bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.getNameInStorage()); }
String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; }
void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists, DataPartStorageBuilderPtr) const override;
void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override;
void makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override;
DataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const;
MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const;
/// Returns the hash of the part's block
Checksum calculateBlockChecksum() const;
mutable Block block;
mutable DataPartStorageBuilderPtr data_part_storage_builder;
private:
mutable std::condition_variable is_merged;
@ -66,6 +64,8 @@ private:
};
using DataPartInMemoryPtr = std::shared_ptr<const MergeTreeDataPartInMemory>;
using MutableDataPartInMemoryPtr = std::shared_ptr<MergeTreeDataPartInMemory>;
DataPartInMemoryPtr asInMemoryPart(const MergeTreeDataPartPtr & part);
}

View File

@ -21,7 +21,7 @@ namespace ErrorCodes
MergeTreeDataPartWide::MergeTreeDataPartWide(
MergeTreeData & storage_,
const String & name_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
const IMergeTreeDataPart * parent_part_)
: IMergeTreeDataPart(storage_, name_, data_part_storage_, Type::Wide, parent_part_)
{
@ -31,7 +31,7 @@ MergeTreeDataPartWide::MergeTreeDataPartWide(
const MergeTreeData & storage_,
const String & name_,
const MergeTreePartInfo & info_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
const IMergeTreeDataPart * parent_part_)
: IMergeTreeDataPart(storage_, name_, info_, data_part_storage_, Type::Wide, parent_part_)
{
@ -56,17 +56,16 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader(
}
/// Creates a MergeTreeDataPartWriterWide bound to this part.
/// Forwards the column list, metadata snapshot, indices to recalculate, default codec,
/// writer settings and precomputed index granularity, together with this part
/// (shared_from_this()) and its marks file extension (getMarksFileExtension()).
/// NOTE(review): the duplicated signature tail and the two variants of the argument list
/// (with and without `data_part_storage_builder`) appear to be the pre-change and post-change
/// sides of this diff view shown together — confirm against the real file.
IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter(
DataPartStorageBuilderPtr data_part_storage_builder,
const NamesAndTypesList & columns_list,
const StorageMetadataPtr & metadata_snapshot,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity) const
const MergeTreeIndexGranularity & computed_index_granularity)
{
return std::make_unique<MergeTreeDataPartWriterWide>(
shared_from_this(), data_part_storage_builder,
columns_list, metadata_snapshot, indices_to_recalc,
shared_from_this(), columns_list,
metadata_snapshot, indices_to_recalc,
getMarksFileExtension(),
default_codec_, writer_settings, computed_index_granularity);
}
@ -105,18 +104,18 @@ ColumnSize MergeTreeDataPartWide::getColumnSizeImpl(
void MergeTreeDataPartWide::loadIndexGranularityImpl(
MergeTreeIndexGranularity & index_granularity_, MergeTreeIndexGranularityInfo & index_granularity_info_,
const DataPartStoragePtr & data_part_storage_, const std::string & any_column_file_name)
const IDataPartStorage & data_part_storage_, const std::string & any_column_file_name)
{
index_granularity_info_.changeGranularityIfRequired(data_part_storage_);
/// We can use any column, it doesn't matter
std::string marks_file_path = index_granularity_info_.getMarksFilePath(any_column_file_name);
if (!data_part_storage_->exists(marks_file_path))
if (!data_part_storage_.exists(marks_file_path))
throw Exception(
ErrorCodes::NO_FILE_IN_DATA_PART, "Marks file '{}' doesn't exist",
std::string(fs::path(data_part_storage_->getFullPath()) / marks_file_path));
std::string(fs::path(data_part_storage_.getFullPath()) / marks_file_path));
size_t marks_file_size = data_part_storage_->getFileSize(marks_file_path);
size_t marks_file_size = data_part_storage_.getFileSize(marks_file_path);
if (!index_granularity_info_.mark_type.adaptive && !index_granularity_info_.mark_type.compressed)
{
@ -126,7 +125,7 @@ void MergeTreeDataPartWide::loadIndexGranularityImpl(
}
else
{
auto marks_file = data_part_storage_->readFile(marks_file_path, ReadSettings().adjustBufferSize(marks_file_size), marks_file_size, std::nullopt);
auto marks_file = data_part_storage_.readFile(marks_file_path, ReadSettings().adjustBufferSize(marks_file_size), marks_file_size, std::nullopt);
std::unique_ptr<ReadBuffer> marks_reader;
if (!index_granularity_info_.mark_type.compressed)
@ -163,18 +162,18 @@ void MergeTreeDataPartWide::loadIndexGranularity()
if (columns.empty())
throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);
loadIndexGranularityImpl(index_granularity, index_granularity_info, data_part_storage, getFileNameForColumn(columns.front()));
loadIndexGranularityImpl(index_granularity, index_granularity_info, getDataPartStorage(), getFileNameForColumn(columns.front()));
}
/// Returns the answer of the part's storage object to isStoredOnRemoteDisk().
/// NOTE(review): the two `return` lines are the removed and added sides of a
/// diff hunk; only one of them exists in the real file.
bool MergeTreeDataPartWide::isStoredOnRemoteDisk() const
{
return data_part_storage->isStoredOnRemoteDisk();
return getDataPartStorage().isStoredOnRemoteDisk();
}
/// Returns the answer of the part's storage object to supportZeroCopyReplication().
/// NOTE(review): the two `return` lines are the removed and added sides of a
/// diff hunk; only one of them exists in the real file.
bool MergeTreeDataPartWide::isStoredOnRemoteDiskWithZeroCopySupport() const
{
return data_part_storage->supportZeroCopyReplication();
return getDataPartStorage().supportZeroCopyReplication();
}
MergeTreeDataPartWide::~MergeTreeDataPartWide()
@ -203,13 +202,13 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const
throw Exception(
ErrorCodes::NO_FILE_IN_DATA_PART,
"No {} file checksum for column {} in part {} ",
mrk_file_name, name_type.name, data_part_storage->getFullPath());
mrk_file_name, name_type.name, getDataPartStorage().getFullPath());
/// A column's data file must have an entry in the checksums. Three arguments are
/// passed to the message, so it needs three `{}` placeholders; with only two the
/// part path was left out of the error text.
/// NOTE(review): the two argument lines below are the removed and added sides of
/// a diff hunk; only one of them exists in the real file.
if (!checksums.files.contains(bin_file_name))
throw Exception(
ErrorCodes::NO_FILE_IN_DATA_PART,
"No {} file checksum for column {} in part {}",
bin_file_name, name_type.name, data_part_storage->getFullPath());
bin_file_name, name_type.name, getDataPartStorage().getFullPath());
});
}
}
@ -225,23 +224,23 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const
auto file_path = ISerialization::getFileNameForStream(name_type, substream_path) + marks_file_extension;
/// Missing file is Ok for case when new column was added.
if (data_part_storage->exists(file_path))
if (getDataPartStorage().exists(file_path))
{
UInt64 file_size = data_part_storage->getFileSize(file_path);
UInt64 file_size = getDataPartStorage().getFileSize(file_path);
if (!file_size)
throw Exception(
ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART,
"Part {} is broken: {} is empty.",
data_part_storage->getFullPath(),
std::string(fs::path(data_part_storage->getFullPath()) / file_path));
getDataPartStorage().getFullPath(),
std::string(fs::path(getDataPartStorage().getFullPath()) / file_path));
if (!marks_size)
marks_size = file_size;
else if (file_size != *marks_size)
throw Exception(
ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART,
"Part {} is broken: marks have different sizes.", data_part_storage->getFullPath());
"Part {} is broken: marks have different sizes.", getDataPartStorage().getFullPath());
}
});
}

View File

@ -1,5 +1,6 @@
#pragma once
#include "Storages/MergeTree/IDataPartStorage.h"
#include <Storages/MergeTree/IMergeTreeDataPart.h>
namespace DB
@ -19,13 +20,13 @@ public:
const MergeTreeData & storage_,
const String & name_,
const MergeTreePartInfo & info_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
const IMergeTreeDataPart * parent_part_ = nullptr);
/// Constructs a wide part from the owning storage, the part name and the part's
/// storage object; `parent_part_` is optional and defaults to nullptr.
/// NOTE(review): the two `data_part_storage_` lines are the removed and added
/// sides of a diff hunk; only one of them exists in the real file.
MergeTreeDataPartWide(
MergeTreeData & storage_,
const String & name_,
const DataPartStoragePtr & data_part_storage_,
const MutableDataPartStoragePtr & data_part_storage_,
const IMergeTreeDataPart * parent_part_ = nullptr);
MergeTreeReaderPtr getReader(
@ -39,13 +40,12 @@ public:
const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override;
/// Override that returns a writer for this part, configured with the given
/// columns, metadata snapshot, indices to recalculate, default codec, writer
/// settings and precomputed index granularity.
/// NOTE(review): this is a diff rendering; the `data_part_storage_builder` line
/// and the two `computed_index_granularity` lines mix the removed and added
/// versions of the declaration.
MergeTreeWriterPtr getWriter(
DataPartStorageBuilderPtr data_part_storage_builder,
const NamesAndTypesList & columns_list,
const StorageMetadataPtr & metadata_snapshot,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity) const override;
const MergeTreeIndexGranularity & computed_index_granularity) override;
/// Always returns true for this part type.
bool isStoredOnDisk() const override { return true; }
@ -64,7 +64,7 @@ public:
protected:
/// Static helper that fills `index_granularity_` / `index_granularity_info_`
/// using the marks file of one column (`any_column_file_name`) found in the
/// given part storage.
/// NOTE(review): the two `data_part_storage_` lines are the removed and added
/// sides of a diff hunk; only one of them exists in the real file.
static void loadIndexGranularityImpl(
MergeTreeIndexGranularity & index_granularity_, MergeTreeIndexGranularityInfo & index_granularity_info_,
const DataPartStoragePtr & data_part_storage_, const std::string & any_column_file_name);
const IDataPartStorage & data_part_storage_, const std::string & any_column_file_name);
private:
void checkConsistency(bool require_part_metadata) const override;

View File

@ -10,8 +10,7 @@ namespace ErrorCodes
}
MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact(
const MergeTreeData::DataPartPtr & data_part_,
DataPartStorageBuilderPtr data_part_storage_builder_,
const MergeTreeMutableDataPartPtr & data_part_,
const NamesAndTypesList & columns_list_,
const StorageMetadataPtr & metadata_snapshot_,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc_,
@ -19,16 +18,16 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact(
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & settings_,
const MergeTreeIndexGranularity & index_granularity_)
: MergeTreeDataPartWriterOnDisk(data_part_, std::move(data_part_storage_builder_), columns_list_, metadata_snapshot_,
: MergeTreeDataPartWriterOnDisk(data_part_, columns_list_, metadata_snapshot_,
indices_to_recalc_, marks_file_extension_,
default_codec_, settings_, index_granularity_)
, plain_file(data_part_storage_builder->writeFile(
, plain_file(data_part_->getDataPartStorage().writeFile(
MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION,
settings.max_compress_block_size,
settings_.query_write_settings))
, plain_hashing(*plain_file)
{
marks_file = data_part_storage_builder->writeFile(
marks_file = data_part_->getDataPartStorage().writeFile(
MergeTreeDataPartCompact::DATA_FILE_NAME + marks_file_extension_,
4096,
settings_.query_write_settings);

View File

@ -11,8 +11,7 @@ class MergeTreeDataPartWriterCompact : public MergeTreeDataPartWriterOnDisk
{
public:
MergeTreeDataPartWriterCompact(
const MergeTreeData::DataPartPtr & data_part,
DataPartStorageBuilderPtr data_part_storage_builder_,
const MergeTreeMutableDataPartPtr & data_part,
const NamesAndTypesList & columns_list,
const StorageMetadataPtr & metadata_snapshot_,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,

View File

@ -11,11 +11,11 @@ namespace ErrorCodes
}
/// Builds a writer for an in-memory part: forwards the part, column list,
/// metadata snapshot and settings to the IMergeTreeDataPartWriter base and keeps
/// the part pointer in `part_in_memory`. The constructor body is empty.
/// NOTE(review): this is a diff rendering; the two `part_` parameter lines and
/// the two base-initializer lines are the removed and added versions, and only
/// one line of each pair exists in the real file.
MergeTreeDataPartWriterInMemory::MergeTreeDataPartWriterInMemory(
const DataPartInMemoryPtr & part_,
const MutableDataPartInMemoryPtr & part_,
const NamesAndTypesList & columns_list_,
const StorageMetadataPtr & metadata_snapshot_,
const MergeTreeWriterSettings & settings_)
: IMergeTreeDataPartWriter(part_, nullptr, columns_list_, metadata_snapshot_, settings_)
: IMergeTreeDataPartWriter(part_, columns_list_, metadata_snapshot_, settings_)
, part_in_memory(part_) {}
void MergeTreeDataPartWriterInMemory::write(

View File

@ -10,7 +10,7 @@ class MergeTreeDataPartWriterInMemory : public IMergeTreeDataPartWriter
{
public:
/// Constructs the writer for the given in-memory part, column list, metadata
/// snapshot and writer settings.
/// NOTE(review): the two `part_` lines are the removed and added sides of a diff
/// hunk; only one of them exists in the real file.
MergeTreeDataPartWriterInMemory(
const DataPartInMemoryPtr & part_,
const MutableDataPartInMemoryPtr & part_,
const NamesAndTypesList & columns_list_,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreeWriterSettings & settings_);
@ -24,7 +24,7 @@ public:
private:
void calculateAndSerializePrimaryIndex(const Block & primary_index_block);
/// The in-memory part handed to this writer at construction.
/// NOTE(review): the two declarations are the removed and added sides of a diff
/// hunk; only one of them exists in the real file.
DataPartInMemoryPtr part_in_memory;
MutableDataPartInMemoryPtr part_in_memory;
};
}

View File

@ -48,7 +48,7 @@ void MergeTreeDataPartWriterOnDisk::Stream::sync() const
MergeTreeDataPartWriterOnDisk::Stream::Stream(
const String & escaped_column_name_,
const DataPartStorageBuilderPtr & data_part_storage_builder,
const MutableDataPartStoragePtr & data_part_storage,
const String & data_path_,
const std::string & data_file_extension_,
const std::string & marks_path_,
@ -61,11 +61,11 @@ MergeTreeDataPartWriterOnDisk::Stream::Stream(
escaped_column_name(escaped_column_name_),
data_file_extension{data_file_extension_},
marks_file_extension{marks_file_extension_},
plain_file(data_part_storage_builder->writeFile(data_path_ + data_file_extension, max_compress_block_size_, query_write_settings)),
plain_file(data_part_storage->writeFile(data_path_ + data_file_extension, max_compress_block_size_, query_write_settings)),
plain_hashing(*plain_file),
compressor(plain_hashing, compression_codec_, max_compress_block_size_),
compressed_hashing(compressor),
marks_file(data_part_storage_builder->writeFile(marks_path_ + marks_file_extension, 4096, query_write_settings)),
marks_file(data_part_storage->writeFile(marks_path_ + marks_file_extension, 4096, query_write_settings)),
marks_hashing(*marks_file),
marks_compressor(marks_hashing, marks_compression_codec_, marks_compress_block_size_),
marks_compressed_hashing(marks_compressor),
@ -96,8 +96,7 @@ void MergeTreeDataPartWriterOnDisk::Stream::addToChecksums(MergeTreeData::DataPa
MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk(
const MergeTreeData::DataPartPtr & data_part_,
DataPartStorageBuilderPtr data_part_storage_builder_,
const MergeTreeMutableDataPartPtr & data_part_,
const NamesAndTypesList & columns_list_,
const StorageMetadataPtr & metadata_snapshot_,
const MergeTreeIndices & indices_to_recalc_,
@ -105,8 +104,7 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk(
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & settings_,
const MergeTreeIndexGranularity & index_granularity_)
: IMergeTreeDataPartWriter(data_part_, std::move(data_part_storage_builder_),
columns_list_, metadata_snapshot_, settings_, index_granularity_)
: IMergeTreeDataPartWriter(data_part_, columns_list_, metadata_snapshot_, settings_, index_granularity_)
, skip_indices(indices_to_recalc_)
, marks_file_extension(marks_file_extension_)
, default_codec(default_codec_)
@ -116,8 +114,8 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk(
if (settings.blocks_are_granules_size && !index_granularity.empty())
throw Exception("Can't take information about index granularity from blocks, when non empty index_granularity array specified", ErrorCodes::LOGICAL_ERROR);
if (!data_part_storage_builder->exists())
data_part_storage_builder->createDirectories();
if (!data_part->getDataPartStorage().exists())
data_part->getDataPartStorage().createDirectories();
if (settings.rewrite_primary_key)
initPrimaryIndex();
@ -178,7 +176,7 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex()
if (metadata_snapshot->hasPrimaryKey())
{
String index_name = "primary" + getIndexExtension(compress_primary_key);
index_file_stream = data_part_storage_builder->writeFile(index_name, DBMS_DEFAULT_BUFFER_SIZE, settings.query_write_settings);
index_file_stream = data_part->getDataPartStorage().writeFile(index_name, DBMS_DEFAULT_BUFFER_SIZE, settings.query_write_settings);
index_file_hashing_stream = std::make_unique<HashingWriteBuffer>(*index_file_stream);
if (compress_primary_key)
@ -204,7 +202,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices()
skip_indices_streams.emplace_back(
std::make_unique<MergeTreeDataPartWriterOnDisk::Stream>(
stream_name,
data_part_storage_builder,
data_part->getDataPartStoragePtr(),
stream_name, index_helper->getSerializedFileExtension(),
stream_name, marks_file_extension,
default_codec, settings.max_compress_block_size,

View File

@ -50,7 +50,7 @@ public:
{
Stream(
const String & escaped_column_name_,
const DataPartStorageBuilderPtr & data_part_storage_builder,
const MutableDataPartStoragePtr & data_part_storage,
const String & data_path_,
const std::string & data_file_extension_,
const std::string & marks_path_,
@ -92,8 +92,7 @@ public:
using StreamPtr = std::unique_ptr<Stream>;
MergeTreeDataPartWriterOnDisk(
const MergeTreeData::DataPartPtr & data_part_,
DataPartStorageBuilderPtr data_part_storage_builder_,
const MergeTreeMutableDataPartPtr & data_part_,
const NamesAndTypesList & columns_list,
const StorageMetadataPtr & metadata_snapshot_,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,

View File

@ -71,8 +71,7 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity,
}
MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide(
const MergeTreeData::DataPartPtr & data_part_,
DataPartStorageBuilderPtr data_part_storage_builder_,
const MergeTreeMutableDataPartPtr & data_part_,
const NamesAndTypesList & columns_list_,
const StorageMetadataPtr & metadata_snapshot_,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc_,
@ -80,7 +79,7 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide(
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & settings_,
const MergeTreeIndexGranularity & index_granularity_)
: MergeTreeDataPartWriterOnDisk(data_part_, std::move(data_part_storage_builder_), columns_list_, metadata_snapshot_,
: MergeTreeDataPartWriterOnDisk(data_part_, columns_list_, metadata_snapshot_,
indices_to_recalc_, marks_file_extension_,
default_codec_, settings_, index_granularity_)
{
@ -117,7 +116,7 @@ void MergeTreeDataPartWriterWide::addStreams(
column_streams[stream_name] = std::make_unique<Stream>(
stream_name,
data_part_storage_builder,
data_part->getDataPartStoragePtr(),
stream_name, DATA_FILE_EXTENSION,
stream_name, marks_file_extension,
compression_codec,
@ -421,20 +420,18 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai
String mrk_path = escaped_name + marks_file_extension;
String bin_path = escaped_name + DATA_FILE_EXTENSION;
auto data_part_storage = data_part_storage_builder->getStorage();
/// Some columns may be removed because of ttl. Skip them.
if (!data_part_storage->exists(mrk_path))
if (!data_part->getDataPartStorage().exists(mrk_path))
return;
auto mrk_file_in = data_part_storage->readFile(mrk_path, {}, std::nullopt, std::nullopt);
auto mrk_file_in = data_part->getDataPartStorage().readFile(mrk_path, {}, std::nullopt, std::nullopt);
std::unique_ptr<ReadBuffer> mrk_in;
if (data_part->index_granularity_info.mark_type.compressed)
mrk_in = std::make_unique<CompressedReadBufferFromFile>(std::move(mrk_file_in));
else
mrk_in = std::move(mrk_file_in);
DB::CompressedReadBufferFromFile bin_in(data_part_storage->readFile(bin_path, {}, std::nullopt, std::nullopt));
DB::CompressedReadBufferFromFile bin_in(data_part->getDataPartStorage().readFile(bin_path, {}, std::nullopt, std::nullopt));
bool must_be_last = false;
UInt64 offset_in_compressed_file = 0;
UInt64 offset_in_decompressed_block = 0;
@ -485,7 +482,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai
if (index_granularity_rows != index_granularity.getMarkRows(mark_num))
throw Exception(
ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{} (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}",
data_part_storage_builder->getFullPath(), mark_num, offset_in_compressed_file, offset_in_decompressed_block, index_granularity.getMarkRows(mark_num), index_granularity_rows, index_granularity.getMarksCount());
data_part->getDataPartStorage().getFullPath(), mark_num, offset_in_compressed_file, offset_in_decompressed_block, index_granularity.getMarkRows(mark_num), index_granularity_rows, index_granularity.getMarksCount());
auto column = type->createColumn();

View File

@ -18,8 +18,7 @@ class MergeTreeDataPartWriterWide : public MergeTreeDataPartWriterOnDisk
{
public:
MergeTreeDataPartWriterWide(
const MergeTreeData::DataPartPtr & data_part,
DataPartStorageBuilderPtr data_part_storage_builder_,
const MergeTreeMutableDataPartPtr & data_part,
const NamesAndTypesList & columns_list,
const StorageMetadataPtr & metadata_snapshot,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,

Some files were not shown because too many files have changed in this diff Show More