Merge branch 'master' into Fix_flaky_test_ssl_cert_authentication

commit b7c9964710
SmitaRKulkarni, 2023-05-18 09:27:32 +02:00, committed by GitHub
87 changed files with 2154 additions and 933 deletions

docs/en/engines/table-engines/integrations/postgresql.md

@@ -13,8 +13,8 @@ The PostgreSQL engine allows to perform `SELECT` and `INSERT` queries on data th
 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 (
-    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
-    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
+    name1 type1 [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
+    name2 type2 [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
     ...
 ) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password'[, `schema`]);
 ```

docs/en/getting-started/example-datasets/reddit-comments.md

@@ -0,0 +1,636 @@
---
slug: /en/getting-started/example-datasets/reddit-comments
sidebar_label: Reddit comments
---

# Reddit comments dataset

This dataset contains publicly available comments on Reddit from December 2005 through March 2023, amounting to over 7B rows of data. The raw data is in JSON format in compressed `.zst` files, and the rows look like the following:
```json
{"controversiality":0,"body":"A look at Vietnam and Mexico exposes the myth of market liberalisation.","subreddit_id":"t5_6","link_id":"t3_17863","stickied":false,"subreddit":"reddit.com","score":2,"ups":2,"author_flair_css_class":null,"created_utc":1134365188,"author_flair_text":null,"author":"frjo","id":"c13","edited":false,"parent_id":"t3_17863","gilded":0,"distinguished":null,"retrieved_on":1473738411}
{"created_utc":1134365725,"author_flair_css_class":null,"score":1,"ups":1,"subreddit":"reddit.com","stickied":false,"link_id":"t3_17866","subreddit_id":"t5_6","controversiality":0,"body":"The site states \"What can I use it for? Meeting notes, Reports, technical specs Sign-up sheets, proposals and much more...\", just like any other new breeed of sites that want us to store everything we have on the web. And they even guarantee multiple levels of security and encryption etc. But what prevents these web site operators fom accessing and/or stealing Meeting notes, Reports, technical specs Sign-up sheets, proposals and much more, for competitive or personal gains...? I am pretty sure that most of them are honest, but what's there to prevent me from setting up a good useful site and stealing all your data? Call me paranoid - I am.","retrieved_on":1473738411,"distinguished":null,"gilded":0,"id":"c14","edited":false,"parent_id":"t3_17866","author":"zse7zse","author_flair_text":null}
{"gilded":0,"distinguished":null,"retrieved_on":1473738411,"author":"[deleted]","author_flair_text":null,"edited":false,"id":"c15","parent_id":"t3_17869","subreddit":"reddit.com","score":0,"ups":0,"created_utc":1134366848,"author_flair_css_class":null,"body":"Jython related topics by Frank Wierzbicki","controversiality":0,"subreddit_id":"t5_6","stickied":false,"link_id":"t3_17869"}
{"gilded":0,"retrieved_on":1473738411,"distinguished":null,"author_flair_text":null,"author":"[deleted]","edited":false,"parent_id":"t3_17870","id":"c16","subreddit":"reddit.com","created_utc":1134367660,"author_flair_css_class":null,"score":1,"ups":1,"body":"[deleted]","controversiality":0,"stickied":false,"link_id":"t3_17870","subreddit_id":"t5_6"}
{"gilded":0,"retrieved_on":1473738411,"distinguished":null,"author_flair_text":null,"author":"rjoseph","edited":false,"id":"c17","parent_id":"t3_17817","subreddit":"reddit.com","author_flair_css_class":null,"created_utc":1134367754,"score":1,"ups":1,"body":"Saft is by far the best extension you could tak onto your Safari","controversiality":0,"link_id":"t3_17817","stickied":false,"subreddit_id":"t5_6"}
```
A shoutout to Percona for the [motivation behind ingesting this dataset](https://www.percona.com/blog/big-data-set-reddit-comments-analyzing-clickhouse/), which we have downloaded and stored in an S3 bucket.
:::note
The following commands were executed on ClickHouse Cloud. To run this on your own cluster, replace `default` in the `s3Cluster` function call with the name of your cluster. If you do not have a cluster, then replace the `s3Cluster` function with the `s3` function.
:::
1. Let's create a table for the Reddit data:
```sql
CREATE TABLE reddit
(
subreddit LowCardinality(String),
subreddit_id LowCardinality(String),
subreddit_type Enum('public' = 1, 'restricted' = 2, 'user' = 3, 'archived' = 4, 'gold_restricted' = 5, 'private' = 6),
author LowCardinality(String),
body String CODEC(ZSTD(6)),
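    -- ZSTD(6) above is a heavier codec than the default LZ4: slower inserts, but a much better ratio on long comment bodies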
created_date Date DEFAULT toDate(created_utc),
created_utc DateTime,
retrieved_on DateTime,
id String,
parent_id String,
link_id String,
score Int32,
total_awards_received UInt16,
controversiality UInt8,
gilded UInt8,
collapsed_because_crowd_control UInt8,
collapsed_reason Enum('' = 0, 'comment score below threshold' = 1, 'may be sensitive content' = 2, 'potentially toxic' = 3, 'potentially toxic content' = 4),
distinguished Enum('' = 0, 'moderator' = 1, 'admin' = 2, 'special' = 3),
removal_reason Enum('' = 0, 'legal' = 1),
author_created_utc DateTime,
author_fullname LowCardinality(String),
author_patreon_flair UInt8,
author_premium UInt8,
can_gild UInt8,
can_mod_post UInt8,
collapsed UInt8,
is_submitter UInt8,
_edited String,
locked UInt8,
quarantined UInt8,
no_follow UInt8,
send_replies UInt8,
stickied UInt8,
author_flair_text LowCardinality(String)
)
ENGINE = MergeTree
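-- the sort key below doubles as the primary index; queries that filter on subreddit benefit most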
ORDER BY (subreddit, created_date, author);
```
:::note
The names of the files in S3 start with `RC_YYYY-MM` where `YYYY-MM` goes from `2005-12` to `2023-02`. The compression changes a couple of times though, so the file extensions are not consistent. For example:
- the file names are initially `RC_2005-12.bz2` to `RC_2017-11.bz2`
- then they look like `RC_2017-12.xz` to `RC_2018-09.xz`
- and finally `RC_2018-10.zst` to `RC_2023-02.zst`
:::
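If you want to sanity-check the file naming before a large insert, the `_file` virtual column of the `s3` table function reports which file each row came from. Note that this reads the matched files, so keep the glob narrow; a sketch (the `RC_2017-1*` glob is just an example that matches three `.xz` files):
```sql
SELECT _file, count() AS rows
FROM s3(
    'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-1*',
    'JSONEachRow'
)
GROUP BY _file
ORDER BY _file;
```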
2. We are going to start with one month of data, but if you want to simply insert every row, skip ahead to step 8 below. The following file has 86M records from December 2017:
```sql
INSERT INTO reddit
SELECT *
FROM s3Cluster(
'default',
'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz',
'JSONEachRow'
);
```
If you do not have a cluster, use `s3` instead of `s3Cluster`:
```sql
INSERT INTO reddit
SELECT *
FROM s3(
'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz',
'JSONEachRow'
);
```
3. It will take a while depending on your resources, but when it's done, verify that it worked:
```sql
SELECT formatReadableQuantity(count())
FROM reddit;
```
```response
┌─formatReadableQuantity(count())─┐
│ 85.97 million │
└─────────────────────────────────┘
```
4. Let's see how many unique subreddits were in December of 2017:
```sql
SELECT uniqExact(subreddit)
FROM reddit;
```
```response
┌─uniqExact(subreddit)─┐
│ 91613 │
└──────────────────────┘
1 row in set. Elapsed: 1.572 sec. Processed 85.97 million rows, 367.43 MB (54.71 million rows/s., 233.80 MB/s.)
```
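`uniqExact` computes the exact number of distinct values; when an approximation is acceptable, `uniq` is typically faster and uses less memory. A quick comparison sketch:
```sql
SELECT
    uniqExact(subreddit) AS exact,
    uniq(subreddit) AS approximate
FROM reddit;
```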
5. This query returns the top 20 subreddits (in terms of number of comments):
```sql
SELECT
subreddit,
count() AS c
FROM reddit
GROUP BY subreddit
ORDER BY c DESC
LIMIT 20;
```
```response
┌─subreddit───────┬───────c─┐
│ AskReddit │ 5245881 │
│ politics │ 1753120 │
│ nfl │ 1220266 │
│ nba │ 960388 │
│ The_Donald │ 931857 │
│ news │ 796617 │
│ worldnews │ 765709 │
│ CFB │ 710360 │
│ gaming │ 602761 │
│ movies │ 601966 │
│ soccer │ 590628 │
│ Bitcoin │ 583783 │
│ pics │ 563408 │
│ StarWars │ 562514 │
│ funny │ 547563 │
│ leagueoflegends │ 517213 │
│ teenagers │ 492020 │
│ DestinyTheGame │ 477377 │
│ todayilearned │ 472650 │
│ videos │ 450581 │
└─────────────────┴─────────┘
20 rows in set. Elapsed: 0.368 sec. Processed 85.97 million rows, 367.43 MB (233.34 million rows/s., 997.25 MB/s.)
```
6. Here are the top 10 authors in December of 2017, in terms of number of comments posted:
```sql
SELECT
author,
count() AS c
FROM reddit
GROUP BY author
ORDER BY c DESC
LIMIT 10;
```
```response
┌─author──────────┬───────c─┐
│ [deleted] │ 5913324 │
│ AutoModerator │ 784886 │
│ ImagesOfNetwork │ 83241 │
│ BitcoinAllBot │ 54484 │
│ imguralbumbot │ 45822 │
│ RPBot │ 29337 │
│ WikiTextBot │ 25982 │
│ Concise_AMA_Bot │ 19974 │
│ MTGCardFetcher │ 19103 │
│ TotesMessenger │ 19057 │
└─────────────────┴─────────┘
10 rows in set. Elapsed: 8.143 sec. Processed 85.97 million rows, 711.05 MB (10.56 million rows/s., 87.32 MB/s.)
```
7. We already inserted some data, but we will start over:
```sql
TRUNCATE TABLE reddit;
```
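You can confirm the table is empty before reloading everything:
```sql
SELECT count()
FROM reddit;
```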
8. This is a fun dataset and it looks like we can find some great information, so let's go ahead and insert the entire dataset from 2005 to 2023. When you're ready, run this command to insert all the rows. (It takes a while - up to 17 hours!)
```sql
INSERT INTO reddit
SELECT *
FROM s3Cluster(
'default',
'https://clickhouse-public-datasets.s3.amazonaws.com/reddit/original/RC*',
'JSONEachRow'
)
SETTINGS zstd_window_log_max = 31;
```
The response looks like:
```response
0 rows in set. Elapsed: 61187.839 sec. Processed 6.74 billion rows, 2.06 TB (110.17 thousand rows/s., 33.68 MB/s.)
```
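While the insert runs, you can watch its progress from another session via `system.processes` (a monitoring sketch; the `ILIKE` filter is just one way to find the query):
```sql
SELECT
    elapsed,
    formatReadableQuantity(read_rows) AS rows_read,
    formatReadableSize(read_bytes) AS bytes_read
FROM system.processes
WHERE query ILIKE 'INSERT INTO reddit%';
```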
9. Let's see how many rows were inserted and how much disk space the table is using:
```sql
SELECT
sum(rows) AS count,
formatReadableQuantity(count),
formatReadableSize(sum(bytes)) AS disk_size,
formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size
FROM system.parts
WHERE (table = 'reddit') AND active
```
Notice that the compressed size on disk is about one-third of the uncompressed size:
```response
┌──────count─┬─formatReadableQuantity(sum(rows))─┬─disk_size──┬─uncompressed_size─┐
│ 6739503568 │ 6.74 billion │ 501.10 GiB │ 1.51 TiB │
└────────────┴───────────────────────────────────┴────────────┴───────────────────┘
1 row in set. Elapsed: 0.010 sec.
```
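That total is likely dominated by the `body` column; `system.columns` can break the compression down per column (a sketch, assuming the table lives in the current database):
```sql
SELECT
    name,
    formatReadableSize(data_compressed_bytes) AS compressed,
    formatReadableSize(data_uncompressed_bytes) AS uncompressed
FROM system.columns
WHERE table = 'reddit'
ORDER BY data_compressed_bytes DESC
LIMIT 5;
```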
10. The following query shows how many comments, authors, and subreddits we have for each month:
```sql
SELECT
toStartOfMonth(created_utc) AS firstOfMonth,
count() AS c,
bar(c, 0, 50000000, 25) AS bar_count,
uniq(author) AS authors,
bar(authors, 0, 5000000, 25) AS bar_authors,
uniq(subreddit) AS subreddits,
bar(subreddits, 0, 100000, 25) AS bar_subreddits
FROM reddit
GROUP BY firstOfMonth
ORDER BY firstOfMonth ASC;
```
This is a substantial query that has to process all 6.74 billion rows, but we still get an impressive response time (about 3 minutes):
```response
┌─firstOfMonth─┬─────────c─┬─bar_count─────────────────┬─authors─┬─bar_authors───────────────┬─subreddits─┬─bar_subreddits────────────┐
│ 2005-12-01 │ 1075 │ │ 394 │ │ 1 │ │
│ 2006-01-01 │ 3666 │ │ 791 │ │ 2 │ │
│ 2006-02-01 │ 9095 │ │ 1464 │ │ 18 │ │
│ 2006-03-01 │ 13859 │ │ 1958 │ │ 15 │ │
│ 2006-04-01 │ 19090 │ │ 2334 │ │ 21 │ │
│ 2006-05-01 │ 26859 │ │ 2698 │ │ 21 │ │
│ 2006-06-01 │ 29163 │ │ 3043 │ │ 19 │ │
│ 2006-07-01 │ 37031 │ │ 3532 │ │ 22 │ │
│ 2006-08-01 │ 50559 │ │ 4750 │ │ 24 │ │
│ 2006-09-01 │ 50675 │ │ 4908 │ │ 21 │ │
│ 2006-10-01 │ 54148 │ │ 5654 │ │ 31 │ │
│ 2006-11-01 │ 62021 │ │ 6490 │ │ 23 │ │
│ 2006-12-01 │ 61018 │ │ 6707 │ │ 24 │ │
│ 2007-01-01 │ 81341 │ │ 7931 │ │ 23 │ │
│ 2007-02-01 │ 95634 │ │ 9020 │ │ 21 │ │
│ 2007-03-01 │ 112444 │ │ 10842 │ │ 23 │ │
│ 2007-04-01 │ 126773 │ │ 10701 │ │ 26 │ │
│ 2007-05-01 │ 170097 │ │ 11365 │ │ 25 │ │
│ 2007-06-01 │ 178800 │ │ 11267 │ │ 22 │ │
│ 2007-07-01 │ 203319 │ │ 12482 │ │ 25 │ │
│ 2007-08-01 │ 225111 │ │ 14124 │ │ 30 │ │
│ 2007-09-01 │ 259497 │ ▏ │ 15416 │ │ 33 │ │
│ 2007-10-01 │ 274170 │ ▏ │ 15302 │ │ 36 │ │
│ 2007-11-01 │ 372983 │ ▏ │ 15134 │ │ 43 │ │
│ 2007-12-01 │ 363390 │ ▏ │ 15915 │ │ 31 │ │
│ 2008-01-01 │ 452990 │ ▏ │ 18857 │ │ 126 │ │
│ 2008-02-01 │ 441768 │ ▏ │ 18266 │ │ 173 │ │
│ 2008-03-01 │ 463728 │ ▏ │ 18947 │ │ 292 │ │
│ 2008-04-01 │ 468317 │ ▏ │ 18590 │ │ 323 │ │
│ 2008-05-01 │ 536380 │ ▎ │ 20861 │ │ 375 │ │
│ 2008-06-01 │ 577684 │ ▎ │ 22557 │ │ 575 │ ▏ │
│ 2008-07-01 │ 592610 │ ▎ │ 23123 │ │ 657 │ ▏ │
│ 2008-08-01 │ 595959 │ ▎ │ 23729 │ │ 707 │ ▏ │
│ 2008-09-01 │ 680892 │ ▎ │ 26374 │ ▏ │ 801 │ ▏ │
│ 2008-10-01 │ 789874 │ ▍ │ 28970 │ ▏ │ 893 │ ▏ │
│ 2008-11-01 │ 792310 │ ▍ │ 30272 │ ▏ │ 1024 │ ▎ │
│ 2008-12-01 │ 850359 │ ▍ │ 34073 │ ▏ │ 1103 │ ▎ │
│ 2009-01-01 │ 1051649 │ ▌ │ 38978 │ ▏ │ 1316 │ ▎ │
│ 2009-02-01 │ 944711 │ ▍ │ 43390 │ ▏ │ 1132 │ ▎ │
│ 2009-03-01 │ 1048643 │ ▌ │ 46516 │ ▏ │ 1203 │ ▎ │
│ 2009-04-01 │ 1094599 │ ▌ │ 48284 │ ▏ │ 1334 │ ▎ │
│ 2009-05-01 │ 1201257 │ ▌ │ 52512 │ ▎ │ 1395 │ ▎ │
│ 2009-06-01 │ 1258750 │ ▋ │ 57728 │ ▎ │ 1473 │ ▎ │
│ 2009-07-01 │ 1470290 │ ▋ │ 60098 │ ▎ │ 1686 │ ▍ │
│ 2009-08-01 │ 1750688 │ ▉ │ 67347 │ ▎ │ 1777 │ ▍ │
│ 2009-09-01 │ 2032276 │ █ │ 78051 │ ▍ │ 1784 │ ▍ │
│ 2009-10-01 │ 2242017 │ █ │ 93409 │ ▍ │ 2071 │ ▌ │
│ 2009-11-01 │ 2207444 │ █ │ 95940 │ ▍ │ 2141 │ ▌ │
│ 2009-12-01 │ 2560510 │ █▎ │ 104239 │ ▌ │ 2141 │ ▌ │
│ 2010-01-01 │ 2884096 │ █▍ │ 114314 │ ▌ │ 2313 │ ▌ │
│ 2010-02-01 │ 2687779 │ █▎ │ 115683 │ ▌ │ 2522 │ ▋ │
│ 2010-03-01 │ 3228254 │ █▌ │ 125775 │ ▋ │ 2890 │ ▋ │
│ 2010-04-01 │ 3209898 │ █▌ │ 128936 │ ▋ │ 3170 │ ▊ │
│ 2010-05-01 │ 3267363 │ █▋ │ 131851 │ ▋ │ 3166 │ ▊ │
│ 2010-06-01 │ 3532867 │ █▊ │ 139522 │ ▋ │ 3301 │ ▊ │
│ 2010-07-01 │ 4032737 │ ██ │ 153451 │ ▊ │ 3662 │ ▉ │
│ 2010-08-01 │ 4247982 │ ██ │ 164071 │ ▊ │ 3653 │ ▉ │
│ 2010-09-01 │ 4704069 │ ██▎ │ 186613 │ ▉ │ 4009 │ █ │
│ 2010-10-01 │ 5032368 │ ██▌ │ 203800 │ █ │ 4154 │ █ │
│ 2010-11-01 │ 5689002 │ ██▊ │ 226134 │ █▏ │ 4383 │ █ │
│ 2010-12-01 │ 5972642 │ ██▉ │ 245824 │ █▏ │ 4692 │ █▏ │
│ 2011-01-01 │ 6603329 │ ███▎ │ 270025 │ █▎ │ 5141 │ █▎ │
│ 2011-02-01 │ 6363114 │ ███▏ │ 277593 │ █▍ │ 5202 │ █▎ │
│ 2011-03-01 │ 7556165 │ ███▊ │ 314748 │ █▌ │ 5445 │ █▎ │
│ 2011-04-01 │ 7571398 │ ███▊ │ 329920 │ █▋ │ 6128 │ █▌ │
│ 2011-05-01 │ 8803949 │ ████▍ │ 365013 │ █▊ │ 6834 │ █▋ │
│ 2011-06-01 │ 9766511 │ ████▉ │ 393945 │ █▉ │ 7519 │ █▉ │
│ 2011-07-01 │ 10557466 │ █████▎ │ 424235 │ ██ │ 8293 │ ██ │
│ 2011-08-01 │ 12316144 │ ██████▏ │ 475326 │ ██▍ │ 9657 │ ██▍ │
│ 2011-09-01 │ 12150412 │ ██████ │ 503142 │ ██▌ │ 10278 │ ██▌ │
│ 2011-10-01 │ 13470278 │ ██████▋ │ 548801 │ ██▋ │ 10922 │ ██▋ │
│ 2011-11-01 │ 13621533 │ ██████▊ │ 574435 │ ██▊ │ 11572 │ ██▉ │
│ 2011-12-01 │ 14509469 │ ███████▎ │ 622849 │ ███ │ 12335 │ ███ │
│ 2012-01-01 │ 16350205 │ ████████▏ │ 696110 │ ███▍ │ 14281 │ ███▌ │
│ 2012-02-01 │ 16015695 │ ████████ │ 722892 │ ███▌ │ 14949 │ ███▋ │
│ 2012-03-01 │ 17881943 │ ████████▉ │ 789664 │ ███▉ │ 15795 │ ███▉ │
│ 2012-04-01 │ 19044534 │ █████████▌ │ 842491 │ ████▏ │ 16440 │ ████ │
│ 2012-05-01 │ 20388260 │ ██████████▏ │ 886176 │ ████▍ │ 16974 │ ████▏ │
│ 2012-06-01 │ 21897913 │ ██████████▉ │ 946798 │ ████▋ │ 17952 │ ████▍ │
│ 2012-07-01 │ 24087517 │ ████████████ │ 1018636 │ █████ │ 19069 │ ████▊ │
│ 2012-08-01 │ 25703326 │ ████████████▊ │ 1094445 │ █████▍ │ 20553 │ █████▏ │
│ 2012-09-01 │ 23419524 │ ███████████▋ │ 1088491 │ █████▍ │ 20831 │ █████▏ │
│ 2012-10-01 │ 24788236 │ ████████████▍ │ 1131885 │ █████▋ │ 21868 │ █████▍ │
│ 2012-11-01 │ 24648302 │ ████████████▎ │ 1167608 │ █████▊ │ 21791 │ █████▍ │
│ 2012-12-01 │ 26080276 │ █████████████ │ 1218402 │ ██████ │ 22622 │ █████▋ │
│ 2013-01-01 │ 30365867 │ ███████████████▏ │ 1341703 │ ██████▋ │ 24696 │ ██████▏ │
│ 2013-02-01 │ 27213960 │ █████████████▌ │ 1304756 │ ██████▌ │ 24514 │ ██████▏ │
│ 2013-03-01 │ 30771274 │ ███████████████▍ │ 1391703 │ ██████▉ │ 25730 │ ██████▍ │
│ 2013-04-01 │ 33259557 │ ████████████████▋ │ 1485971 │ ███████▍ │ 27294 │ ██████▊ │
│ 2013-05-01 │ 33126225 │ ████████████████▌ │ 1506473 │ ███████▌ │ 27299 │ ██████▊ │
│ 2013-06-01 │ 32648247 │ ████████████████▎ │ 1506650 │ ███████▌ │ 27450 │ ██████▊ │
│ 2013-07-01 │ 34922133 │ █████████████████▍ │ 1561771 │ ███████▊ │ 28294 │ ███████ │
│ 2013-08-01 │ 34766579 │ █████████████████▍ │ 1589781 │ ███████▉ │ 28943 │ ███████▏ │
│ 2013-09-01 │ 31990369 │ ███████████████▉ │ 1570342 │ ███████▊ │ 29408 │ ███████▎ │
│ 2013-10-01 │ 35940040 │ █████████████████▉ │ 1683770 │ ████████▍ │ 30273 │ ███████▌ │
│ 2013-11-01 │ 37396497 │ ██████████████████▋ │ 1757467 │ ████████▊ │ 31173 │ ███████▊ │
│ 2013-12-01 │ 39810216 │ ███████████████████▉ │ 1846204 │ █████████▏ │ 32326 │ ████████ │
│ 2014-01-01 │ 42420655 │ █████████████████████▏ │ 1927229 │ █████████▋ │ 35603 │ ████████▉ │
│ 2014-02-01 │ 38703362 │ ███████████████████▎ │ 1874067 │ █████████▎ │ 37007 │ █████████▎ │
│ 2014-03-01 │ 42459956 │ █████████████████████▏ │ 1959888 │ █████████▊ │ 37948 │ █████████▍ │
│ 2014-04-01 │ 42440735 │ █████████████████████▏ │ 1951369 │ █████████▊ │ 38362 │ █████████▌ │
│ 2014-05-01 │ 42514094 │ █████████████████████▎ │ 1970197 │ █████████▊ │ 39078 │ █████████▊ │
│ 2014-06-01 │ 41990650 │ ████████████████████▉ │ 1943850 │ █████████▋ │ 38268 │ █████████▌ │
│ 2014-07-01 │ 46868899 │ ███████████████████████▍ │ 2059346 │ ██████████▎ │ 40634 │ ██████████▏ │
│ 2014-08-01 │ 46990813 │ ███████████████████████▍ │ 2117335 │ ██████████▌ │ 41764 │ ██████████▍ │
│ 2014-09-01 │ 44992201 │ ██████████████████████▍ │ 2124708 │ ██████████▌ │ 41890 │ ██████████▍ │
│ 2014-10-01 │ 47497520 │ ███████████████████████▋ │ 2206535 │ ███████████ │ 43109 │ ██████████▊ │
│ 2014-11-01 │ 46118074 │ ███████████████████████ │ 2239747 │ ███████████▏ │ 43718 │ ██████████▉ │
│ 2014-12-01 │ 48807699 │ ████████████████████████▍ │ 2372945 │ ███████████▊ │ 43823 │ ██████████▉ │
│ 2015-01-01 │ 53851542 │ █████████████████████████ │ 2499536 │ ████████████▍ │ 47172 │ ███████████▊ │
│ 2015-02-01 │ 48342747 │ ████████████████████████▏ │ 2448496 │ ████████████▏ │ 47229 │ ███████████▊ │
│ 2015-03-01 │ 54564441 │ █████████████████████████ │ 2550534 │ ████████████▊ │ 48156 │ ████████████ │
│ 2015-04-01 │ 55005780 │ █████████████████████████ │ 2609443 │ █████████████ │ 49865 │ ████████████▍ │
│ 2015-05-01 │ 54504410 │ █████████████████████████ │ 2585535 │ ████████████▉ │ 50137 │ ████████████▌ │
│ 2015-06-01 │ 54258492 │ █████████████████████████ │ 2595129 │ ████████████▉ │ 49598 │ ████████████▍ │
│ 2015-07-01 │ 58451788 │ █████████████████████████ │ 2720026 │ █████████████▌ │ 55022 │ █████████████▊ │
│ 2015-08-01 │ 58075327 │ █████████████████████████ │ 2743994 │ █████████████▋ │ 55302 │ █████████████▊ │
│ 2015-09-01 │ 55574825 │ █████████████████████████ │ 2672793 │ █████████████▎ │ 53960 │ █████████████▍ │
│ 2015-10-01 │ 59494045 │ █████████████████████████ │ 2816426 │ ██████████████ │ 70210 │ █████████████████▌ │
│ 2015-11-01 │ 57117500 │ █████████████████████████ │ 2847146 │ ██████████████▏ │ 71363 │ █████████████████▊ │
│ 2015-12-01 │ 58523312 │ █████████████████████████ │ 2854840 │ ██████████████▎ │ 94559 │ ███████████████████████▋ │
│ 2016-01-01 │ 61991732 │ █████████████████████████ │ 2920366 │ ██████████████▌ │ 108438 │ █████████████████████████ │
│ 2016-02-01 │ 59189875 │ █████████████████████████ │ 2854683 │ ██████████████▎ │ 109916 │ █████████████████████████ │
│ 2016-03-01 │ 63918864 │ █████████████████████████ │ 2969542 │ ██████████████▊ │ 84787 │ █████████████████████▏ │
│ 2016-04-01 │ 64271256 │ █████████████████████████ │ 2999086 │ ██████████████▉ │ 61647 │ ███████████████▍ │
│ 2016-05-01 │ 65212004 │ █████████████████████████ │ 3034674 │ ███████████████▏ │ 67465 │ ████████████████▊ │
│ 2016-06-01 │ 65867743 │ █████████████████████████ │ 3057604 │ ███████████████▎ │ 75170 │ ██████████████████▊ │
│ 2016-07-01 │ 66974735 │ █████████████████████████ │ 3199374 │ ███████████████▉ │ 77732 │ ███████████████████▍ │
│ 2016-08-01 │ 69654819 │ █████████████████████████ │ 3239957 │ ████████████████▏ │ 63080 │ ███████████████▊ │
│ 2016-09-01 │ 67024973 │ █████████████████████████ │ 3190864 │ ███████████████▉ │ 62324 │ ███████████████▌ │
│ 2016-10-01 │ 71826553 │ █████████████████████████ │ 3284340 │ ████████████████▍ │ 62549 │ ███████████████▋ │
│ 2016-11-01 │ 71022319 │ █████████████████████████ │ 3300822 │ ████████████████▌ │ 69718 │ █████████████████▍ │
│ 2016-12-01 │ 72942967 │ █████████████████████████ │ 3430324 │ █████████████████▏ │ 71705 │ █████████████████▉ │
│ 2017-01-01 │ 78946585 │ █████████████████████████ │ 3572093 │ █████████████████▊ │ 78198 │ ███████████████████▌ │
│ 2017-02-01 │ 70609487 │ █████████████████████████ │ 3421115 │ █████████████████ │ 69823 │ █████████████████▍ │
│ 2017-03-01 │ 79723106 │ █████████████████████████ │ 3638122 │ ██████████████████▏ │ 73865 │ ██████████████████▍ │
│ 2017-04-01 │ 77478009 │ █████████████████████████ │ 3620591 │ ██████████████████ │ 74387 │ ██████████████████▌ │
│ 2017-05-01 │ 79810360 │ █████████████████████████ │ 3650820 │ ██████████████████▎ │ 74356 │ ██████████████████▌ │
│ 2017-06-01 │ 79901711 │ █████████████████████████ │ 3737614 │ ██████████████████▋ │ 72114 │ ██████████████████ │
│ 2017-07-01 │ 81798725 │ █████████████████████████ │ 3872330 │ ███████████████████▎ │ 76052 │ ███████████████████ │
│ 2017-08-01 │ 84658503 │ █████████████████████████ │ 3960093 │ ███████████████████▊ │ 77798 │ ███████████████████▍ │
│ 2017-09-01 │ 83165192 │ █████████████████████████ │ 3880501 │ ███████████████████▍ │ 78402 │ ███████████████████▌ │
│ 2017-10-01 │ 85828912 │ █████████████████████████ │ 3980335 │ ███████████████████▉ │ 80685 │ ████████████████████▏ │
│ 2017-11-01 │ 84965681 │ █████████████████████████ │ 4026749 │ ████████████████████▏ │ 82659 │ ████████████████████▋ │
│ 2017-12-01 │ 85973810 │ █████████████████████████ │ 4196354 │ ████████████████████▉ │ 91984 │ ██████████████████████▉ │
│ 2018-01-01 │ 91558594 │ █████████████████████████ │ 4364443 │ █████████████████████▊ │ 102577 │ █████████████████████████ │
│ 2018-02-01 │ 86467179 │ █████████████████████████ │ 4277899 │ █████████████████████▍ │ 104610 │ █████████████████████████ │
│ 2018-03-01 │ 96490262 │ █████████████████████████ │ 4422470 │ ██████████████████████ │ 112559 │ █████████████████████████ │
│ 2018-04-01 │ 98101232 │ █████████████████████████ │ 4572434 │ ██████████████████████▊ │ 105284 │ █████████████████████████ │
│ 2018-05-01 │ 100109100 │ █████████████████████████ │ 4698908 │ ███████████████████████▍ │ 103910 │ █████████████████████████ │
│ 2018-06-01 │ 100009462 │ █████████████████████████ │ 4697426 │ ███████████████████████▍ │ 101107 │ █████████████████████████ │
│ 2018-07-01 │ 108151359 │ █████████████████████████ │ 5099492 │ █████████████████████████ │ 106184 │ █████████████████████████ │
│ 2018-08-01 │ 107330940 │ █████████████████████████ │ 5084082 │ █████████████████████████ │ 109985 │ █████████████████████████ │
│ 2018-09-01 │ 104473929 │ █████████████████████████ │ 5011953 │ █████████████████████████ │ 109710 │ █████████████████████████ │
│ 2018-10-01 │ 112346556 │ █████████████████████████ │ 5320405 │ █████████████████████████ │ 112533 │ █████████████████████████ │
│ 2018-11-01 │ 112573001 │ █████████████████████████ │ 5353282 │ █████████████████████████ │ 112211 │ █████████████████████████ │
│ 2018-12-01 │ 121953600 │ █████████████████████████ │ 5611543 │ █████████████████████████ │ 118291 │ █████████████████████████ │
│ 2019-01-01 │ 129386587 │ █████████████████████████ │ 6016687 │ █████████████████████████ │ 125725 │ █████████████████████████ │
│ 2019-02-01 │ 120645639 │ █████████████████████████ │ 5974488 │ █████████████████████████ │ 125420 │ █████████████████████████ │
│ 2019-03-01 │ 137650471 │ █████████████████████████ │ 6410197 │ █████████████████████████ │ 135924 │ █████████████████████████ │
│ 2019-04-01 │ 138473643 │ █████████████████████████ │ 6416384 │ █████████████████████████ │ 139844 │ █████████████████████████ │
│ 2019-05-01 │ 142463421 │ █████████████████████████ │ 6574836 │ █████████████████████████ │ 142012 │ █████████████████████████ │
│ 2019-06-01 │ 134172939 │ █████████████████████████ │ 6601267 │ █████████████████████████ │ 140997 │ █████████████████████████ │
│ 2019-07-01 │ 145965083 │ █████████████████████████ │ 6901822 │ █████████████████████████ │ 147802 │ █████████████████████████ │
│ 2019-08-01 │ 146854393 │ █████████████████████████ │ 6993882 │ █████████████████████████ │ 151888 │ █████████████████████████ │
│ 2019-09-01 │ 137540219 │ █████████████████████████ │ 7001362 │ █████████████████████████ │ 148839 │ █████████████████████████ │
│ 2019-10-01 │ 129771456 │ █████████████████████████ │ 6825690 │ █████████████████████████ │ 144453 │ █████████████████████████ │
│ 2019-11-01 │ 107990259 │ █████████████████████████ │ 6368286 │ █████████████████████████ │ 141768 │ █████████████████████████ │
│ 2019-12-01 │ 112895934 │ █████████████████████████ │ 6640902 │ █████████████████████████ │ 148277 │ █████████████████████████ │
│ 2020-01-01 │ 54354879 │ █████████████████████████ │ 4782339 │ ███████████████████████▉ │ 111658 │ █████████████████████████ │
│ 2020-02-01 │ 22696923 │ ███████████▎ │ 3135175 │ ███████████████▋ │ 79521 │ ███████████████████▉ │
│ 2020-03-01 │ 3466677 │ █▋ │ 987960 │ ████▉ │ 40901 │ ██████████▏ │
└──────────────┴───────────┴───────────────────────────┴─────────┴───────────────────────────┴────────────┴───────────────────────────┘
172 rows in set. Elapsed: 184.809 sec. Processed 6.74 billion rows, 89.56 GB (36.47 million rows/s., 484.62 MB/s.)
```
11. Here are the top 10 subreddits of 2022:
```sql
SELECT
subreddit,
count() AS count
FROM reddit
WHERE toYear(created_utc) = 2022
GROUP BY subreddit
ORDER BY count DESC
LIMIT 10;
```
The response is:
```response
┌─subreddit────────┬───count─┐
│ AskReddit │ 3858203 │
│ politics │ 1356782 │
│ memes │ 1249120 │
│ nfl │ 883667 │
│ worldnews │ 866065 │
│ teenagers │ 777095 │
│ AmItheAsshole │ 752720 │
│ dankmemes │ 657932 │
│ nba │ 514184 │
│ unpopularopinion │ 473649 │
└──────────────────┴─────────┘
10 rows in set. Elapsed: 27.824 sec. Processed 6.74 billion rows, 53.26 GB (242.22 million rows/s., 1.91 GB/s.)
```
12. Let's see which subreddits had the biggest increase in comments from 2018 to 2019:
```sql
SELECT
subreddit,
newcount - oldcount AS diff
FROM
(
SELECT
subreddit,
count(*) AS newcount
FROM reddit
WHERE toYear(created_utc) = 2019
GROUP BY subreddit
)
ALL INNER JOIN
(
SELECT
subreddit,
count(*) AS oldcount
FROM reddit
WHERE toYear(created_utc) = 2018
GROUP BY subreddit
) USING (subreddit)
ORDER BY diff DESC
LIMIT 50
SETTINGS joined_subquery_requires_alias = 0;
```
It looks like memes and teenagers were busy on Reddit in 2019:
```response
┌─subreddit────────────┬─────diff─┐
│ memes │ 15368369 │
│ AskReddit │ 14663662 │
│ teenagers │ 12266991 │
│ AmItheAsshole │ 11561538 │
│ dankmemes │ 11305158 │
│ unpopularopinion │ 6332772 │
│ PewdiepieSubmissions │ 5930818 │
│ Market76 │ 5014668 │
│ relationship_advice │ 3776383 │
│ freefolk │ 3169236 │
│ Minecraft │ 3160241 │
│ classicwow │ 2907056 │
│ Animemes │ 2673398 │
│ gameofthrones │ 2402835 │
│ PublicFreakout │ 2267605 │
│ ShitPostCrusaders │ 2207266 │
│ RoastMe │ 2195715 │
│ gonewild │ 2148649 │
│ AnthemTheGame │ 1803818 │
│ entitledparents │ 1706270 │
│ MortalKombat │ 1679508 │
│ Cringetopia │ 1620555 │
│ pokemon │ 1615266 │
│ HistoryMemes │ 1608289 │
│ Brawlstars │ 1574977 │
│ iamatotalpieceofshit │ 1558315 │
│ trashy │ 1518549 │
│ ChapoTrapHouse │ 1505748 │
│ Pikabu │ 1501001 │
│ Showerthoughts │ 1475101 │
│ cursedcomments │ 1465607 │
│ ukpolitics │ 1386043 │
│ wallstreetbets │ 1384431 │
│ interestingasfuck │ 1378900 │
│ wholesomememes │ 1353333 │
│ AskOuija │ 1233263 │
│ borderlands3 │ 1197192 │
│ aww │ 1168257 │
│ insanepeoplefacebook │ 1155473 │
│ FortniteCompetitive │ 1122778 │
│ EpicSeven │ 1117380 │
│ FreeKarma4U │ 1116423 │
│ YangForPresidentHQ │ 1086700 │
│ SquaredCircle │ 1044089 │
│ MurderedByWords │ 1042511 │
│ AskMen │ 1024434 │
│ thedivision │ 1016634 │
│ barstoolsports │ 985032 │
│ nfl │ 978340 │
│ BattlefieldV │ 971408 │
└──────────────────────┴──────────┘
50 rows in set. Elapsed: 65.954 sec. Processed 13.48 billion rows, 79.67 GB (204.37 million rows/s., 1.21 GB/s.)
```
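The same comparison can be written without a join by filtering to the two years and using conditional counts; a single-pass sketch (unlike the `ALL INNER JOIN` above, this also counts subreddits that only had comments in one of the two years):
```sql
SELECT
    subreddit,
    countIf(toYear(created_utc) = 2019) - countIf(toYear(created_utc) = 2018) AS diff
FROM reddit
WHERE toYear(created_utc) IN (2018, 2019)
GROUP BY subreddit
ORDER BY diff DESC
LIMIT 10;
```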
13. One more query: let's compare ClickHouse mentions to those of other technologies like Snowflake and Postgres. This query is a big one because it has to search all the comments three times for a substring; unfortunately, ClickHouse users are obviously not very active on Reddit yet:
```sql
SELECT
toStartOfQuarter(created_utc) AS quarter,
sum(if(positionCaseInsensitive(body, 'clickhouse') > 0, 1, 0)) AS clickhouse,
sum(if(positionCaseInsensitive(body, 'snowflake') > 0, 1, 0)) AS snowflake,
sum(if(positionCaseInsensitive(body, 'postgres') > 0, 1, 0)) AS postgres
FROM reddit
GROUP BY quarter
ORDER BY quarter ASC;
```
```response
┌────quarter─┬─clickhouse─┬─snowflake─┬─postgres─┐
│ 2005-10-01 │ 0 │ 0 │ 0 │
│ 2006-01-01 │ 0 │ 2 │ 23 │
│ 2006-04-01 │ 0 │ 2 │ 24 │
│ 2006-07-01 │ 0 │ 4 │ 13 │
│ 2006-10-01 │ 0 │ 23 │ 73 │
│ 2007-01-01 │ 0 │ 14 │ 91 │
│ 2007-04-01 │ 0 │ 10 │ 59 │
│ 2007-07-01 │ 0 │ 39 │ 116 │
│ 2007-10-01 │ 0 │ 45 │ 125 │
│ 2008-01-01 │ 0 │ 53 │ 234 │
│ 2008-04-01 │ 0 │ 79 │ 303 │
│ 2008-07-01 │ 0 │ 102 │ 174 │
│ 2008-10-01 │ 0 │ 156 │ 323 │
│ 2009-01-01 │ 0 │ 206 │ 208 │
│ 2009-04-01 │ 0 │ 178 │ 417 │
│ 2009-07-01 │ 0 │ 300 │ 295 │
│ 2009-10-01 │ 0 │ 633 │ 589 │
│ 2010-01-01 │ 0 │ 555 │ 501 │
│ 2010-04-01 │ 0 │ 587 │ 469 │
│ 2010-07-01 │ 0 │ 770 │ 821 │
│ 2010-10-01 │ 0 │ 1480 │ 550 │
│ 2011-01-01 │ 0 │ 1482 │ 568 │
│ 2011-04-01 │ 0 │ 1558 │ 406 │
│ 2011-07-01 │ 0 │ 2163 │ 628 │
│ 2011-10-01 │ 0 │ 4064 │ 566 │
│ 2012-01-01 │ 0 │ 4621 │ 662 │
│ 2012-04-01 │ 0 │ 5737 │ 785 │
│ 2012-07-01 │ 0 │ 6097 │ 1127 │
│ 2012-10-01 │ 0 │ 7986 │ 600 │
│ 2013-01-01 │ 0 │ 9704 │ 839 │
│ 2013-04-01 │ 0 │ 8161 │ 853 │
│ 2013-07-01 │ 0 │ 9704 │ 1028 │
│ 2013-10-01 │ 0 │ 12879 │ 1404 │
│ 2014-01-01 │ 0 │ 12317 │ 1548 │
│ 2014-04-01 │ 0 │ 13181 │ 1577 │
│ 2014-07-01 │ 0 │ 15640 │ 1710 │
│ 2014-10-01 │ 0 │ 19479 │ 1959 │
│ 2015-01-01 │ 0 │ 20411 │ 2104 │
│ 2015-04-01 │ 1 │ 20309 │ 9112 │
│ 2015-07-01 │ 0 │ 20325 │ 4771 │
│ 2015-10-01 │ 0 │ 25087 │ 3030 │
│ 2016-01-01 │ 0 │ 23462 │ 3126 │
│ 2016-04-01 │ 3 │ 25496 │ 2757 │
│ 2016-07-01 │ 4 │ 28233 │ 2928 │
│ 2016-10-01 │ 2 │ 45445 │ 2449 │
│ 2017-01-01 │ 9 │ 76019 │ 2808 │
│ 2017-04-01 │ 9 │ 67919 │ 2803 │
│ 2017-07-01 │ 13 │ 68974 │ 2771 │
│ 2017-10-01 │ 12 │ 69730 │ 2906 │
│ 2018-01-01 │ 17 │ 67476 │ 3152 │
│ 2018-04-01 │ 3 │ 67139 │ 3986 │
│ 2018-07-01 │ 14 │ 67979 │ 3609 │
│ 2018-10-01 │ 28 │ 74147 │ 3850 │
│ 2019-01-01 │ 14 │ 80250 │ 4305 │
│ 2019-04-01 │ 30 │ 70307 │ 3872 │
│ 2019-07-01 │ 33 │ 77149 │ 4164 │
│ 2019-10-01 │ 13 │ 76746 │ 3541 │
│ 2020-01-01 │ 16 │ 54475 │ 846 │
└────────────┴────────────┴───────────┴──────────┘
58 rows in set. Elapsed: 2663.751 sec. Processed 6.74 billion rows, 1.21 TB (2.53 million rows/s., 454.37 MB/s.)
```
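As an aside, `countIf(cond)` is a tidier equivalent of `sum(if(cond, 1, 0))`; the same query can be written as:
```sql
SELECT
    toStartOfQuarter(created_utc) AS quarter,
    countIf(positionCaseInsensitive(body, 'clickhouse') > 0) AS clickhouse,
    countIf(positionCaseInsensitive(body, 'snowflake') > 0) AS snowflake,
    countIf(positionCaseInsensitive(body, 'postgres') > 0) AS postgres
FROM reddit
GROUP BY quarter
ORDER BY quarter ASC;
```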

src/AggregateFunctions/AggregateFunctionGroupArray.cpp

@@ -121,7 +121,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample(
 void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory)
 {
-    AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true };
+    AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
     factory.registerFunction("groupArray", { createAggregateFunctionGroupArray<false>, properties });
     factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties });

src/AggregateFunctions/AggregateFunctionNull.h

@@ -72,7 +72,7 @@ public:
     {
         /// Currently the only functions that returns not-NULL on all NULL arguments are count and uniq, and they returns UInt64.
         if (properties.returns_default_when_only_null)
-            return std::make_shared<AggregateFunctionNothing>(arguments, params, nested_function->getResultType());
+            return std::make_shared<AggregateFunctionNothing>(arguments, params, std::make_shared<DataTypeUInt64>());
         else
             return std::make_shared<AggregateFunctionNothing>(arguments, params, std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>()));
     }

src/Backups/BackupCoordinationRemote.cpp

@@ -115,6 +115,7 @@ namespace
         writeBinary(info.checksum, out);
         writeBinary(info.base_size, out);
         writeBinary(info.base_checksum, out);
+        writeBinary(info.encrypted_by_disk, out);
         /// We don't store `info.data_file_name` and `info.data_file_index` because they're determined automalically
         /// after reading file infos for all the hosts (see the class BackupCoordinationFileInfos).
     }
@@ -136,6 +137,7 @@ namespace
             readBinary(info.checksum, in);
             readBinary(info.base_size, in);
             readBinary(info.base_checksum, in);
+            readBinary(info.encrypted_by_disk, in);
         }
         return res;
     }
@@ -254,7 +256,10 @@ void BackupCoordinationRemote::removeAllNodes()
 void BackupCoordinationRemote::setStage(const String & new_stage, const String & message)
 {
+    if (is_internal)
         stage_sync->set(current_host, new_stage, message);
+    else
+        stage_sync->set(current_host, new_stage, /* message */ "", /* all_hosts */ true);
 }
 
 void BackupCoordinationRemote::setError(const Exception & exception)
@@ -777,8 +782,8 @@ bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &)
             String status;
             if (zk->tryGet(root_zookeeper_path + "/" + existing_backup_path + "/stage", status))
             {
-                /// If status is not COMPLETED it could be because the backup failed, check if 'error' exists
-                if (status != Stage::COMPLETED && !zk->exists(root_zookeeper_path + "/" + existing_backup_path + "/error"))
+                /// Check if some other backup is in progress
+                if (status == Stage::SCHEDULED_TO_START)
                 {
                     LOG_WARNING(log, "Found a concurrent backup: {}, current backup: {}", existing_backup_uuid, toString(backup_uuid));
                     result = true;

src/Backups/BackupCoordinationStage.h

@@ -43,6 +43,10 @@ namespace BackupCoordinationStage
     /// Coordination stage meaning that a host finished its work.
     constexpr const char * COMPLETED = "completed";
+
+    /// Coordination stage meaning that backup/restore has failed due to an error
+    /// Check '/error' for the error message
+    constexpr const char * ERROR = "error";
 }
 
 }

src/Backups/BackupCoordinationStageSync.cpp

@@ -8,11 +8,13 @@
 #include <IO/ReadHelpers.h>
 #include <IO/WriteBufferFromString.h>
 #include <IO/WriteHelpers.h>
+#include <Backups/BackupCoordinationStage.h>
 
 namespace DB
 {
 
+namespace Stage = BackupCoordinationStage;
+
 namespace ErrorCodes
 {
     extern const int FAILED_TO_SYNC_BACKUP_OR_RESTORE;
@@ -42,7 +44,7 @@ void BackupCoordinationStageSync::createRootNodes()
     });
 }
 
-void BackupCoordinationStageSync::set(const String & current_host, const String & new_stage, const String & message)
+void BackupCoordinationStageSync::set(const String & current_host, const String & new_stage, const String & message, const bool & all_hosts)
 {
     auto holder = with_retries.createRetriesControlHolder("set");
     holder.retries_ctl.retryLoop(
@@ -50,6 +52,14 @@ void BackupCoordinationStageSync::set(const String & current_host, const String
     {
         with_retries.renewZooKeeper(zookeeper);
 
+        if (all_hosts)
+        {
+            auto code = zookeeper->trySet(zookeeper_path, new_stage);
+            if (code != Coordination::Error::ZOK)
+                throw zkutil::KeeperException(code, zookeeper_path);
+        }
+        else
+        {
         /// Make an ephemeral node so the initiator can track if the current host is still working.
         String alive_node_path = zookeeper_path + "/alive|" + current_host;
         auto code = zookeeper->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
@@ -58,6 +68,7 @@ void BackupCoordinationStageSync::set(const String & current_host, const String
         zookeeper->createIfNotExists(zookeeper_path + "/started|" + current_host, "");
         zookeeper->createIfNotExists(zookeeper_path + "/current|" + current_host + "|" + new_stage, message);
+        }
     });
 }
@@ -73,6 +84,10 @@ void BackupCoordinationStageSync::setError(const String & current_host, const Ex
         writeStringBinary(current_host, buf);
         writeException(exception, buf, true);
         zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());
+
+        auto code = zookeeper->trySet(zookeeper_path, Stage::ERROR);
+        if (code != Coordination::Error::ZOK)
+            throw zkutil::KeeperException(code, zookeeper_path);
     });
 }

src/Backups/BackupCoordinationStageSync.h

@@ -15,7 +15,7 @@ public:
         Poco::Logger * log_);
 
     /// Sets the stage of the current host and signal other hosts if there were other hosts waiting for that.
-    void set(const String & current_host, const String & new_stage, const String & message);
+    void set(const String & current_host, const String & new_stage, const String & message, const bool & all_hosts = false);
     void setError(const String & current_host, const Exception & exception);
 
     /// Sets the stage of the current host and waits until all hosts come to the same stage.

src/Backups/BackupEntryFromAppendOnlyFile.cpp

@@ -1,26 +1,45 @@
 #include <Backups/BackupEntryFromAppendOnlyFile.h>
+#include <Disks/IDisk.h>
 #include <IO/LimitSeekableReadBuffer.h>
 
 namespace DB
 {
 
+namespace
+{
+    /// For append-only files we must calculate its size on the construction of a backup entry.
+    UInt64 calculateSize(const DiskPtr & disk, const String & file_path, bool copy_encrypted, std::optional<UInt64> unencrypted_file_size)
+    {
+        if (!unencrypted_file_size)
+            return copy_encrypted ? disk->getEncryptedFileSize(file_path) : disk->getFileSize(file_path);
+        else if (copy_encrypted)
+            return disk->getEncryptedFileSize(*unencrypted_file_size);
+        else
+            return *unencrypted_file_size;
+    }
+}
+
 BackupEntryFromAppendOnlyFile::BackupEntryFromAppendOnlyFile(
-    const DiskPtr & disk_,
-    const String & file_path_,
-    const ReadSettings & settings_,
-    const std::optional<UInt64> & file_size_,
-    const std::optional<UInt128> & checksum_,
-    const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_)
-    : BackupEntryFromImmutableFile(disk_, file_path_, settings_, file_size_, checksum_, temporary_file_)
-    , limit(BackupEntryFromImmutableFile::getSize())
+    const DiskPtr & disk_, const String & file_path_, bool copy_encrypted_, const std::optional<UInt64> & file_size_)
+    : disk(disk_)
+    , file_path(file_path_)
+    , data_source_description(disk->getDataSourceDescription())
+    , copy_encrypted(copy_encrypted_ && data_source_description.is_encrypted)
+    , size(calculateSize(disk_, file_path_, copy_encrypted, file_size_))
 {
 }
 
-std::unique_ptr<SeekableReadBuffer> BackupEntryFromAppendOnlyFile::getReadBuffer() const
+BackupEntryFromAppendOnlyFile::~BackupEntryFromAppendOnlyFile() = default;
+
+std::unique_ptr<SeekableReadBuffer> BackupEntryFromAppendOnlyFile::getReadBuffer(const ReadSettings & read_settings) const
 {
-    auto buf = BackupEntryFromImmutableFile::getReadBuffer();
-    return std::make_unique<LimitSeekableReadBuffer>(std::move(buf), 0, limit);
+    std::unique_ptr<SeekableReadBuffer> buf;
+    if (copy_encrypted)
+        buf = disk->readEncryptedFile(file_path, read_settings.adjustBufferSize(size));
+    else
+        buf = disk->readFile(file_path, read_settings.adjustBufferSize(size));
+    return std::make_unique<LimitSeekableReadBuffer>(std::move(buf), 0, size);
 }
 
 }

src/Backups/BackupEntryFromAppendOnlyFile.h

@@ -1,6 +1,6 @@
 #pragma once
 
-#include <Backups/BackupEntryFromImmutableFile.h>
+#include <Backups/BackupEntryWithChecksumCalculation.h>
 
 namespace DB
@@ -8,24 +8,34 @@ namespace DB
 /// Represents a file prepared to be included in a backup, assuming that until this backup entry is destroyed
 /// the file can be appended with new data, but the bytes which are already in the file won't be changed.
-class BackupEntryFromAppendOnlyFile : public BackupEntryFromImmutableFile
+class BackupEntryFromAppendOnlyFile : public BackupEntryWithChecksumCalculation<IBackupEntry>
 {
 public:
-    /// The constructor is allowed to not set `file_size_` or `checksum_`, in that case it will be calculated from the data.
+    /// The constructor is allowed to not set `file_size_`, in that case it will be calculated from the data.
     BackupEntryFromAppendOnlyFile(
         const DiskPtr & disk_,
         const String & file_path_,
-        const ReadSettings & settings_,
-        const std::optional<UInt64> & file_size_ = {},
-        const std::optional<UInt128> & checksum_ = {},
-        const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_ = {});
+        bool copy_encrypted_ = false,
+        const std::optional<UInt64> & file_size_ = {});
 
-    UInt64 getSize() const override { return limit; }
-    std::unique_ptr<SeekableReadBuffer> getReadBuffer() const override;
+    ~BackupEntryFromAppendOnlyFile() override;
+
+    std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override;
+    UInt64 getSize() const override { return size; }
+
+    DataSourceDescription getDataSourceDescription() const override { return data_source_description; }
+    bool isEncryptedByDisk() const override { return copy_encrypted; }
+
+    bool isFromFile() const override { return true; }
+    DiskPtr getDisk() const override { return disk; }
+    String getFilePath() const override { return file_path; }
 
 private:
-    const UInt64 limit;
+    const DiskPtr disk;
+    const String file_path;
+    const DataSourceDescription data_source_description;
+    const bool copy_encrypted;
+    const UInt64 size;
 };
 
 }

src/Backups/BackupEntryFromImmutableFile.cpp

@@ -1,53 +1,84 @@
 #include <Backups/BackupEntryFromImmutableFile.h>
 #include <Disks/IDisk.h>
-#include <Disks/IO/createReadBufferFromFileBase.h>
-#include <Poco/File.h>
-#include <Common/filesystemHelpers.h>
 
 namespace DB
 {
 
+namespace
+{
+    /// We mix the checksum calculated for non-encrypted data with IV generated to encrypt the file
+    /// to generate kind of a checksum for encrypted data. Of course it differs from the CityHash properly calculated for encrypted data.
+    UInt128 combineChecksums(UInt128 checksum1, UInt128 checksum2)
+    {
+        chassert(std::size(checksum2.items) == 2);
+        return CityHash_v1_0_2::CityHash128WithSeed(reinterpret_cast<const char *>(&checksum1), sizeof(checksum1), {checksum2.items[0], checksum2.items[1]});
+    }
+}
+
 BackupEntryFromImmutableFile::BackupEntryFromImmutableFile(
     const DiskPtr & disk_,
     const String & file_path_,
-    const ReadSettings & settings_,
+    bool copy_encrypted_,
     const std::optional<UInt64> & file_size_,
-    const std::optional<UInt128> & checksum_,
-    const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_)
+    const std::optional<UInt128> & checksum_)
     : disk(disk_)
     , file_path(file_path_)
-    , settings(settings_)
+    , data_source_description(disk->getDataSourceDescription())
+    , copy_encrypted(copy_encrypted_ && data_source_description.is_encrypted)
     , file_size(file_size_)
     , checksum(checksum_)
-    , temporary_file_on_disk(temporary_file_)
 {
 }
 
 BackupEntryFromImmutableFile::~BackupEntryFromImmutableFile() = default;
 
+std::unique_ptr<SeekableReadBuffer> BackupEntryFromImmutableFile::getReadBuffer(const ReadSettings & read_settings) const
+{
+    if (copy_encrypted)
+        return disk->readEncryptedFile(file_path, read_settings);
+    else
+        return disk->readFile(file_path, read_settings);
+}
+
 UInt64 BackupEntryFromImmutableFile::getSize() const
 {
-    std::lock_guard lock{get_file_size_mutex};
-    if (!file_size)
-        file_size = disk->getFileSize(file_path);
+    std::lock_guard lock{size_and_checksum_mutex};
+    if (!file_size_adjusted)
+    {
+        if (!file_size)
+            file_size = copy_encrypted ? disk->getEncryptedFileSize(file_path) : disk->getFileSize(file_path);
+        else if (copy_encrypted)
+            file_size = disk->getEncryptedFileSize(*file_size);
+        file_size_adjusted = true;
+    }
     return *file_size;
 }
 
-std::unique_ptr<SeekableReadBuffer> BackupEntryFromImmutableFile::getReadBuffer() const
+UInt128 BackupEntryFromImmutableFile::getChecksum() const
 {
-    return disk->readFile(file_path, settings);
-}
-
-DataSourceDescription BackupEntryFromImmutableFile::getDataSourceDescription() const
-{
-    return disk->getDataSourceDescription();
+    std::lock_guard lock{size_and_checksum_mutex};
+    if (!checksum_adjusted)
+    {
+        if (!checksum)
+            checksum = BackupEntryWithChecksumCalculation<IBackupEntry>::getChecksum();
+        else if (copy_encrypted)
+            checksum = combineChecksums(*checksum, disk->getEncryptedFileIV(file_path));
+        checksum_adjusted = true;
+    }
+    return *checksum;
 }
 
-String BackupEntryFromImmutableFile::getFilePath() const
+std::optional<UInt128> BackupEntryFromImmutableFile::getPartialChecksum(size_t prefix_length) const
 {
-    return file_path;
+    if (prefix_length == 0)
+        return 0;
+    if (prefix_length >= getSize())
+        return getChecksum();
+    /// For immutable files we don't use partial checksums.
+    return std::nullopt;
 }
 
 }

src/Backups/BackupEntryFromImmutableFile.h

@@ -1,49 +1,53 @@
 #pragma once
 
-#include <Backups/IBackupEntry.h>
-#include <IO/ReadSettings.h>
+#include <Backups/BackupEntryWithChecksumCalculation.h>
 #include <base/defines.h>
 #include <mutex>
 
 namespace DB
 {
 
-class TemporaryFileOnDisk;
 class IDisk;
 using DiskPtr = std::shared_ptr<IDisk>;
 
 /// Represents a file prepared to be included in a backup, assuming that until this backup entry is destroyed the file won't be changed.
-class BackupEntryFromImmutableFile : public IBackupEntry
+class BackupEntryFromImmutableFile : public BackupEntryWithChecksumCalculation<IBackupEntry>
 {
 public:
     /// The constructor is allowed to not set `file_size_` or `checksum_`, in that case it will be calculated from the data.
     BackupEntryFromImmutableFile(
         const DiskPtr & disk_,
         const String & file_path_,
-        const ReadSettings & settings_,
+        bool copy_encrypted_ = false,
         const std::optional<UInt64> & file_size_ = {},
-        const std::optional<UInt128> & checksum_ = {},
-        const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_ = {});
+        const std::optional<UInt128> & checksum_ = {});
 
     ~BackupEntryFromImmutableFile() override;
 
+    std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override;
     UInt64 getSize() const override;
-    std::optional<UInt128> getChecksum() const override { return checksum; }
-    std::unique_ptr<SeekableReadBuffer> getReadBuffer() const override;
+    UInt128 getChecksum() const override;
+    std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override;
 
-    String getFilePath() const override;
-    DataSourceDescription getDataSourceDescription() const override;
+    DataSourceDescription getDataSourceDescription() const override { return data_source_description; }
+    bool isEncryptedByDisk() const override { return copy_encrypted; }
 
-    DiskPtr tryGetDiskIfExists() const override { return disk; }
+    bool isFromFile() const override { return true; }
+    bool isFromImmutableFile() const override { return true; }
+    DiskPtr getDisk() const override { return disk; }
+    String getFilePath() const override { return file_path; }
 
 private:
     const DiskPtr disk;
     const String file_path;
-    ReadSettings settings;
-    mutable std::optional<UInt64> file_size TSA_GUARDED_BY(get_file_size_mutex);
-    mutable std::mutex get_file_size_mutex;
-    const std::optional<UInt128> checksum;
-    const std::shared_ptr<TemporaryFileOnDisk> temporary_file_on_disk;
+    const DataSourceDescription data_source_description;
+    const bool copy_encrypted;
+    mutable std::optional<UInt64> file_size;
+    mutable std::optional<UInt64> checksum;
+    mutable bool file_size_adjusted = false;
+    mutable bool checksum_adjusted = false;
+    mutable std::mutex size_and_checksum_mutex;
 };
 
 }

src/Backups/BackupEntryFromMemory.cpp

@@ -5,17 +5,16 @@
 namespace DB
 {
 
-BackupEntryFromMemory::BackupEntryFromMemory(const void * data_, size_t size_, const std::optional<UInt128> & checksum_)
-    : BackupEntryFromMemory(String{reinterpret_cast<const char *>(data_), size_}, checksum_)
+BackupEntryFromMemory::BackupEntryFromMemory(const void * data_, size_t size_)
+    : BackupEntryFromMemory(String{reinterpret_cast<const char *>(data_), size_})
 {
 }
 
-BackupEntryFromMemory::BackupEntryFromMemory(String data_, const std::optional<UInt128> & checksum_)
-    : data(std::move(data_)), checksum(checksum_)
+BackupEntryFromMemory::BackupEntryFromMemory(String data_) : data(std::move(data_))
 {
 }
 
-std::unique_ptr<SeekableReadBuffer> BackupEntryFromMemory::getReadBuffer() const
+std::unique_ptr<SeekableReadBuffer> BackupEntryFromMemory::getReadBuffer(const ReadSettings &) const
 {
     return std::make_unique<ReadBufferFromString>(data);
 }

src/Backups/BackupEntryFromMemory.h

@@ -1,39 +1,26 @@
 #pragma once
 
-#include <Backups/IBackupEntry.h>
+#include <Backups/BackupEntryWithChecksumCalculation.h>
 #include <IO/ReadBufferFromString.h>
 
 namespace DB
 {
 
 /// Represents small preloaded data to be included in a backup.
-class BackupEntryFromMemory : public IBackupEntry
+class BackupEntryFromMemory : public BackupEntryWithChecksumCalculation<IBackupEntry>
 {
 public:
-    /// The constructor is allowed to not set `checksum_`, in that case it will be calculated from the data.
-    BackupEntryFromMemory(const void * data_, size_t size_, const std::optional<UInt128> & checksum_ = {});
-    explicit BackupEntryFromMemory(String data_, const std::optional<UInt128> & checksum_ = {});
+    BackupEntryFromMemory(const void * data_, size_t size_);
+    explicit BackupEntryFromMemory(String data_);
 
+    std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings &) const override;
     UInt64 getSize() const override { return data.size(); }
-    std::optional<UInt128> getChecksum() const override { return checksum; }
-    std::unique_ptr<SeekableReadBuffer> getReadBuffer() const override;
 
-    String getFilePath() const override
-    {
-        return "";
-    }
-
-    DataSourceDescription getDataSourceDescription() const override
-    {
-        return DataSourceDescription{DataSourceType::RAM, "", false, false};
-    }
-
-    DiskPtr tryGetDiskIfExists() const override { return nullptr; }
+    DataSourceDescription getDataSourceDescription() const override { return DataSourceDescription{DataSourceType::RAM, "", false, false}; }
 
 private:
     const String data;
-    const std::optional<UInt128> checksum;
 };
 
 }

src/Backups/BackupEntryFromSmallFile.cpp

@@ -1,6 +1,9 @@
 #include <Backups/BackupEntryFromSmallFile.h>
+#include <Common/filesystemHelpers.h>
+#include <Disks/DiskLocal.h>
 #include <Disks/IDisk.h>
 #include <Disks/IO/createReadBufferFromFileBase.h>
+#include <IO/ReadBufferFromString.h>
 #include <IO/ReadHelpers.h>
@@ -16,9 +19,9 @@ namespace
         return s;
     }
 
-    String readFile(const DiskPtr & disk, const String & file_path)
+    String readFile(const DiskPtr & disk, const String & file_path, bool copy_encrypted)
     {
-        auto buf = disk->readFile(file_path);
+        auto buf = copy_encrypted ? disk->readEncryptedFile(file_path, {}) : disk->readFile(file_path);
         String s;
         readStringUntilEOF(s, *buf);
         return s;
@@ -26,15 +29,25 @@
 }
 
-BackupEntryFromSmallFile::BackupEntryFromSmallFile(const String & file_path_, const std::optional<UInt128> & checksum_)
-    : BackupEntryFromMemory(readFile(file_path_), checksum_), file_path(file_path_)
+BackupEntryFromSmallFile::BackupEntryFromSmallFile(const String & file_path_)
+    : file_path(file_path_)
+    , data_source_description(DiskLocal::getLocalDataSourceDescription(file_path_))
+    , data(readFile(file_path_))
 {
 }
 
-BackupEntryFromSmallFile::BackupEntryFromSmallFile(
-    const DiskPtr & disk_, const String & file_path_, const std::optional<UInt128> & checksum_)
-    : BackupEntryFromMemory(readFile(disk_, file_path_), checksum_), disk(disk_), file_path(file_path_)
+BackupEntryFromSmallFile::BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, bool copy_encrypted_)
+    : disk(disk_)
+    , file_path(file_path_)
+    , data_source_description(disk_->getDataSourceDescription())
+    , copy_encrypted(copy_encrypted_ && data_source_description.is_encrypted)
+    , data(readFile(disk_, file_path, copy_encrypted))
 {
 }
+
+std::unique_ptr<SeekableReadBuffer> BackupEntryFromSmallFile::getReadBuffer(const ReadSettings &) const
+{
+    return std::make_unique<ReadBufferFromString>(data);
+}
 
 }

src/Backups/BackupEntryFromSmallFile.h

@ -1,6 +1,6 @@
#pragma once #pragma once
#include <Backups/BackupEntryFromMemory.h> #include <Backups/BackupEntryWithChecksumCalculation.h>
namespace DB namespace DB
@ -10,25 +10,28 @@ using DiskPtr = std::shared_ptr<IDisk>;
/// Represents a file prepared to be included in a backup, /// Represents a file prepared to be included in a backup,
/// assuming that the file is small and can be easily loaded into memory. /// assuming that the file is small and can be easily loaded into memory.
class BackupEntryFromSmallFile : public BackupEntryFromMemory class BackupEntryFromSmallFile : public BackupEntryWithChecksumCalculation<IBackupEntry>
{ {
public: public:
/// The constructor is allowed to not set `checksum_`, in that case it will be calculated from the data. explicit BackupEntryFromSmallFile(const String & file_path_);
explicit BackupEntryFromSmallFile( BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, bool copy_encrypted_ = false);
const String & file_path_,
const std::optional<UInt128> & checksum_ = {});
BackupEntryFromSmallFile( std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings &) const override;
const DiskPtr & disk_, UInt64 getSize() const override { return data.size(); }
const String & file_path_,
const std::optional<UInt128> & checksum_ = {});
DataSourceDescription getDataSourceDescription() const override { return data_source_description; }
bool isEncryptedByDisk() const override { return copy_encrypted; }
bool isFromFile() const override { return true; }
DiskPtr getDisk() const override { return disk; }
String getFilePath() const override { return file_path; } String getFilePath() const override { return file_path; }
DiskPtr tryGetDiskIfExists() const override { return disk; }
private: private:
const DiskPtr disk; const DiskPtr disk;
const String file_path; const String file_path;
const DataSourceDescription data_source_description;
const bool copy_encrypted = false;
const String data;
}; };
} }
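The header makes the design visible: the file's bytes are captured once in `data`, and every `getReadBuffer()` call serves a fresh in-memory buffer over that copy. A minimal standalone sketch of the same shape (toy types, not the real interface):

```cpp
#include <istream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>

// Toy analogue of BackupEntryFromSmallFile: slurp once, serve from memory.
class SmallFileEntry
{
public:
    explicit SmallFileEntry(std::string data_) : data(std::move(data_)) {}

    // Each call returns an independent reader, which matters because the
    // checksum machinery may re-read the entry after the backup code did.
    std::unique_ptr<std::istream> getReadBuffer() const
    {
        return std::make_unique<std::istringstream>(data);
    }

    size_t getSize() const { return data.size(); }

private:
    const std::string data;
};
```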
@ -0,0 +1,54 @@
#include <Backups/BackupEntryWithChecksumCalculation.h>
#include <IO/HashingReadBuffer.h>
namespace DB
{
template <typename Base>
UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum() const
{
std::lock_guard lock{checksum_calculation_mutex};
if (!calculated_checksum)
{
auto read_buffer = this->getReadBuffer(ReadSettings{}.adjustBufferSize(this->getSize()));
HashingReadBuffer hashing_read_buffer(*read_buffer);
hashing_read_buffer.ignoreAll();
calculated_checksum = hashing_read_buffer.getHash();
}
return *calculated_checksum;
}
template <typename Base>
std::optional<UInt128> BackupEntryWithChecksumCalculation<Base>::getPartialChecksum(size_t prefix_length) const
{
if (prefix_length == 0)
return 0;
size_t size = this->getSize();
if (prefix_length >= size)
return this->getChecksum();
std::lock_guard lock{checksum_calculation_mutex};
ReadSettings read_settings;
if (calculated_checksum)
read_settings = read_settings.adjustBufferSize(prefix_length);
auto read_buffer = this->getReadBuffer(read_settings);
HashingReadBuffer hashing_read_buffer(*read_buffer);
hashing_read_buffer.ignore(prefix_length);
auto partial_checksum = hashing_read_buffer.getHash();
if (!calculated_checksum)
{
hashing_read_buffer.ignoreAll();
calculated_checksum = hashing_read_buffer.getHash();
}
return partial_checksum;
}
template class BackupEntryWithChecksumCalculation<IBackupEntry>;
}
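The trick in getPartialChecksum() is worth spelling out: hash the prefix, snapshot the digest, then keep feeding the same hasher so the full checksum falls out of the same pass (and is cached for the next getChecksum() call). A standalone illustration with a deliberately toy hash (the real code hashes through HashingReadBuffer):

```cpp
#include <cstddef>
#include <cstdint>
#include <string_view>
#include <utility>

struct ToyHasher   // stand-in for HashingReadBuffer's running digest
{
    uint64_t state = 0;
    void update(std::string_view bytes)
    {
        for (unsigned char c : bytes)
            state = state * 131 + c;
    }
    uint64_t digest() const { return state; }
};

// Returns {prefix_checksum, full_checksum} while reading the data only once.
// Assumes prefix_length <= data.size(), mirroring the guard in the real code.
std::pair<uint64_t, uint64_t> prefixAndFullChecksum(std::string_view data, size_t prefix_length)
{
    ToyHasher hasher;
    hasher.update(data.substr(0, prefix_length));
    uint64_t prefix_checksum = hasher.digest();   // snapshot after the prefix
    hasher.update(data.substr(prefix_length));    // continue with the tail
    return {prefix_checksum, hasher.digest()};    // full checksum for free
}
```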
@ -0,0 +1,22 @@
#pragma once
#include <Backups/IBackupEntry.h>
namespace DB
{
/// Calculates the checksum and the partial checksum for a backup entry based on the ReadBuffer returned by getReadBuffer().
template <typename Base>
class BackupEntryWithChecksumCalculation : public Base
{
public:
UInt128 getChecksum() const override;
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override;
private:
mutable std::optional<UInt128> calculated_checksum;
mutable std::mutex checksum_calculation_mutex;
};
}
@ -15,23 +15,33 @@ public:
BackupEntryWrappedWith(BackupEntryPtr entry_, T && custom_value_) : entry(entry_), custom_value(std::move(custom_value_)) { } BackupEntryWrappedWith(BackupEntryPtr entry_, T && custom_value_) : entry(entry_), custom_value(std::move(custom_value_)) { }
~BackupEntryWrappedWith() override = default; ~BackupEntryWrappedWith() override = default;
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override { return entry->getReadBuffer(read_settings); }
UInt64 getSize() const override { return entry->getSize(); } UInt64 getSize() const override { return entry->getSize(); }
std::optional<UInt128> getChecksum() const override { return entry->getChecksum(); } UInt128 getChecksum() const override { return entry->getChecksum(); }
std::unique_ptr<SeekableReadBuffer> getReadBuffer() const override { return entry->getReadBuffer(); } std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override { return entry->getPartialChecksum(prefix_length); }
String getFilePath() const override { return entry->getFilePath(); }
DiskPtr tryGetDiskIfExists() const override { return entry->tryGetDiskIfExists(); }
DataSourceDescription getDataSourceDescription() const override { return entry->getDataSourceDescription(); } DataSourceDescription getDataSourceDescription() const override { return entry->getDataSourceDescription(); }
bool isEncryptedByDisk() const override { return entry->isEncryptedByDisk(); }
bool isFromFile() const override { return entry->isFromFile(); }
bool isFromImmutableFile() const override { return entry->isFromImmutableFile(); }
String getFilePath() const override { return entry->getFilePath(); }
DiskPtr getDisk() const override { return entry->getDisk(); }
private: private:
BackupEntryPtr entry; BackupEntryPtr entry;
T custom_value; T custom_value;
}; };
template <typename T>
BackupEntryPtr wrapBackupEntryWith(BackupEntryPtr && backup_entry, const T & custom_value)
{
return std::make_shared<BackupEntryWrappedWith<T>>(std::move(backup_entry), custom_value);
}
template <typename T> template <typename T>
void wrapBackupEntriesWith(std::vector<std::pair<String, BackupEntryPtr>> & backup_entries, const T & custom_value) void wrapBackupEntriesWith(std::vector<std::pair<String, BackupEntryPtr>> & backup_entries, const T & custom_value)
{ {
for (auto & [_, backup_entry] : backup_entries) for (auto & [_, backup_entry] : backup_entries)
backup_entry = std::make_shared<BackupEntryWrappedWith<T>>(std::move(backup_entry), custom_value); backup_entry = wrapBackupEntryWith(std::move(backup_entry), custom_value);
} }
} }
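The wrapBackupEntryWith() helper exists to pin a custom value's lifetime to the entry's: whatever is captured lives until the last wrapped entry is destroyed. A standalone miniature of the same idea (toy types; the real wrapper also forwards the whole IBackupEntry interface):

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Guard { ~Guard() { std::cout << "guard released\n"; } };

// Analogue of BackupEntryWrappedWith<T>: an entry plus a value it keeps alive.
struct WrappedEntry
{
    std::string data;               // stands in for the wrapped backup entry
    std::shared_ptr<Guard> custom;  // destroyed together with the wrapper
};

int main()
{
    auto guard = std::make_shared<Guard>();
    std::vector<WrappedEntry> entries{{"a", guard}, {"b", guard}};
    guard.reset();    // nothing printed: both entries still co-own the guard
    entries.clear();  // "guard released" is printed here
}
```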
@ -7,7 +7,7 @@
#include <Common/scope_guard_safe.h> #include <Common/scope_guard_safe.h>
#include <Common/setThreadName.h> #include <Common/setThreadName.h>
#include <Common/ThreadPool.h> #include <Common/ThreadPool.h>
#include <IO/HashingReadBuffer.h> #include <base/hex.h>
namespace DB namespace DB
@ -36,7 +36,7 @@ namespace
{ {
/// We cannot reuse base backup because our file is smaller /// We cannot reuse base backup because our file is smaller
/// than file stored in previous backup /// than file stored in previous backup
if (new_entry_info.size < base_backup_info.first) if ((new_entry_info.size < base_backup_info.first) || !base_backup_info.first)
return CheckBackupResult::HasNothing; return CheckBackupResult::HasNothing;
if (base_backup_info.first == new_entry_info.size) if (base_backup_info.first == new_entry_info.size)
@ -48,45 +48,22 @@ namespace
struct ChecksumsForNewEntry struct ChecksumsForNewEntry
{ {
UInt128 full_checksum; /// 0 is the valid checksum of empty data.
UInt128 prefix_checksum; UInt128 full_checksum = 0;
/// std::nullopt here means that it's too difficult to calculate a partial checksum so it shouldn't be used.
std::optional<UInt128> prefix_checksum;
}; };
/// Calculate the checksum for a backup entry if it isn't calculated yet. /// Calculate the checksum for a backup entry if it isn't calculated yet.
/// Also able to calculate an additional checksum of a prefix. /// Also able to calculate an additional checksum of a prefix.
ChecksumsForNewEntry calculateNewEntryChecksumsIfNeeded(const BackupEntryPtr & entry, size_t prefix_size) ChecksumsForNewEntry calculateNewEntryChecksumsIfNeeded(const BackupEntryPtr & entry, size_t prefix_size)
{ {
if (prefix_size > 0) ChecksumsForNewEntry res;
{ /// The partial checksum should be calculated before the full checksum to enable optimization in BackupEntryWithChecksumCalculation.
auto read_buffer = entry->getReadBuffer(); res.prefix_checksum = entry->getPartialChecksum(prefix_size);
HashingReadBuffer hashing_read_buffer(*read_buffer); res.full_checksum = entry->getChecksum();
hashing_read_buffer.ignore(prefix_size); return res;
auto prefix_checksum = hashing_read_buffer.getHash();
if (entry->getChecksum() == std::nullopt)
{
hashing_read_buffer.ignoreAll();
auto full_checksum = hashing_read_buffer.getHash();
return ChecksumsForNewEntry{full_checksum, prefix_checksum};
}
else
{
return ChecksumsForNewEntry{*(entry->getChecksum()), prefix_checksum};
}
}
else
{
if (entry->getChecksum() == std::nullopt)
{
auto read_buffer = entry->getReadBuffer();
HashingReadBuffer hashing_read_buffer(*read_buffer);
hashing_read_buffer.ignoreAll();
return ChecksumsForNewEntry{hashing_read_buffer.getHash(), 0};
}
else
{
return ChecksumsForNewEntry{*(entry->getChecksum()), 0};
}
}
} }
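Only part of checkBaseBackupForFile() is visible in this hunk, but the decision those checksums feed is roughly: reuse nothing if the new file is shorter than (or the base backup lacks) the stored one, reuse the whole base file on an exact size-and-checksum match, and otherwise try to reuse the base file as a prefix. A simplified sketch under those assumptions (not the exact production rules):

```cpp
#include <cstdint>
#include <optional>

enum class Reuse { Nothing, Prefix, Whole };

struct BaseFileInfo { uint64_t size = 0; uint64_t checksum = 0; };

Reuse decideReuse(const BaseFileInfo & base, uint64_t new_size,
                  std::optional<uint64_t> new_prefix_checksum, uint64_t new_full_checksum)
{
    if (new_size < base.size || base.size == 0)
        return Reuse::Nothing;   // our file is smaller, or the base holds nothing
    if (new_size == base.size)
        return new_full_checksum == base.checksum ? Reuse::Whole : Reuse::Nothing;
    // New file is longer: the base is reusable only if it matches our prefix.
    if (new_prefix_checksum && *new_prefix_checksum == base.checksum)
        return Reuse::Prefix;
    return Reuse::Nothing;
}
```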
/// We store entries' file names in the backup without leading slashes. /// We store entries' file names in the backup without leading slashes.
@ -111,6 +88,7 @@ String BackupFileInfo::describe() const
result += fmt::format("base_checksum: {};\n", getHexUIntLowercase(checksum)); result += fmt::format("base_checksum: {};\n", getHexUIntLowercase(checksum));
result += fmt::format("data_file_name: {};\n", data_file_name); result += fmt::format("data_file_name: {};\n", data_file_name);
result += fmt::format("data_file_index: {};\n", data_file_index); result += fmt::format("data_file_index: {};\n", data_file_index);
result += fmt::format("encrypted_by_disk: {};\n", encrypted_by_disk);
return result; return result;
} }
@ -122,6 +100,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
BackupFileInfo info; BackupFileInfo info;
info.file_name = adjusted_path; info.file_name = adjusted_path;
info.size = backup_entry->getSize(); info.size = backup_entry->getSize();
info.encrypted_by_disk = backup_entry->isEncryptedByDisk();
/// We don't set `info.data_file_name` and `info.data_file_index` in this function because they're set during backup coordination /// We don't set `info.data_file_name` and `info.data_file_index` in this function because they're set during backup coordination
/// (see the class BackupCoordinationFileInfos). /// (see the class BackupCoordinationFileInfos).
@ -139,7 +118,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
/// We have info about this file in base backup /// We have info about this file in base backup
/// If file has no checksum -- calculate and fill it. /// If file has no checksum -- calculate and fill it.
if (base_backup_file_info.has_value()) if (base_backup_file_info)
{ {
LOG_TRACE(log, "File {} found in base backup, checking for equality", adjusted_path); LOG_TRACE(log, "File {} found in base backup, checking for equality", adjusted_path);
CheckBackupResult check_base = checkBaseBackupForFile(*base_backup_file_info, info); CheckBackupResult check_base = checkBaseBackupForFile(*base_backup_file_info, info);
@ -35,6 +35,9 @@ struct BackupFileInfo
/// This field is set during backup coordination (see the class BackupCoordinationFileInfos). /// This field is set during backup coordination (see the class BackupCoordinationFileInfos).
size_t data_file_index = static_cast<size_t>(-1); size_t data_file_index = static_cast<size_t>(-1);
/// Whether this file is encrypted by an encrypted disk.
bool encrypted_by_disk = false;
struct LessByFileName struct LessByFileName
{ {
bool operator()(const BackupFileInfo & lhs, const BackupFileInfo & rhs) const { return (lhs.file_name < rhs.file_name); } bool operator()(const BackupFileInfo & lhs, const BackupFileInfo & rhs) const { return (lhs.file_name < rhs.file_name); }
@ -1,46 +0,0 @@
#include <Backups/BackupIO.h>
#include <IO/copyData.h>
#include <IO/WriteBufferFromFileBase.h>
#include <IO/SeekableReadBuffer.h>
#include <Interpreters/Context.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
void IBackupReader::copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings)
{
auto read_buffer = readFile(file_name);
auto write_buffer = destination_disk->writeFile(destination_path, std::min<size_t>(size, DBMS_DEFAULT_BUFFER_SIZE), write_mode, write_settings);
copyData(*read_buffer, *write_buffer, size);
write_buffer->finalize();
}
IBackupWriter::IBackupWriter(const ContextPtr & context_)
: read_settings(context_->getBackupReadSettings())
, has_throttling(static_cast<bool>(context_->getBackupsThrottler()))
{}
void IBackupWriter::copyDataToFile(const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name)
{
auto read_buffer = create_read_buffer();
if (offset)
read_buffer->seek(offset, SEEK_SET);
auto write_buffer = writeFile(dest_file_name);
copyData(*read_buffer, *write_buffer, size);
write_buffer->finalize();
}
void IBackupWriter::copyFileNative(
DiskPtr /* src_disk */, const String & /* src_file_name */, UInt64 /* src_offset */, UInt64 /* src_size */, const String & /* dest_file_name */)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Native copy not implemented for backup writer");
}
}
@ -1,58 +1,72 @@
#pragma once #pragma once
#include <Core/Types.h> #include <Core/Types.h>
#include <Disks/DiskType.h>
#include <Disks/IDisk.h>
#include <IO/ReadSettings.h>
#include <Interpreters/Context_fwd.h>
namespace DB namespace DB
{ {
class IDisk;
using DiskPtr = std::shared_ptr<IDisk>;
class SeekableReadBuffer; class SeekableReadBuffer;
class WriteBuffer; class WriteBuffer;
enum class WriteMode;
struct WriteSettings;
struct ReadSettings;
/// Represents operations of loading from disk or downloading for reading a backup. /// Represents operations of loading from disk or downloading for reading a backup.
class IBackupReader /// BackupReaderFile, BackupReaderDisk /// See also implementations: BackupReaderFile, BackupReaderDisk.
class IBackupReader
{ {
public: public:
virtual ~IBackupReader() = default; virtual ~IBackupReader() = default;
virtual bool fileExists(const String & file_name) = 0; virtual bool fileExists(const String & file_name) = 0;
virtual UInt64 getFileSize(const String & file_name) = 0; virtual UInt64 getFileSize(const String & file_name) = 0;
virtual std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) = 0; virtual std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) = 0;
virtual void copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings); /// The function copyFileToDisk() can be much faster than reading the file with readFile() and then writing it to some disk.
virtual DataSourceDescription getDataSourceDescription() const = 0; /// (especially for S3 where it can use CopyObject to copy objects inside S3 instead of downloading and uploading them).
/// Parameters:
/// `encrypted_in_backup` specifies whether this file is encrypted in the backup, so it shouldn't be encrypted again while restoring to an encrypted disk.
virtual void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) = 0;
virtual const ReadSettings & getReadSettings() const = 0;
virtual const WriteSettings & getWriteSettings() const = 0;
virtual size_t getWriteBufferSize() const = 0;
}; };
/// Represents operations of storing to disk or uploading for writing a backup. /// Represents operations of storing to disk or uploading for writing a backup.
class IBackupWriter /// BackupWriterFile, BackupWriterDisk /// See also implementations: BackupWriterFile, BackupWriterDisk.
class IBackupWriter
{ {
public: public:
using CreateReadBufferFunction = std::function<std::unique_ptr<SeekableReadBuffer>()>;
explicit IBackupWriter(const ContextPtr & context_);
virtual ~IBackupWriter() = default; virtual ~IBackupWriter() = default;
virtual bool fileExists(const String & file_name) = 0; virtual bool fileExists(const String & file_name) = 0;
virtual UInt64 getFileSize(const String & file_name) = 0; virtual UInt64 getFileSize(const String & file_name) = 0;
virtual bool fileContentsEqual(const String & file_name, const String & expected_file_contents) = 0; virtual bool fileContentsEqual(const String & file_name, const String & expected_file_contents) = 0;
virtual std::unique_ptr<WriteBuffer> writeFile(const String & file_name) = 0; virtual std::unique_ptr<WriteBuffer> writeFile(const String & file_name) = 0;
using CreateReadBufferFunction = std::function<std::unique_ptr<SeekableReadBuffer>()>;
virtual void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) = 0;
/// The function copyFileFromDisk() can be much faster than copyDataToFile()
/// (especially for S3 where it can use CopyObject to copy objects inside S3 instead of downloading and uploading them).
/// Parameters:
/// `start_pos` and `length` specify a part of the file on `src_disk` to copy to the backup.
/// `copy_encrypted` specifies whether this function should copy the encrypted data of the file `src_path` to the backup.
virtual void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) = 0;
virtual void removeFile(const String & file_name) = 0; virtual void removeFile(const String & file_name) = 0;
virtual void removeFiles(const Strings & file_names) = 0; virtual void removeFiles(const Strings & file_names) = 0;
virtual DataSourceDescription getDataSourceDescription() const = 0;
virtual void copyDataToFile(const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name);
virtual bool supportNativeCopy(DataSourceDescription /* data_source_description */) const { return false; }
/// Copy file using native copy (optimized for S3 to use CopyObject) virtual const ReadSettings & getReadSettings() const = 0;
/// virtual const WriteSettings & getWriteSettings() const = 0;
/// NOTE: It still may fall back to copyDataToFile() if native copy is not possible: virtual size_t getWriteBufferSize() const = 0;
/// - different buckets
/// - throttling had been requested
virtual void copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name);
protected:
const ReadSettings read_settings;
const bool has_throttling;
}; };
} }
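All implementations of copyFileToDisk()/copyFileFromDisk() below follow the same contract: attempt an engine-specific fast path (a filesystem copy, IDisk::copyFile(), S3 CopyObject), and if the preconditions fail, stream through ordinary buffers. The fallback itself is just a buffered copy; a standalone sketch with std::fstream (illustrative paths, error handling omitted):

```cpp
#include <filesystem>
#include <fstream>

// Fallback shape of copyFileToDisk(): stream the backup file into the
// destination through ordinary buffers (copyData()/finalize() analogue).
void copyThroughBuffers(const std::filesystem::path & path_in_backup,
                        const std::filesystem::path & destination_path)
{
    std::ifstream in(path_in_backup, std::ios::binary);
    std::ofstream out(destination_path, std::ios::binary | std::ios::trunc);
    out << in.rdbuf();
    out.flush();
}
```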
@ -0,0 +1,95 @@
#include <Backups/BackupIO_Default.h>
#include <Disks/IDisk.h>
#include <IO/copyData.h>
#include <IO/WriteBufferFromFileBase.h>
#include <IO/SeekableReadBuffer.h>
#include <Interpreters/Context.h>
#include <Common/logger_useful.h>
namespace DB
{
BackupReaderDefault::BackupReaderDefault(Poco::Logger * log_, const ContextPtr & context_)
: log(log_)
, read_settings(context_->getBackupReadSettings())
, write_settings(context_->getWriteSettings())
, write_buffer_size(DBMS_DEFAULT_BUFFER_SIZE)
{
}
void BackupReaderDefault::copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode)
{
LOG_TRACE(log, "Copying file {} to disk {} through buffers", path_in_backup, destination_disk->getName());
auto read_buffer = readFile(path_in_backup);
std::unique_ptr<WriteBuffer> write_buffer;
auto buf_size = std::min(file_size, write_buffer_size);
if (encrypted_in_backup)
write_buffer = destination_disk->writeEncryptedFile(destination_path, buf_size, write_mode, write_settings);
else
write_buffer = destination_disk->writeFile(destination_path, buf_size, write_mode, write_settings);
copyData(*read_buffer, *write_buffer, file_size);
write_buffer->finalize();
}
BackupWriterDefault::BackupWriterDefault(Poco::Logger * log_, const ContextPtr & context_)
: log(log_)
, read_settings(context_->getBackupReadSettings())
, write_settings(context_->getWriteSettings())
, write_buffer_size(DBMS_DEFAULT_BUFFER_SIZE)
{
}
bool BackupWriterDefault::fileContentsEqual(const String & file_name, const String & expected_file_contents)
{
if (!fileExists(file_name))
return false;
try
{
auto in = readFile(file_name, expected_file_contents.size());
String actual_file_contents(expected_file_contents.size(), ' ');
return (in->read(actual_file_contents.data(), actual_file_contents.size()) == actual_file_contents.size())
&& (actual_file_contents == expected_file_contents) && in->eof();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
return false;
}
}
void BackupWriterDefault::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length)
{
auto read_buffer = create_read_buffer();
if (start_pos)
read_buffer->seek(start_pos, SEEK_SET);
auto write_buffer = writeFile(path_in_backup);
copyData(*read_buffer, *write_buffer, length);
write_buffer->finalize();
}
void BackupWriterDefault::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length)
{
LOG_TRACE(log, "Copying file {} from disk {} through buffers", src_path, src_disk->getName());
auto create_read_buffer = [src_disk, src_path, copy_encrypted, settings = read_settings.adjustBufferSize(start_pos + length)]
{
if (copy_encrypted)
return src_disk->readEncryptedFile(src_path, settings);
else
return src_disk->readFile(src_path, settings);
};
copyDataToFile(path_in_backup, create_read_buffer, start_pos, length);
}
}
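copyDataToFile() takes a buffer *factory* rather than a buffer, so the reader can be created lazily (possibly on another thread) and reopened per attempt; the copier then seeks to `start_pos` and copies exactly `length` bytes. A standalone sketch of that calling convention:

```cpp
#include <functional>
#include <istream>
#include <memory>
#include <string>

using CreateReader = std::function<std::unique_ptr<std::istream>()>;

// Shape of copyDataToFile(): materialize the reader via the factory,
// seek to the requested offset, then copy exactly `length` bytes.
std::string copyRange(const CreateReader & create_reader, std::streamoff start_pos, size_t length)
{
    auto in = create_reader();
    if (start_pos)
        in->seekg(start_pos);   // read_buffer->seek(start_pos, SEEK_SET) analogue
    std::string out(length, '\0');
    in->read(out.data(), static_cast<std::streamsize>(length));
    return out;
}
```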
@ -0,0 +1,73 @@
#pragma once
#include <Backups/BackupIO.h>
#include <IO/ReadSettings.h>
#include <IO/WriteSettings.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
class IDisk;
using DiskPtr = std::shared_ptr<IDisk>;
class ReadBuffer;
class SeekableReadBuffer;
class WriteBuffer;
enum class WriteMode;
/// Represents operations of loading from disk or downloading for reading a backup.
class BackupReaderDefault : public IBackupReader
{
public:
BackupReaderDefault(Poco::Logger * log_, const ContextPtr & context_);
~BackupReaderDefault() override = default;
/// The function copyFileToDisk() can be much faster than reading the file with readFile() and then writing it to some disk.
/// (especially for S3 where it can use CopyObject to copy objects inside S3 instead of downloading and uploading them).
/// Parameters:
/// `encrypted_in_backup` specifies whether this file is encrypted in the backup, so it shouldn't be encrypted again while restoring to an encrypted disk.
void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override;
const ReadSettings & getReadSettings() const override { return read_settings; }
const WriteSettings & getWriteSettings() const override { return write_settings; }
size_t getWriteBufferSize() const override { return write_buffer_size; }
protected:
Poco::Logger * const log;
const ReadSettings read_settings;
/// The write settings are used to write to the source disk in copyFileToDisk().
const WriteSettings write_settings;
const size_t write_buffer_size;
};
/// Represents operations of storing to disk or uploading for writing a backup.
class BackupWriterDefault : public IBackupWriter
{
public:
BackupWriterDefault(Poco::Logger * log_, const ContextPtr & context_);
~BackupWriterDefault() override = default;
bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;
void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override;
void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, bool copy_encrypted, UInt64 start_pos, UInt64 length) override;
const ReadSettings & getReadSettings() const override { return read_settings; }
const WriteSettings & getWriteSettings() const override { return write_settings; }
size_t getWriteBufferSize() const override { return write_buffer_size; }
protected:
/// Here readFile() is used only to implement fileContentsEqual().
virtual std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) = 0;
Poco::Logger * const log;
/// The read settings are used to read from the source disk in copyFileFromDisk().
const ReadSettings read_settings;
const WriteSettings write_settings;
const size_t write_buffer_size;
};
}
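As the comment above notes, the writer's readFile() exists only to back fileContentsEqual(), whose logic is: read exactly as many bytes as expected, then require both an exact match and an exhausted stream, so trailing bytes also count as a mismatch. A standalone sketch of that check:

```cpp
#include <fstream>
#include <string>

// Shape of BackupWriterDefault::fileContentsEqual().
bool contentsEqual(const std::string & path, const std::string & expected)
{
    std::ifstream in(path, std::ios::binary);
    if (!in)
        return false;
    std::string actual(expected.size(), ' ');
    in.read(actual.data(), static_cast<std::streamsize>(actual.size()));
    bool read_all = static_cast<size_t>(in.gcount()) == actual.size();
    // in.peek() hitting EOF plays the role of in->eof() in the real code.
    return read_all && actual == expected && in.peek() == std::char_traits<char>::eof();
}
```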
@ -8,13 +8,11 @@
namespace DB namespace DB
{ {
namespace ErrorCodes BackupReaderDisk::BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_)
{ : BackupReaderDefault(&Poco::Logger::get("BackupReaderDisk"), context_)
extern const int LOGICAL_ERROR; , disk(disk_)
} , root_path(root_path_)
, data_source_description(disk->getDataSourceDescription())
BackupReaderDisk::BackupReaderDisk(const DiskPtr & disk_, const String & path_)
: disk(disk_), path(path_), log(&Poco::Logger::get("BackupReaderDisk"))
{ {
} }
@ -22,38 +20,47 @@ BackupReaderDisk::~BackupReaderDisk() = default;
bool BackupReaderDisk::fileExists(const String & file_name) bool BackupReaderDisk::fileExists(const String & file_name)
{ {
return disk->exists(path / file_name); return disk->exists(root_path / file_name);
} }
UInt64 BackupReaderDisk::getFileSize(const String & file_name) UInt64 BackupReaderDisk::getFileSize(const String & file_name)
{ {
return disk->getFileSize(path / file_name); return disk->getFileSize(root_path / file_name);
} }
std::unique_ptr<SeekableReadBuffer> BackupReaderDisk::readFile(const String & file_name) std::unique_ptr<SeekableReadBuffer> BackupReaderDisk::readFile(const String & file_name)
{ {
return disk->readFile(path / file_name); return disk->readFile(root_path / file_name, read_settings);
} }
void BackupReaderDisk::copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path, void BackupReaderDisk::copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
WriteMode write_mode, const WriteSettings & write_settings) DiskPtr destination_disk, const String & destination_path, WriteMode write_mode)
{ {
if (write_mode == WriteMode::Rewrite) /// Use IDisk::copyFile() as a more optimal way to copy a file if it's possible.
/// However IDisk::copyFile() can't use throttling for reading, and can't copy an encrypted file or append.
bool has_throttling = disk->isRemote() ? static_cast<bool>(read_settings.remote_throttler) : static_cast<bool>(read_settings.local_throttler);
if (!has_throttling && (write_mode == WriteMode::Rewrite) && !encrypted_in_backup)
{ {
LOG_TRACE(log, "Copying {}/{} from disk {} to {} by the disk", path, file_name, disk->getName(), destination_disk->getName()); auto destination_data_source_description = destination_disk->getDataSourceDescription();
disk->copyFile(path / file_name, *destination_disk, destination_path, write_settings); if (destination_data_source_description.sameKind(data_source_description) && !data_source_description.is_encrypted)
return; {
/// Use the more optimal way.
LOG_TRACE(log, "Copying file {} from disk {} to disk {}", path_in_backup, disk->getName(), destination_disk->getName());
disk->copyFile(root_path / path_in_backup, *destination_disk, destination_path, write_settings);
return; /// copied!
}
} }
LOG_TRACE(log, "Copying {}/{} from disk {} to {} through buffers", path, file_name, disk->getName(), destination_disk->getName()); /// Fallback to copy through buffers.
IBackupReader::copyFileToDisk(file_name, size, destination_disk, destination_path, write_mode, write_settings); BackupReaderDefault::copyFileToDisk(path_in_backup, file_size, encrypted_in_backup, destination_disk, destination_path, write_mode);
} }
BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & path_, const ContextPtr & context_) BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_)
: IBackupWriter(context_) : BackupWriterDefault(&Poco::Logger::get("BackupWriterDisk"), context_)
, disk(disk_) , disk(disk_)
, path(path_) , root_path(root_path_)
, data_source_description(disk->getDataSourceDescription())
{ {
} }
@ -61,85 +68,64 @@ BackupWriterDisk::~BackupWriterDisk() = default;
bool BackupWriterDisk::fileExists(const String & file_name) bool BackupWriterDisk::fileExists(const String & file_name)
{ {
return disk->exists(path / file_name); return disk->exists(root_path / file_name);
} }
UInt64 BackupWriterDisk::getFileSize(const String & file_name) UInt64 BackupWriterDisk::getFileSize(const String & file_name)
{ {
return disk->getFileSize(path / file_name); return disk->getFileSize(root_path / file_name);
} }
bool BackupWriterDisk::fileContentsEqual(const String & file_name, const String & expected_file_contents) std::unique_ptr<ReadBuffer> BackupWriterDisk::readFile(const String & file_name, size_t expected_file_size)
{ {
if (!disk->exists(path / file_name)) return disk->readFile(root_path / file_name, read_settings.adjustBufferSize(expected_file_size));
return false;
try
{
auto in = disk->readFile(path / file_name);
String actual_file_contents(expected_file_contents.size(), ' ');
return (in->read(actual_file_contents.data(), actual_file_contents.size()) == actual_file_contents.size())
&& (actual_file_contents == expected_file_contents) && in->eof();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
return false;
}
} }
std::unique_ptr<WriteBuffer> BackupWriterDisk::writeFile(const String & file_name) std::unique_ptr<WriteBuffer> BackupWriterDisk::writeFile(const String & file_name)
{ {
auto file_path = path / file_name; auto file_path = root_path / file_name;
disk->createDirectories(file_path.parent_path()); disk->createDirectories(file_path.parent_path());
return disk->writeFile(file_path); return disk->writeFile(file_path, write_buffer_size, WriteMode::Rewrite, write_settings);
} }
void BackupWriterDisk::removeFile(const String & file_name) void BackupWriterDisk::removeFile(const String & file_name)
{ {
disk->removeFileIfExists(path / file_name); disk->removeFileIfExists(root_path / file_name);
if (disk->isDirectory(path) && disk->isDirectoryEmpty(path)) if (disk->isDirectory(root_path) && disk->isDirectoryEmpty(root_path))
disk->removeDirectory(path); disk->removeDirectory(root_path);
} }
void BackupWriterDisk::removeFiles(const Strings & file_names) void BackupWriterDisk::removeFiles(const Strings & file_names)
{ {
for (const auto & file_name : file_names) for (const auto & file_name : file_names)
disk->removeFileIfExists(path / file_name); disk->removeFileIfExists(root_path / file_name);
if (disk->isDirectory(path) && disk->isDirectoryEmpty(path)) if (disk->isDirectory(root_path) && disk->isDirectoryEmpty(root_path))
disk->removeDirectory(path); disk->removeDirectory(root_path);
} }
DataSourceDescription BackupWriterDisk::getDataSourceDescription() const void BackupWriterDisk::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length)
{ {
return disk->getDataSourceDescription(); /// Use IDisk::copyFile() as a more optimal way to copy a file if it's possible.
} /// However IDisk::copyFile() can't use throttling for reading, and can't copy an encrypted file or copy a part of the file.
bool has_throttling = src_disk->isRemote() ? static_cast<bool>(read_settings.remote_throttler) : static_cast<bool>(read_settings.local_throttler);
DataSourceDescription BackupReaderDisk::getDataSourceDescription() const if (!has_throttling && !start_pos && !copy_encrypted)
{ {
return disk->getDataSourceDescription(); auto source_data_source_description = src_disk->getDataSourceDescription();
} if (source_data_source_description.sameKind(data_source_description) && !source_data_source_description.is_encrypted
&& (length == src_disk->getFileSize(src_path)))
bool BackupWriterDisk::supportNativeCopy(DataSourceDescription data_source_description) const
{ {
return data_source_description == disk->getDataSourceDescription(); /// Use the more optimal way.
LOG_TRACE(log, "Copying file {} from disk {} to disk {}", src_path, src_disk->getName(), disk->getName());
auto dest_file_path = root_path / path_in_backup;
disk->createDirectories(dest_file_path.parent_path());
src_disk->copyFile(src_path, *disk, dest_file_path, write_settings);
return; /// copied!
}
} }
void BackupWriterDisk::copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name) /// Fallback to copy through buffers.
{ BackupWriterDefault::copyFileFromDisk(path_in_backup, src_disk, src_path, copy_encrypted, start_pos, length);
if (!src_disk)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot natively copy data to disk without source disk");
if (has_throttling || (src_offset != 0) || (src_size != src_disk->getFileSize(src_file_name)))
{
auto create_read_buffer = [this, src_disk, src_file_name] { return src_disk->readFile(src_file_name, read_settings); };
copyDataToFile(create_read_buffer, src_offset, src_size, dest_file_name);
return;
}
auto file_path = path / dest_file_name;
disk->createDirectories(file_path.parent_path());
src_disk->copyFile(src_file_name, *disk, file_path);
} }
} }
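The new gate before IDisk::copyFile() deserves a note: the native copy bypasses read buffers entirely, so it must be skipped whenever a read throttler is configured for the source (remote or local, depending on the disk). A condensed standalone sketch of that predicate, with a toy `Settings` type standing in for ClickHouse's ReadSettings:

```cpp
#include <memory>

struct Throttler {};
struct Settings { std::shared_ptr<Throttler> remote_throttler, local_throttler; };

// Native disk-to-disk copy is allowed only without throttling, in Rewrite
// mode, and for data that is not encrypted-at-rest on the source.
bool nativeCopyAllowed(const Settings & s, bool src_is_remote, bool rewrite_mode, bool encrypted)
{
    bool has_throttling = src_is_remote ? static_cast<bool>(s.remote_throttler)
                                        : static_cast<bool>(s.local_throttler);
    return !has_throttling && rewrite_mode && !encrypted;
}
```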
@ -1,53 +1,58 @@
#pragma once #pragma once
#include <Backups/BackupIO_Default.h>
#include <Disks/DiskType.h>
#include <filesystem> #include <filesystem>
#include <Backups/BackupIO.h>
#include <Interpreters/Context_fwd.h>
namespace DB namespace DB
{ {
class IDisk; class IDisk;
using DiskPtr = std::shared_ptr<IDisk>; using DiskPtr = std::shared_ptr<IDisk>;
class BackupReaderDisk : public IBackupReader class BackupReaderDisk : public BackupReaderDefault
{ {
public: public:
BackupReaderDisk(const DiskPtr & disk_, const String & path_); BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_);
~BackupReaderDisk() override; ~BackupReaderDisk() override;
bool fileExists(const String & file_name) override; bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override; UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override; std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;
void copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings) override; void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DataSourceDescription getDataSourceDescription() const override; DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override;
private: private:
DiskPtr disk; const DiskPtr disk;
std::filesystem::path path; const std::filesystem::path root_path;
Poco::Logger * log; const DataSourceDescription data_source_description;
}; };
class BackupWriterDisk : public IBackupWriter class BackupWriterDisk : public BackupWriterDefault
{ {
public: public:
BackupWriterDisk(const DiskPtr & disk_, const String & path_, const ContextPtr & context_); BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_);
~BackupWriterDisk() override; ~BackupWriterDisk() override;
bool fileExists(const String & file_name) override; bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override; UInt64 getFileSize(const String & file_name) override;
bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;
std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override; std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;
void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) override;
void removeFile(const String & file_name) override; void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override; void removeFiles(const Strings & file_names) override;
DataSourceDescription getDataSourceDescription() const override;
bool supportNativeCopy(DataSourceDescription data_source_description) const override;
void copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name) override;
private: private:
DiskPtr disk; std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;
std::filesystem::path path;
const DiskPtr disk;
const std::filesystem::path root_path;
const DataSourceDescription data_source_description;
}; };
} }
@ -1,9 +1,7 @@
#include <Backups/BackupIO_File.h> #include <Backups/BackupIO_File.h>
#include <Disks/IDisk.h> #include <Disks/DiskLocal.h>
#include <Disks/IO/createReadBufferFromFileBase.h> #include <Disks/IO/createReadBufferFromFileBase.h>
#include <IO/WriteBufferFromFile.h> #include <IO/WriteBufferFromFile.h>
#include <IO/copyData.h>
#include <Common/filesystemHelpers.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
@ -12,158 +10,146 @@ namespace fs = std::filesystem;
namespace DB namespace DB
{ {
BackupReaderFile::BackupReaderFile(const String & path_) : path(path_), log(&Poco::Logger::get("BackupReaderFile"))
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
BackupReaderFile::BackupReaderFile(const String & root_path_, const ContextPtr & context_)
: BackupReaderDefault(&Poco::Logger::get("BackupReaderFile"), context_)
, root_path(root_path_)
, data_source_description(DiskLocal::getLocalDataSourceDescription(root_path))
{ {
} }
BackupReaderFile::~BackupReaderFile() = default;
bool BackupReaderFile::fileExists(const String & file_name) bool BackupReaderFile::fileExists(const String & file_name)
{ {
return fs::exists(path / file_name); return fs::exists(root_path / file_name);
} }
UInt64 BackupReaderFile::getFileSize(const String & file_name) UInt64 BackupReaderFile::getFileSize(const String & file_name)
{ {
return fs::file_size(path / file_name); return fs::file_size(root_path / file_name);
} }
std::unique_ptr<SeekableReadBuffer> BackupReaderFile::readFile(const String & file_name) std::unique_ptr<SeekableReadBuffer> BackupReaderFile::readFile(const String & file_name)
{ {
return createReadBufferFromFileBase(path / file_name, {}); return createReadBufferFromFileBase(root_path / file_name, read_settings);
} }
void BackupReaderFile::copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path, void BackupReaderFile::copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
WriteMode write_mode, const WriteSettings & write_settings) DiskPtr destination_disk, const String & destination_path, WriteMode write_mode)
{ {
if (destination_disk->getDataSourceDescription() == getDataSourceDescription()) /// std::filesystem::copy() can copy from the filesystem only, and can't throttle or append.
bool has_throttling = static_cast<bool>(read_settings.local_throttler);
if (!has_throttling && (write_mode == WriteMode::Rewrite))
{
auto destination_data_source_description = destination_disk->getDataSourceDescription();
if (destination_data_source_description.sameKind(data_source_description)
&& (destination_data_source_description.is_encrypted == encrypted_in_backup))
{ {
/// Use the more optimal way. /// Use the more optimal way.
LOG_TRACE(log, "Copying {}/{} to disk {} locally", path, file_name, destination_disk->getName()); LOG_TRACE(log, "Copying file {} to disk {} locally", path_in_backup, destination_disk->getName());
fs::copy(path / file_name, fullPath(destination_disk, destination_path), fs::copy_options::overwrite_existing);
return; auto write_blob_function = [abs_source_path = root_path / path_in_backup, file_size](
const Strings & blob_path, WriteMode mode, const std::optional<ObjectAttributes> &) -> size_t
{
/// For local disks the size of a blob path is expected to be 1.
if (blob_path.size() != 1 || mode != WriteMode::Rewrite)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Blob writing function called with unexpected blob_path.size={} or mode={}",
blob_path.size(), mode);
fs::copy(abs_source_path, blob_path.at(0), fs::copy_options::overwrite_existing);
return file_size;
};
destination_disk->writeFileUsingBlobWritingFunction(destination_path, write_mode, write_blob_function);
return; /// copied!
}
} }
LOG_TRACE(log, "Copying {}/{} to disk {} through buffers", path, file_name, destination_disk->getName()); /// Fallback to copy through buffers.
IBackupReader::copyFileToDisk(path / file_name, size, destination_disk, destination_path, write_mode, write_settings); BackupReaderDefault::copyFileToDisk(path_in_backup, file_size, encrypted_in_backup, destination_disk, destination_path, write_mode);
} }
BackupWriterFile::BackupWriterFile(const String & path_, const ContextPtr & context_) BackupWriterFile::BackupWriterFile(const String & root_path_, const ContextPtr & context_)
: IBackupWriter(context_) : BackupWriterDefault(&Poco::Logger::get("BackupWriterFile"), context_)
, path(path_) , root_path(root_path_)
, data_source_description(DiskLocal::getLocalDataSourceDescription(root_path))
{ {
} }
BackupWriterFile::~BackupWriterFile() = default;
bool BackupWriterFile::fileExists(const String & file_name) bool BackupWriterFile::fileExists(const String & file_name)
{ {
return fs::exists(path / file_name); return fs::exists(root_path / file_name);
} }
UInt64 BackupWriterFile::getFileSize(const String & file_name) UInt64 BackupWriterFile::getFileSize(const String & file_name)
{ {
return fs::file_size(path / file_name); return fs::file_size(root_path / file_name);
} }
bool BackupWriterFile::fileContentsEqual(const String & file_name, const String & expected_file_contents) std::unique_ptr<ReadBuffer> BackupWriterFile::readFile(const String & file_name, size_t expected_file_size)
{ {
if (!fs::exists(path / file_name)) return createReadBufferFromFileBase(root_path / file_name, read_settings.adjustBufferSize(expected_file_size));
return false;
try
{
auto in = createReadBufferFromFileBase(path / file_name, {});
String actual_file_contents(expected_file_contents.size(), ' ');
return (in->read(actual_file_contents.data(), actual_file_contents.size()) == actual_file_contents.size())
&& (actual_file_contents == expected_file_contents) && in->eof();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
return false;
}
} }
std::unique_ptr<WriteBuffer> BackupWriterFile::writeFile(const String & file_name) std::unique_ptr<WriteBuffer> BackupWriterFile::writeFile(const String & file_name)
{ {
auto file_path = path / file_name; auto file_path = root_path / file_name;
fs::create_directories(file_path.parent_path()); fs::create_directories(file_path.parent_path());
return std::make_unique<WriteBufferFromFile>(file_path); return std::make_unique<WriteBufferFromFile>(file_path, write_buffer_size, -1, write_settings.local_throttler);
} }
void BackupWriterFile::removeFile(const String & file_name) void BackupWriterFile::removeFile(const String & file_name)
{ {
fs::remove(path / file_name); fs::remove(root_path / file_name);
if (fs::is_directory(path) && fs::is_empty(path)) if (fs::is_directory(root_path) && fs::is_empty(root_path))
fs::remove(path); fs::remove(root_path);
} }
void BackupWriterFile::removeFiles(const Strings & file_names) void BackupWriterFile::removeFiles(const Strings & file_names)
{ {
for (const auto & file_name : file_names) for (const auto & file_name : file_names)
fs::remove(path / file_name); fs::remove(root_path / file_name);
if (fs::is_directory(path) && fs::is_empty(path)) if (fs::is_directory(root_path) && fs::is_empty(root_path))
fs::remove(path); fs::remove(root_path);
} }
DataSourceDescription BackupWriterFile::getDataSourceDescription() const void BackupWriterFile::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length)
{ {
DataSourceDescription data_source_description; /// std::filesystem::copy() can copy from the filesystem only, and can't throttle or copy only part of a file.
bool has_throttling = static_cast<bool>(read_settings.local_throttler);
data_source_description.type = DataSourceType::Local; if (!has_throttling)
if (auto block_device_id = tryGetBlockDeviceId(path); block_device_id.has_value())
data_source_description.description = *block_device_id;
else
data_source_description.description = path;
data_source_description.is_encrypted = false;
data_source_description.is_cached = false;
return data_source_description;
}
DataSourceDescription BackupReaderFile::getDataSourceDescription() const
{ {
DataSourceDescription data_source_description; auto source_data_source_description = src_disk->getDataSourceDescription();
if (source_data_source_description.sameKind(data_source_description)
data_source_description.type = DataSourceType::Local; && (source_data_source_description.is_encrypted == copy_encrypted))
if (auto block_device_id = tryGetBlockDeviceId(path); block_device_id.has_value())
data_source_description.description = *block_device_id;
else
data_source_description.description = path;
data_source_description.is_encrypted = false;
data_source_description.is_cached = false;
return data_source_description;
}
bool BackupWriterFile::supportNativeCopy(DataSourceDescription data_source_description) const
{ {
return data_source_description == getDataSourceDescription(); /// std::filesystem::copy() can copy from a single file only.
} if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 1)
void BackupWriterFile::copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name)
{ {
std::string abs_source_path; auto abs_source_path = blob_path[0];
if (src_disk)
abs_source_path = fullPath(src_disk, src_file_name);
else
abs_source_path = fs::absolute(src_file_name);
if (has_throttling || (src_offset != 0) || (src_size != fs::file_size(abs_source_path))) /// std::filesystem::copy() can only copy a file as a whole.
if ((start_pos == 0) && (length == fs::file_size(abs_source_path)))
{ {
auto create_read_buffer = [this, abs_source_path] { return createReadBufferFromFileBase(abs_source_path, read_settings); }; /// Use the more optimal way.
copyDataToFile(create_read_buffer, src_offset, src_size, dest_file_name); LOG_TRACE(log, "Copying file {} from disk {} locally", src_path, src_disk->getName());
return; auto abs_dest_path = root_path / path_in_backup;
fs::create_directories(abs_dest_path.parent_path());
fs::copy(abs_source_path, abs_dest_path, fs::copy_options::overwrite_existing);
return; /// copied!
}
}
}
} }
auto file_path = path / dest_file_name; /// Fallback to copy through buffers.
fs::create_directories(file_path.parent_path()); BackupWriterDefault::copyFileFromDisk(path_in_backup, src_disk, src_path, copy_encrypted, start_pos, length);
fs::copy(abs_source_path, file_path, fs::copy_options::overwrite_existing);
} }
} }
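The blob-writing callback used in BackupReaderFile::copyFileToDisk() is the interesting part: the destination disk resolves the target into blob paths and hands them to the callback, which for a local disk means exactly one element that can be fs::copy'd into directly. A standalone sketch of such a callback body:

```cpp
#include <filesystem>
#include <stdexcept>
#include <string>
#include <vector>

namespace fs = std::filesystem;

// Shape of the write_blob_function lambda: for a local destination disk the
// blob path must be a single filesystem path we can copy straight into.
size_t writeBlobByLocalCopy(const fs::path & abs_source_path, size_t file_size,
                            const std::vector<std::string> & blob_path)
{
    if (blob_path.size() != 1)
        throw std::logic_error("expected a single-element blob path for a local disk");
    fs::copy(abs_source_path, blob_path.front(), fs::copy_options::overwrite_existing);
    return file_size;   // reported back to the disk as the written size
}
```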
@ -1,48 +1,51 @@
#pragma once #pragma once
#include <Backups/BackupIO_Default.h>
#include <Disks/DiskType.h>
#include <filesystem> #include <filesystem>
#include <Backups/BackupIO.h>
#include <Interpreters/Context_fwd.h>
namespace DB namespace DB
{ {
class BackupReaderFile : public IBackupReader class BackupReaderFile : public BackupReaderDefault
{ {
public: public:
explicit BackupReaderFile(const String & path_); explicit BackupReaderFile(const String & root_path_, const ContextPtr & context_);
~BackupReaderFile() override;
bool fileExists(const String & file_name) override; bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override; UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override; std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;
void copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings) override; void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DataSourceDescription getDataSourceDescription() const override; DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override;
private: private:
std::filesystem::path path; const std::filesystem::path root_path;
Poco::Logger * log; const DataSourceDescription data_source_description;
}; };
class BackupWriterFile : public IBackupWriter class BackupWriterFile : public BackupWriterDefault
{ {
public: public:
explicit BackupWriterFile(const String & path_, const ContextPtr & context_); BackupWriterFile(const String & root_path_, const ContextPtr & context_);
~BackupWriterFile() override;
bool fileExists(const String & file_name) override; bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override; UInt64 getFileSize(const String & file_name) override;
bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;
std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override; std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;
void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) override;
void removeFile(const String & file_name) override; void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override; void removeFiles(const Strings & file_names) override;
DataSourceDescription getDataSourceDescription() const override;
bool supportNativeCopy(DataSourceDescription data_source_description) const override;
void copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name) override;
private: private:
std::filesystem::path path; std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;
const std::filesystem::path root_path;
const DataSourceDescription data_source_description;
}; };
} }
@ -2,7 +2,6 @@
#if USE_AWS_S3 #if USE_AWS_S3
#include <Common/quoteString.h> #include <Common/quoteString.h>
#include <Disks/ObjectStorages/S3/copyS3FileToDisk.h>
#include <Interpreters/threadPoolCallbackRunner.h> #include <Interpreters/threadPoolCallbackRunner.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <IO/SharedThreadPools.h> #include <IO/SharedThreadPools.h>
@ -12,6 +11,7 @@
#include <IO/S3/copyS3File.h> #include <IO/S3/copyS3File.h>
#include <IO/S3/Client.h> #include <IO/S3/Client.h>
#include <IO/S3/Credentials.h> #include <IO/S3/Credentials.h>
#include <Disks/IDisk.h>
#include <Poco/Util/AbstractConfiguration.h> #include <Poco/Util/AbstractConfiguration.h>
@ -102,21 +102,15 @@ namespace
BackupReaderS3::BackupReaderS3( BackupReaderS3::BackupReaderS3(
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_) const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_)
: s3_uri(s3_uri_) : BackupReaderDefault(&Poco::Logger::get("BackupReaderS3"), context_)
, s3_uri(s3_uri_)
, client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
, read_settings(context_->getReadSettings())
, request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings)
, log(&Poco::Logger::get("BackupReaderS3")) , data_source_description{DataSourceType::S3, s3_uri.endpoint, false, false}
{ {
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
} }
DataSourceDescription BackupReaderS3::getDataSourceDescription() const
{
return DataSourceDescription{DataSourceType::S3, s3_uri.endpoint, false, false};
}
BackupReaderS3::~BackupReaderS3() = default; BackupReaderS3::~BackupReaderS3() = default;
bool BackupReaderS3::fileExists(const String & file_name) bool BackupReaderS3::fileExists(const String & file_name)
@ -138,75 +132,98 @@ std::unique_ptr<SeekableReadBuffer> BackupReaderS3::readFile(const String & file
client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, request_settings, read_settings); client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, request_settings, read_settings);
} }
void BackupReaderS3::copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path, void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
WriteMode write_mode, const WriteSettings & write_settings) DiskPtr destination_disk, const String & destination_path, WriteMode write_mode)
{ {
LOG_TRACE(log, "Copying {} to disk {}", file_name, destination_disk->getName()); /// Use the native copy as a more optimal way to copy a file from S3 to S3 if it's possible.
/// We don't check for `has_throttling` here because the native copy hardly uses the network.
auto destination_data_source_description = destination_disk->getDataSourceDescription();
if (destination_data_source_description.sameKind(data_source_description)
&& (destination_data_source_description.is_encrypted == encrypted_in_backup))
{
/// Use native copy, the more optimal way.
LOG_TRACE(log, "Copying {} from S3 to disk {} using native copy", path_in_backup, destination_disk->getName());
auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional<ObjectAttributes> & object_attributes) -> size_t
{
/// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files.
if (blob_path.size() != 2 || mode != WriteMode::Rewrite)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Blob writing function called with unexpected blob_path.size={} or mode={}",
blob_path.size(), mode);
copyS3FileToDisk( copyS3File(
client, client,
s3_uri.bucket, s3_uri.bucket,
fs::path(s3_uri.key) / file_name, fs::path(s3_uri.key) / path_in_backup,
s3_uri.version_id,
0, 0,
size, file_size,
destination_disk, /* dest_bucket= */ blob_path[1],
destination_path, /* dest_key= */ blob_path[0],
write_mode,
read_settings,
write_settings,
request_settings, request_settings,
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupReaderS3")); object_attributes,
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupReaderS3"),
/* for_disk_s3= */ true);
return file_size;
};
destination_disk->writeFileUsingBlobWritingFunction(destination_path, write_mode, write_blob_function);
return; /// copied!
}
/// Fallback to copy through buffers.
BackupReaderDefault::copyFileToDisk(path_in_backup, file_size, encrypted_in_backup, destination_disk, destination_path, write_mode);
} }
BackupWriterS3::BackupWriterS3( BackupWriterS3::BackupWriterS3(
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_) const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_)
: IBackupWriter(context_) : BackupWriterDefault(&Poco::Logger::get("BackupWriterS3"), context_)
, s3_uri(s3_uri_) , s3_uri(s3_uri_)
, client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
, request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings)
, log(&Poco::Logger::get("BackupWriterS3")) , data_source_description{DataSourceType::S3, s3_uri.endpoint, false, false}
{ {
request_settings.updateFromSettings(context_->getSettingsRef()); request_settings.updateFromSettings(context_->getSettingsRef());
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
} }
DataSourceDescription BackupWriterS3::getDataSourceDescription() const void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length)
{ {
return DataSourceDescription{DataSourceType::S3, s3_uri.endpoint, false, false}; /// Use the native copy as a more optimal way to copy a file from S3 to S3 if it's possible.
} /// We don't check for `has_throttling` here because the native copy hardly uses the network.
auto source_data_source_description = src_disk->getDataSourceDescription();
bool BackupWriterS3::supportNativeCopy(DataSourceDescription data_source_description) const if (source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted))
{ {
return getDataSourceDescription() == data_source_description; /// getBlobPath() can return more than 2 elements if the file is stored as multiple objects in the S3 bucket.
} /// In this case we can't use the native copy.
if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2)
void BackupWriterS3::copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name)
{ {
if (!src_disk) /// Use native copy, the more optimal way.
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot natively copy data to disk without source disk"); LOG_TRACE(log, "Copying file {} from disk {} to S3 using native copy", src_path, src_disk->getName());
copyS3File(
auto objects = src_disk->getStorageObjects(src_file_name); client,
if (objects.size() > 1) /* src_bucket */ blob_path[1],
{ /* src_key= */ blob_path[0],
auto create_read_buffer = [this, src_disk, src_file_name] { return src_disk->readFile(src_file_name, read_settings); }; start_pos,
copyDataToFile(create_read_buffer, src_offset, src_size, dest_file_name); length,
} s3_uri.bucket,
else fs::path(s3_uri.key) / path_in_backup,
{ request_settings,
auto object_storage = src_disk->getObjectStorage(); {},
std::string src_bucket = object_storage->getObjectsNamespace();
auto file_path = fs::path(s3_uri.key) / dest_file_name;
copyS3File(client, src_bucket, objects[0].remote_path, src_offset, src_size, s3_uri.bucket, file_path, request_settings, {},
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3")); threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3"));
return; /// copied!
} }
} }
void BackupWriterS3::copyDataToFile( /// Fallback to copy through buffers.
const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name) BackupWriterDefault::copyFileFromDisk(path_in_backup, src_disk, src_path, copy_encrypted, start_pos, length);
}
void BackupWriterS3::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length)
{ {
copyDataToS3File(create_read_buffer, offset, size, client, s3_uri.bucket, fs::path(s3_uri.key) / dest_file_name, request_settings, {}, copyDataToS3File(create_read_buffer, start_pos, length, client, s3_uri.bucket, fs::path(s3_uri.key) / path_in_backup, request_settings, {},
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3")); threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3"));
} }
@@ -225,24 +242,11 @@ UInt64 BackupWriterS3::getFileSize(const String & file_name)
     return objects[0].GetSize();
 }
 
-bool BackupWriterS3::fileContentsEqual(const String & file_name, const String & expected_file_contents)
+std::unique_ptr<ReadBuffer> BackupWriterS3::readFile(const String & file_name, size_t expected_file_size)
 {
-    if (listObjects(*client, s3_uri, file_name).empty())
-        return false;
-
-    try
-    {
-        auto in = std::make_unique<ReadBufferFromS3>(
-            client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, request_settings, read_settings);
-        String actual_file_contents(expected_file_contents.size(), ' ');
-        return (in->read(actual_file_contents.data(), actual_file_contents.size()) == actual_file_contents.size())
-            && (actual_file_contents == expected_file_contents) && in->eof();
-    }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
-        return false;
-    }
+    return std::make_unique<ReadBufferFromS3>(
+        client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, request_settings, read_settings,
+        false, 0, 0, false, expected_file_size);
 }
 
 std::unique_ptr<WriteBuffer> BackupWriterS3::writeFile(const String & file_name)
@@ -253,7 +257,8 @@ std::unique_ptr<WriteBuffer> BackupWriterS3::writeFile(const String & file_name)
         fs::path(s3_uri.key) / file_name,
         request_settings,
         std::nullopt,
-        threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3"));
+        threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3"),
+        write_settings);
 }
 
 void BackupWriterS3::removeFile(const String & file_name)

View File

@@ -3,8 +3,8 @@
 #include "config.h"
 
 #if USE_AWS_S3
-#include <Backups/BackupIO.h>
-#include <IO/ReadSettings.h>
+#include <Backups/BackupIO_Default.h>
+#include <Disks/DiskType.h>
 #include <IO/S3Common.h>
 #include <Storages/StorageS3Settings.h>
 #include <Interpreters/Context_fwd.h>
@@ -14,7 +14,7 @@ namespace DB
 {
 
 /// Represents a backup stored to AWS S3.
-class BackupReaderS3 : public IBackupReader
+class BackupReaderS3 : public BackupReaderDefault
 {
 public:
     BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_);
@@ -23,20 +23,19 @@ public:
     bool fileExists(const String & file_name) override;
     UInt64 getFileSize(const String & file_name) override;
     std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;
-    void copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
-                        WriteMode write_mode, const WriteSettings & write_settings) override;
-    DataSourceDescription getDataSourceDescription() const override;
+
+    void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
+                        DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override;
 
 private:
-    S3::URI s3_uri;
-    std::shared_ptr<S3::Client> client;
-    ReadSettings read_settings;
+    const S3::URI s3_uri;
+    const std::shared_ptr<S3::Client> client;
     S3Settings::RequestSettings request_settings;
-    Poco::Logger * log;
+    const DataSourceDescription data_source_description;
 };
 
-class BackupWriterS3 : public IBackupWriter
+class BackupWriterS3 : public BackupWriterDefault
 {
 public:
     BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_);
@@ -44,42 +43,24 @@ public:
     bool fileExists(const String & file_name) override;
     UInt64 getFileSize(const String & file_name) override;
-    bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;
     std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;
 
-    void copyDataToFile(const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name) override;
+    void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override;
+    void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
+                          bool copy_encrypted, UInt64 start_pos, UInt64 length) override;
 
     void removeFile(const String & file_name) override;
     void removeFiles(const Strings & file_names) override;
 
-    DataSourceDescription getDataSourceDescription() const override;
-    bool supportNativeCopy(DataSourceDescription data_source_description) const override;
-    void copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name) override;
-
 private:
-    void copyObjectImpl(
-        const String & src_bucket,
-        const String & src_key,
-        const String & dst_bucket,
-        const String & dst_key,
-        size_t size,
-        const std::optional<ObjectAttributes> & metadata = std::nullopt) const;
-
-    void copyObjectMultipartImpl(
-        const String & src_bucket,
-        const String & src_key,
-        const String & dst_bucket,
-        const String & dst_key,
-        size_t size,
-        const std::optional<ObjectAttributes> & metadata = std::nullopt) const;
+    std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;
 
     void removeFilesBatch(const Strings & file_names);
 
-    S3::URI s3_uri;
-    std::shared_ptr<S3::Client> client;
+    const S3::URI s3_uri;
+    const std::shared_ptr<S3::Client> client;
     S3Settings::RequestSettings request_settings;
-    Poco::Logger * log;
     std::optional<bool> supports_batch_delete;
+    const DataSourceDescription data_source_description;
 };
 
 }

View File

@@ -36,6 +36,7 @@ namespace ErrorCodes
     extern const int WRONG_BASE_BACKUP;
     extern const int BACKUP_ENTRY_NOT_FOUND;
     extern const int BACKUP_IS_EMPTY;
+    extern const int CANNOT_RESTORE_TO_NONENCRYPTED_DISK;
     extern const int FAILED_TO_SYNC_BACKUP_OR_RESTORE;
     extern const int LOGICAL_ERROR;
 }
@@ -339,6 +340,8 @@ void BackupImpl::writeBackupMetadata()
             }
             if (!info.data_file_name.empty() && (info.data_file_name != info.file_name))
                 *out << "<data_file>" << xml << info.data_file_name << "</data_file>";
+            if (info.encrypted_by_disk)
+                *out << "<encrypted_by_disk>true</encrypted_by_disk>";
         }
 
         total_size += info.size;
@@ -444,6 +447,7 @@ void BackupImpl::readBackupMetadata()
             {
                 info.data_file_name = getString(file_config, "data_file", info.file_name);
             }
+            info.encrypted_by_disk = getBool(file_config, "encrypted_by_disk", false);
         }
 
         file_names.emplace(info.file_name, std::pair{info.size, info.checksum});
@@ -633,6 +637,11 @@ std::unique_ptr<SeekableReadBuffer> BackupImpl::readFile(const String & file_nam
 }
 
 std::unique_ptr<SeekableReadBuffer> BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) const
+{
+    return readFileImpl(size_and_checksum, /* read_encrypted= */ false);
+}
+
+std::unique_ptr<SeekableReadBuffer> BackupImpl::readFileImpl(const SizeAndChecksum & size_and_checksum, bool read_encrypted) const
 {
     if (open_mode != OpenMode::READ)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is not opened for reading");
@@ -660,6 +669,14 @@ std::unique_ptr<SeekableReadBuffer> BackupImpl::readFile(const SizeAndChecksum &
         info = it->second;
     }
 
+    if (info.encrypted_by_disk != read_encrypted)
+    {
+        throw Exception(
+            ErrorCodes::CANNOT_RESTORE_TO_NONENCRYPTED_DISK,
+            "File {} is encrypted in the backup, it can be restored only to an encrypted disk",
+            info.data_file_name);
+    }
+
     std::unique_ptr<SeekableReadBuffer> read_buffer;
     std::unique_ptr<SeekableReadBuffer> base_read_buffer;
@@ -720,14 +737,14 @@ std::unique_ptr<SeekableReadBuffer> BackupImpl::readFile(const SizeAndChecksum &
     }
 }
 
-size_t BackupImpl::copyFileToDisk(const String & file_name, DiskPtr destination_disk, const String & destination_path,
-                                  WriteMode write_mode, const WriteSettings & write_settings) const
+size_t BackupImpl::copyFileToDisk(const String & file_name,
+                                  DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) const
 {
-    return copyFileToDisk(getFileSizeAndChecksum(file_name), destination_disk, destination_path, write_mode, write_settings);
+    return copyFileToDisk(getFileSizeAndChecksum(file_name), destination_disk, destination_path, write_mode);
 }
 
-size_t BackupImpl::copyFileToDisk(const SizeAndChecksum & size_and_checksum, DiskPtr destination_disk, const String & destination_path,
-                                  WriteMode write_mode, const WriteSettings & write_settings) const
+size_t BackupImpl::copyFileToDisk(const SizeAndChecksum & size_and_checksum,
+                                  DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) const
 {
     if (open_mode != OpenMode::READ)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is not opened for reading");
@@ -760,19 +777,26 @@ size_t BackupImpl::copyFileToDisk(const SizeAndChecksum & size_and_checksum, Dis
         info = it->second;
     }
 
+    if (info.encrypted_by_disk && !destination_disk->getDataSourceDescription().is_encrypted)
+    {
+        throw Exception(
+            ErrorCodes::CANNOT_RESTORE_TO_NONENCRYPTED_DISK,
+            "File {} is encrypted in the backup, it can be restored only to an encrypted disk",
+            info.data_file_name);
+    }
+
     bool file_copied = false;
 
     if (info.size && !info.base_size && !use_archive)
     {
         /// Data comes completely from this backup.
-        reader->copyFileToDisk(info.data_file_name, info.size, destination_disk, destination_path, write_mode, write_settings);
+        reader->copyFileToDisk(info.data_file_name, info.size, info.encrypted_by_disk, destination_disk, destination_path, write_mode);
         file_copied = true;
     }
     else if (info.size && (info.size == info.base_size))
     {
         /// Data comes completely from the base backup (nothing comes from this backup).
-        base_backup->copyFileToDisk(std::pair{info.base_size, info.base_checksum}, destination_disk, destination_path, write_mode, write_settings);
+        base_backup->copyFileToDisk(std::pair{info.base_size, info.base_checksum}, destination_disk, destination_path, write_mode);
         file_copied = true;
     }
@@ -786,9 +810,13 @@ size_t BackupImpl::copyFileToDisk(const SizeAndChecksum & size_and_checksum, Dis
     else
     {
         /// Use the generic way to copy data. `readFile()` will update `num_read_files`.
-        auto read_buffer = readFile(size_and_checksum);
-        auto write_buffer = destination_disk->writeFile(destination_path, std::min<size_t>(info.size, DBMS_DEFAULT_BUFFER_SIZE),
-                                                        write_mode, write_settings);
+        auto read_buffer = readFileImpl(size_and_checksum, /* read_encrypted= */ info.encrypted_by_disk);
+        std::unique_ptr<WriteBuffer> write_buffer;
+        size_t buf_size = std::min<size_t>(info.size, reader->getWriteBufferSize());
+        if (info.encrypted_by_disk)
+            write_buffer = destination_disk->writeEncryptedFile(destination_path, buf_size, write_mode, reader->getWriteSettings());
+        else
+            write_buffer = destination_disk->writeFile(destination_path, buf_size, write_mode, reader->getWriteSettings());
         copyData(*read_buffer, *write_buffer, info.size);
         write_buffer->finalize();
     }
@@ -805,72 +833,57 @@ void BackupImpl::writeFile(const BackupFileInfo & info, BackupEntryPtr entry)
     if (writing_finalized)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is already finalized");
 
-    std::string from_file_name = "memory buffer";
-    if (auto fname = entry->getFilePath(); !fname.empty())
-        from_file_name = "file " + fname;
-
+    bool should_check_lock_file = false;
     {
         std::lock_guard lock{mutex};
         ++num_files;
         total_size += info.size;
+        if (!num_entries)
+            should_check_lock_file = true;
     }
 
+    auto src_disk = entry->getDisk();
+    auto src_file_path = entry->getFilePath();
+    bool from_immutable_file = entry->isFromImmutableFile();
+    String src_file_desc = src_file_path.empty() ? "memory buffer" : ("file " + src_file_path);
+
     if (info.data_file_name.empty())
     {
-        LOG_TRACE(log, "Writing backup for file {} from {}: skipped, {}", info.data_file_name, from_file_name, !info.size ? "empty" : "base backup has it");
+        LOG_TRACE(log, "Writing backup for file {} from {}: skipped, {}", info.data_file_name, src_file_desc, !info.size ? "empty" : "base backup has it");
         return;
     }
 
     if (!coordination->startWritingFile(info.data_file_index))
     {
-        LOG_TRACE(log, "Writing backup for file {} from {}: skipped, data file #{} is already being written", info.data_file_name, from_file_name, info.data_file_index);
+        LOG_TRACE(log, "Writing backup for file {} from {}: skipped, data file #{} is already being written", info.data_file_name, src_file_desc, info.data_file_index);
         return;
     }
 
-    LOG_TRACE(log, "Writing backup for file {} from {}: data file #{}", info.data_file_name, from_file_name, info.data_file_index);
-
-    auto writer_description = writer->getDataSourceDescription();
-    auto reader_description = entry->getDataSourceDescription();
-
-    /// We need to copy whole file without archive, we can do it faster
-    /// if source and destination are compatible
-    if (!use_archive && writer->supportNativeCopy(reader_description))
-    {
-        /// Should be much faster than writing data through server.
-        LOG_TRACE(log, "Will copy file {} using native copy", info.data_file_name);
-
-        /// NOTE: `mutex` must be unlocked here otherwise writing will be in one thread maximum and hence slow.
-        writer->copyFileNative(entry->tryGetDiskIfExists(), entry->getFilePath(), info.base_size, info.size - info.base_size, info.data_file_name);
-    }
-    else
-    {
-        bool has_entries = false;
-        {
-            std::lock_guard lock{mutex};
-            has_entries = num_entries > 0;
-        }
-        if (!has_entries)
-            checkLockFile(true);
-
-        if (use_archive)
-        {
-            LOG_TRACE(log, "Adding file {} to archive", info.data_file_name);
-            auto out = archive_writer->writeFile(info.data_file_name);
-            auto read_buffer = entry->getReadBuffer();
-            if (info.base_size != 0)
-                read_buffer->seek(info.base_size, SEEK_SET);
-            copyData(*read_buffer, *out);
-            out->finalize();
-        }
-        else
-        {
-            LOG_TRACE(log, "Will copy file {}", info.data_file_name);
-            auto create_read_buffer = [entry] { return entry->getReadBuffer(); };
-
-            /// NOTE: `mutex` must be unlocked here otherwise writing will be in one thread maximum and hence slow.
-            writer->copyDataToFile(create_read_buffer, info.base_size, info.size - info.base_size, info.data_file_name);
-        }
+    if (!should_check_lock_file)
+        checkLockFile(true);
+
+    /// NOTE: `mutex` must be unlocked during copying otherwise writing will be in one thread maximum and hence slow.
+
+    if (use_archive)
+    {
+        LOG_TRACE(log, "Writing backup for file {} from {}: data file #{}, adding to archive", info.data_file_name, src_file_desc, info.data_file_index);
+        auto out = archive_writer->writeFile(info.data_file_name);
+        auto read_buffer = entry->getReadBuffer(writer->getReadSettings());
+        if (info.base_size != 0)
+            read_buffer->seek(info.base_size, SEEK_SET);
+        copyData(*read_buffer, *out);
+        out->finalize();
+    }
+    else if (src_disk && from_immutable_file)
+    {
+        LOG_TRACE(log, "Writing backup for file {} from {} (disk {}): data file #{}", info.data_file_name, src_file_desc, src_disk->getName(), info.data_file_index);
+        writer->copyFileFromDisk(info.data_file_name, src_disk, src_file_path, info.encrypted_by_disk, info.base_size, info.size - info.base_size);
+    }
+    else
+    {
+        LOG_TRACE(log, "Writing backup for file {} from {}: data file #{}", info.data_file_name, src_file_desc, info.data_file_index);
+        auto create_read_buffer = [entry, read_settings = writer->getReadSettings()] { return entry->getReadBuffer(read_settings); };
+        writer->copyDataToFile(info.data_file_name, create_read_buffer, info.base_size, info.size - info.base_size);
     }
 
     {
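A worked example of the offset arithmetic used above for incremental entries (the numbers are illustrative, not from the source): only the part of the file that is absent from the base backup is copied into this backup.

```cpp
// Hypothetical numbers: info.size = 1000, info.base_size = 700.
#include <cassert>
#include <cstdint>

int main()
{
    uint64_t size = 1000;               // info.size: total file size
    uint64_t base_size = 700;           // info.base_size: prefix already in the base backup
    uint64_t start_pos = base_size;     // passed to copyFileFromDisk()/copyDataToFile()
    uint64_t length = size - base_size; // only the new suffix goes into this backup
    assert(start_pos == 700 && length == 300);
}
```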

View File

@@ -76,10 +76,8 @@
     SizeAndChecksum getFileSizeAndChecksum(const String & file_name) const override;
     std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) const override;
     std::unique_ptr<SeekableReadBuffer> readFile(const SizeAndChecksum & size_and_checksum) const override;
-    size_t copyFileToDisk(const String & file_name, DiskPtr destination_disk, const String & destination_path,
-                          WriteMode write_mode, const WriteSettings & write_settings) const override;
-    size_t copyFileToDisk(const SizeAndChecksum & size_and_checksum, DiskPtr destination_disk, const String & destination_path,
-                          WriteMode write_mode, const WriteSettings & write_settings) const override;
+    size_t copyFileToDisk(const String & file_name, DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) const override;
+    size_t copyFileToDisk(const SizeAndChecksum & size_and_checksum, DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) const override;
     void writeFile(const BackupFileInfo & info, BackupEntryPtr entry) override;
     void finalizeWriting() override;
     bool supportsWritingInMultipleThreads() const override { return !use_archive; }
@@ -109,6 +107,8 @@ private:
     /// Calculates and sets `compressed_size`.
     void setCompressedSize();
 
+    std::unique_ptr<SeekableReadBuffer> readFileImpl(const SizeAndChecksum & size_and_checksum, bool read_encrypted) const;
+
     const String backup_name_for_logging;
     const bool use_archive;
     const ArchiveParams archive_params;

View File

@@ -23,6 +23,7 @@ namespace ErrorCodes
     M(String, password) \
     M(Bool, structure_only) \
     M(Bool, async) \
+    M(Bool, decrypt_files_from_encrypted_disks) \
     M(Bool, deduplicate_files) \
     M(UInt64, shard_num) \
     M(UInt64, replica_num) \

View File

@@ -32,6 +32,9 @@ struct BackupSettings
     /// Whether the BACKUP command must return immediately without waiting until the backup has completed.
     bool async = false;
 
+    /// Whether the BACKUP command should decrypt files stored on encrypted disks.
+    bool decrypt_files_from_encrypted_disks = false;
+
     /// Whether the BACKUP will omit similar files (within one backup only).
     bool deduplicate_files = true;

View File

@@ -368,6 +368,7 @@ void BackupsWorker::doBackup(
 
         /// Wait until all the hosts have written their backup entries.
         backup_coordination->waitForStage(Stage::COMPLETED);
+        backup_coordination->setStage(Stage::COMPLETED,"");
     }
     else
     {
@@ -654,12 +655,26 @@ void BackupsWorker::doRestore(
     /// (If this isn't ON CLUSTER query RestorerFromBackup will check access rights later.)
     ClusterPtr cluster;
     bool on_cluster = !restore_query->cluster.empty();
+
     if (on_cluster)
     {
         restore_query->cluster = context->getMacros()->expand(restore_query->cluster);
         cluster = context->getCluster(restore_query->cluster);
         restore_settings.cluster_host_ids = cluster->getHostIDs();
+    }
+
+    /// Make a restore coordination.
+    if (!restore_coordination)
+        restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ on_cluster);
+
+    if (!allow_concurrent_restores && restore_coordination->hasConcurrentRestores(std::ref(num_active_restores)))
+        throw Exception(
+            ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED,
+            "Concurrent restores not supported, turn on setting 'allow_concurrent_restores'");
+
+    if (on_cluster)
+    {
         /// We cannot just use access checking provided by the function executeDDLQueryOnCluster(): it would be incorrect
         /// because different replicas can contain different set of tables and so the required access rights can differ too.
         /// So the right way is pass through the entire cluster and check access for each host.
@@ -676,15 +691,6 @@
         }
     }
 
-    /// Make a restore coordination.
-    if (!restore_coordination)
-        restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ on_cluster);
-
-    if (!allow_concurrent_restores && restore_coordination->hasConcurrentRestores(std::ref(num_active_restores)))
-        throw Exception(
-            ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED,
-            "Concurrent restores not supported, turn on setting 'allow_concurrent_restores'");
-
     /// Do RESTORE.
     if (on_cluster)
     {
@@ -703,6 +709,7 @@
 
         /// Wait until all the hosts have written their backup entries.
         restore_coordination->waitForStage(Stage::COMPLETED);
+        restore_coordination->setStage(Stage::COMPLETED,"");
     }
     else
     {

View File

@@ -109,10 +109,10 @@
     /// Copies a file from the backup to a specified destination disk. Returns the number of bytes written.
     virtual size_t copyFileToDisk(const String & file_name, DiskPtr destination_disk, const String & destination_path,
-                                  WriteMode write_mode = WriteMode::Rewrite, const WriteSettings & write_settings = {}) const = 0;
+                                  WriteMode write_mode = WriteMode::Rewrite) const = 0;
 
     virtual size_t copyFileToDisk(const SizeAndChecksum & size_and_checksum, DiskPtr destination_disk, const String & destination_path,
-                                  WriteMode write_mode = WriteMode::Rewrite, const WriteSettings & write_settings = {}) const = 0;
+                                  WriteMode write_mode = WriteMode::Rewrite) const = 0;
 
     /// Puts a new entry to the backup.
     virtual void writeFile(const BackupFileInfo & file_info, BackupEntryPtr entry) = 0;

View File

@@ -17,23 +17,16 @@ class IBackupEntriesLazyBatch::BackupEntryFromBatch : public IBackupEntry
 public:
     BackupEntryFromBatch(const std::shared_ptr<IBackupEntriesLazyBatch> & batch_, size_t index_) : batch(batch_), index(index_) { }
 
+    std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override { return getInternalBackupEntry()->getReadBuffer(read_settings); }
     UInt64 getSize() const override { return getInternalBackupEntry()->getSize(); }
-    std::optional<UInt128> getChecksum() const override { return getInternalBackupEntry()->getChecksum(); }
-    std::unique_ptr<SeekableReadBuffer> getReadBuffer() const override { return getInternalBackupEntry()->getReadBuffer(); }
-
-    String getFilePath() const override
-    {
-        return getInternalBackupEntry()->getFilePath();
-    }
-
-    DiskPtr tryGetDiskIfExists() const override
-    {
-        return getInternalBackupEntry()->tryGetDiskIfExists();
-    }
-
-    DataSourceDescription getDataSourceDescription() const override
-    {
-        return getInternalBackupEntry()->getDataSourceDescription();
-    }
+    UInt128 getChecksum() const override { return getInternalBackupEntry()->getChecksum(); }
+    std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override { return getInternalBackupEntry()->getPartialChecksum(prefix_length); }
+    DataSourceDescription getDataSourceDescription() const override { return getInternalBackupEntry()->getDataSourceDescription(); }
+    bool isEncryptedByDisk() const override { return getInternalBackupEntry()->isEncryptedByDisk(); }
+    bool isFromFile() const override { return getInternalBackupEntry()->isFromFile(); }
+    bool isFromImmutableFile() const override { return getInternalBackupEntry()->isFromImmutableFile(); }
+    String getFilePath() const override { return getInternalBackupEntry()->getFilePath(); }
+    DiskPtr getDisk() const override { return getInternalBackupEntry()->getDisk(); }
 
 private:
     BackupEntryPtr getInternalBackupEntry() const

View File

@@ -20,16 +20,24 @@
     /// Returns the size of the data.
     virtual UInt64 getSize() const = 0;
 
-    /// Returns the checksum of the data if it's precalculated.
-    /// Can return nullopt which means the checksum should be calculated from the read buffer.
-    virtual std::optional<UInt128> getChecksum() const { return {}; }
+    /// Returns the checksum of the data.
+    virtual UInt128 getChecksum() const = 0;
+
+    /// Returns a partial checksum, i.e. the checksum calculated for a prefix part of the data.
+    /// Can return nullopt if the partial checksum is too difficult to calculate.
+    virtual std::optional<UInt128> getPartialChecksum(size_t /* prefix_length */) const { return {}; }
 
     /// Returns a read buffer for reading the data.
-    virtual std::unique_ptr<SeekableReadBuffer> getReadBuffer() const = 0;
+    virtual std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const = 0;
 
-    virtual String getFilePath() const = 0;
+    /// Returns true if the data returned by getReadBuffer() is encrypted by an encrypted disk.
+    virtual bool isEncryptedByDisk() const { return false; }
 
-    virtual DiskPtr tryGetDiskIfExists() const = 0;
+    /// Returns information about disk and file if this backup entry is generated from a file.
+    virtual bool isFromFile() const { return false; }
+    virtual bool isFromImmutableFile() const { return false; }
+    virtual String getFilePath() const { return ""; }
+    virtual DiskPtr getDisk() const { return nullptr; }
 
     virtual DataSourceDescription getDataSourceDescription() const = 0;
 };
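As a rough, self-contained sketch of what implementing this reworked interface looks like for an in-memory entry (simplified stand-ins for the ClickHouse types; the checksum is a toy hash, not the SipHash the real code uses, and the real project has classes like `BackupEntryFromMemory` for this):

```cpp
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <sstream>
#include <string>

using UInt64 = uint64_t;
using UInt128 = unsigned __int128; // stand-in for DB::UInt128
struct ReadSettings {};

struct IBackupEntry
{
    virtual ~IBackupEntry() = default;
    virtual UInt64 getSize() const = 0;
    virtual UInt128 getChecksum() const = 0;
    virtual std::optional<UInt128> getPartialChecksum(size_t /* prefix_length */) const { return {}; }
    virtual std::unique_ptr<std::istream> getReadBuffer(const ReadSettings &) const = 0; // SeekableReadBuffer in the real interface
    virtual bool isEncryptedByDisk() const { return false; }
};

class BackupEntryFromString : public IBackupEntry
{
public:
    explicit BackupEntryFromString(std::string data_) : data(std::move(data_)) {}

    UInt64 getSize() const override { return data.size(); }

    UInt128 getChecksum() const override
    {
        UInt128 h = 1469598103934665603ull; // toy FNV-style hash for illustration
        for (unsigned char c : data) { h ^= c; h *= 1099511628211ull; }
        return h;
    }

    std::unique_ptr<std::istream> getReadBuffer(const ReadSettings &) const override
    {
        return std::make_unique<std::istringstream>(data);
    }

private:
    std::string data;
};
```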

View File

@@ -93,7 +93,10 @@ void RestoreCoordinationRemote::createRootNodes()
 
 void RestoreCoordinationRemote::setStage(const String & new_stage, const String & message)
 {
-    stage_sync->set(current_host, new_stage, message);
+    if (is_internal)
+        stage_sync->set(current_host, new_stage, message);
+    else
+        stage_sync->set(current_host, new_stage, /* message */ "", /* all_hosts */ true);
 }
 
 void RestoreCoordinationRemote::setError(const Exception & exception)
@@ -283,8 +286,8 @@ bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic<size_t>
             String status;
             if (zk->tryGet(root_zookeeper_path + "/" + existing_restore_path + "/stage", status))
             {
-                /// If status is not COMPLETED it could be because the restore failed, check if 'error' exists
-                if (status != Stage::COMPLETED && !zk->exists(root_zookeeper_path + "/" + existing_restore_path + "/error"))
+                /// Check if some other restore is in progress
+                if (status == Stage::SCHEDULED_TO_START)
                 {
                     LOG_WARNING(log, "Found a concurrent restore: {}, current restore: {}", existing_restore_uuid, toString(restore_uuid));
                     result = true;

View File

@@ -169,9 +169,9 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
         {
             std::shared_ptr<IBackupReader> reader;
             if (engine_name == "File")
-                reader = std::make_shared<BackupReaderFile>(path);
+                reader = std::make_shared<BackupReaderFile>(path, params.context);
             else
-                reader = std::make_shared<BackupReaderDisk>(disk, path);
+                reader = std::make_shared<BackupReaderDisk>(disk, path, params.context);
             return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context);
         }
         else

View File

@@ -579,6 +579,7 @@
     M(694, ASYNC_LOAD_CYCLE) \
     M(695, ASYNC_LOAD_FAILED) \
     M(696, ASYNC_LOAD_CANCELED) \
+    M(697, CANNOT_RESTORE_TO_NONENCRYPTED_DISK) \
     \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \

View File

@@ -779,6 +779,7 @@ class IColumn;
     MAKE_OBSOLETE(M, UInt64, partial_merge_join_optimizations, 0) \
     MAKE_OBSOLETE(M, MaxThreads, max_alter_threads, 0) \
     MAKE_OBSOLETE(M, Bool, allow_experimental_projection_optimization, true) \
+    MAKE_OBSOLETE(M, Bool, allow_experimental_query_cache, true) \
     /* moved to config.xml: see also src/Core/ServerSettings.h */ \
     MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_buffer_flush_schedule_pool_size, 16) \
     MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_pool_size, 16) \

View File

@@ -348,6 +348,23 @@ size_t DiskEncrypted::getFileSize(const String & path) const
     return size > FileEncryption::Header::kSize ? (size - FileEncryption::Header::kSize) : 0;
 }
 
+UInt128 DiskEncrypted::getEncryptedFileIV(const String & path) const
+{
+    auto wrapped_path = wrappedPath(path);
+    auto read_buffer = delegate->readFile(wrapped_path, ReadSettings().adjustBufferSize(FileEncryption::Header::kSize));
+    if (read_buffer->eof())
+        return 0;
+    auto header = readHeader(*read_buffer);
+    return header.init_vector.get();
+}
+
+size_t DiskEncrypted::getEncryptedFileSize(size_t unencrypted_size) const
+{
+    if (unencrypted_size)
+        return unencrypted_size + FileEncryption::Header::kSize;
+    return 0;
+}
+
 void DiskEncrypted::truncateFile(const String & path, size_t size)
 {
     auto wrapped_path = wrappedPath(path);
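The size mapping between encrypted and unencrypted files is a fixed-header offset in both directions. A sketch, with `kHeaderSize` standing in for `FileEncryption::Header::kSize` (the illustrative value below is an assumption; the real constant lives in the ClickHouse source):

```cpp
#include <cassert>
#include <cstddef>

constexpr size_t kHeaderSize = 64; // illustrative value only

size_t encryptedFileSize(size_t unencrypted_size) // cf. getEncryptedFileSize() above
{
    return unencrypted_size ? unencrypted_size + kHeaderSize : 0; // empty files stay empty
}

size_t unencryptedFileSize(size_t size_on_disk) // cf. DiskEncrypted::getFileSize()
{
    return size_on_disk > kHeaderSize ? size_on_disk - kHeaderSize : 0;
}

int main()
{
    assert(encryptedFileSize(0) == 0);
    assert(unencryptedFileSize(encryptedFileSize(1000)) == 1000); // the two are inverses
}
```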

View File

@@ -186,6 +186,46 @@
         delegate->removeSharedFileIfExists(wrapped_path, flag);
     }
 
+    Strings getBlobPath(const String & path) const override
+    {
+        auto wrapped_path = wrappedPath(path);
+        return delegate->getBlobPath(wrapped_path);
+    }
+
+    void writeFileUsingBlobWritingFunction(const String & path, WriteMode mode, WriteBlobFunction && write_blob_function) override
+    {
+        auto wrapped_path = wrappedPath(path);
+        delegate->writeFileUsingBlobWritingFunction(wrapped_path, mode, std::move(write_blob_function));
+    }
+
+    std::unique_ptr<ReadBufferFromFileBase> readEncryptedFile(const String & path, const ReadSettings & settings) const override
+    {
+        auto wrapped_path = wrappedPath(path);
+        return delegate->readFile(wrapped_path, settings);
+    }
+
+    std::unique_ptr<WriteBufferFromFileBase> writeEncryptedFile(
+        const String & path,
+        size_t buf_size,
+        WriteMode mode,
+        const WriteSettings & settings) const override
+    {
+        auto wrapped_path = wrappedPath(path);
+        return delegate->writeFile(wrapped_path, buf_size, mode, settings);
+    }
+
+    size_t getEncryptedFileSize(const String & path) const override
+    {
+        auto wrapped_path = wrappedPath(path);
+        return delegate->getFileSize(wrapped_path);
+    }
+
+    size_t getEncryptedFileSize(size_t unencrypted_size) const override;
+
+    UInt128 getEncryptedFileIV(const String & path) const override;
+
+    static size_t convertFileSizeToEncryptedFileSize(size_t file_size);
+
     void setLastModified(const String & path, const Poco::Timestamp & timestamp) override
     {
         auto wrapped_path = wrappedPath(path);

View File

@@ -328,6 +328,18 @@ DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode, const
         fs::path(disk_path) / path, buf_size, flags, settings.local_throttler);
 }
 
+std::vector<String> DiskLocal::getBlobPath(const String & path) const
+{
+    auto fs_path = fs::path(disk_path) / path;
+    return {fs_path};
+}
+
+void DiskLocal::writeFileUsingBlobWritingFunction(const String & path, WriteMode mode, WriteBlobFunction && write_blob_function)
+{
+    auto fs_path = fs::path(disk_path) / path;
+    std::move(write_blob_function)({fs_path}, mode, {});
+}
+
 void DiskLocal::removeFile(const String & path)
 {
     auto fs_path = fs::path(disk_path) / path;
@@ -455,15 +467,8 @@ DiskLocal::DiskLocal(const String & name_, const String & path_, UInt64 keep_fre
     , disk_path(path_)
     , keep_free_space_bytes(keep_free_space_bytes_)
     , logger(&Poco::Logger::get("DiskLocal"))
+    , data_source_description(getLocalDataSourceDescription(disk_path))
 {
-    data_source_description.type = DataSourceType::Local;
-
-    if (auto block_device_id = tryGetBlockDeviceId(disk_path); block_device_id.has_value())
-        data_source_description.description = *block_device_id;
-    else
-        data_source_description.description = disk_path;
-    data_source_description.is_encrypted = false;
-    data_source_description.is_cached = false;
 }
 
 DiskLocal::DiskLocal(
@@ -479,6 +484,20 @@ DataSourceDescription DiskLocal::getDataSourceDescription() const
     return data_source_description;
 }
 
+DataSourceDescription DiskLocal::getLocalDataSourceDescription(const String & path)
+{
+    DataSourceDescription res;
+    res.type = DataSourceType::Local;
+
+    if (auto block_device_id = tryGetBlockDeviceId(path); block_device_id.has_value())
+        res.description = *block_device_id;
+    else
+        res.description = path;
+    res.is_encrypted = false;
+    res.is_cached = false;
+    return res;
+}
+
 void DiskLocal::shutdown()
 {
     if (disk_checker)

View File

@@ -81,6 +81,9 @@ public:
         WriteMode mode,
         const WriteSettings & settings) override;
 
+    Strings getBlobPath(const String & path) const override;
+    void writeFileUsingBlobWritingFunction(const String & path, WriteMode mode, WriteBlobFunction && write_blob_function) override;
+
     void removeFile(const String & path) override;
     void removeFileIfExists(const String & path) override;
     void removeDirectory(const String & path) override;
@@ -99,6 +102,7 @@ public:
     void truncateFile(const String & path, size_t size) override;
 
     DataSourceDescription getDataSourceDescription() const override;
+    static DataSourceDescription getLocalDataSourceDescription(const String & path);
 
     bool isRemote() const override { return false; }

View File

@@ -8,4 +8,9 @@ bool DataSourceDescription::operator==(const DataSourceDescription & other) const
     return std::tie(type, description, is_encrypted) == std::tie(other.type, other.description, other.is_encrypted);
 }
 
+bool DataSourceDescription::sameKind(const DataSourceDescription & other) const
+{
+    return std::tie(type, description) == std::tie(other.type, other.description);
+}
+
 }
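A self-contained illustration of why `sameKind()` exists alongside `operator==`: two descriptions that differ only in `is_encrypted` (or `is_cached`) compare unequal, yet describe the same kind of data source, which is exactly the case the backup code above needs to treat as native-copyable. The struct is re-declared here for the sketch and the endpoint is made up:

```cpp
#include <cassert>
#include <string>
#include <tuple>

enum class DataSourceType { Local, S3 };

struct DataSourceDescription
{
    DataSourceType type;
    std::string description;
    bool is_encrypted = false;
    bool is_cached = false;

    bool operator==(const DataSourceDescription & other) const
    {
        return std::tie(type, description, is_encrypted) == std::tie(other.type, other.description, other.is_encrypted);
    }

    bool sameKind(const DataSourceDescription & other) const
    {
        return std::tie(type, description) == std::tie(other.type, other.description);
    }
};

int main()
{
    DataSourceDescription plain{DataSourceType::S3, "https://s3.example.com", false, false};
    DataSourceDescription encrypted = plain;
    encrypted.is_encrypted = true;

    assert(!(plain == encrypted));     /// operator== also compares is_encrypted
    assert(plain.sameKind(encrypted)); /// sameKind() compares only type and description
}
```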

View File

@@ -51,6 +51,7 @@ struct DataSourceDescription
     bool is_cached = false;
 
     bool operator==(const DataSourceDescription & other) const;
+    bool sameKind(const DataSourceDescription & other) const;
 };
 
 }

View File

@@ -69,13 +69,9 @@ public:
         return disk.writeFile(path, buf_size, mode, settings);
     }
 
-    void writeFileUsingCustomWriteObject(
-        const String & path,
-        WriteMode mode,
-        std::function<size_t(const StoredObject & object, WriteMode mode, const std::optional<ObjectAttributes> & object_attributes)>
-            custom_write_object_function) override
+    void writeFileUsingBlobWritingFunction(const String & path, WriteMode mode, WriteBlobFunction && write_blob_function) override
     {
-        disk.writeFileUsingCustomWriteObject(path, mode, std::move(custom_write_object_function));
+        disk.writeFileUsingBlobWritingFunction(path, mode, std::move(write_blob_function));
     }
 
     void removeFile(const std::string & path) override

View File

@@ -35,16 +35,6 @@ void IDisk::copyFile(const String & from_file_path, IDisk & to_disk, const Strin
     out->finalize();
 }
 
-void IDisk::writeFileUsingCustomWriteObject(
-    const String &, WriteMode, std::function<size_t(const StoredObject &, WriteMode, const std::optional<ObjectAttributes> &)>)
-{
-    throw Exception(
-        ErrorCodes::NOT_IMPLEMENTED,
-        "Method `writeFileUsingCustomWriteObject()` is not implemented for disk: {}",
-        getDataSourceDescription().type);
-}
-
 DiskTransactionPtr IDisk::createTransaction()
 {
     return std::make_shared<FakeDiskTransaction>(*this);
@@ -62,6 +52,31 @@ void IDisk::removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_ba
     }
 }
 
+std::unique_ptr<ReadBufferFromFileBase> IDisk::readEncryptedFile(const String &, const ReadSettings &) const
+{
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "File encryption is not implemented for disk of type {}", getDataSourceDescription().type);
+}
+
+std::unique_ptr<WriteBufferFromFileBase> IDisk::writeEncryptedFile(const String &, size_t, WriteMode, const WriteSettings &) const
+{
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "File encryption is not implemented for disk of type {}", getDataSourceDescription().type);
+}
+
+size_t IDisk::getEncryptedFileSize(const String &) const
+{
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "File encryption is not implemented for disk of type {}", getDataSourceDescription().type);
+}
+
+size_t IDisk::getEncryptedFileSize(size_t) const
+{
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "File encryption is not implemented for disk of type {}", getDataSourceDescription().type);
+}
+
+UInt128 IDisk::getEncryptedFileIV(const String &) const
+{
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "File encryption is not implemented for disk of type {}", getDataSourceDescription().type);
+}
+
 using ResultsCollector = std::vector<std::future<void>>;

View File

@@ -209,15 +209,6 @@ public:
         WriteMode mode = WriteMode::Rewrite,
         const WriteSettings & settings = {}) = 0;
 
-    /// Write a file using a custom function to write an object to the disk's object storage.
-    /// This method is alternative to writeFile(), the difference is that writeFile() calls IObjectStorage::writeObject()
-    /// to write an object to the object storage while this method allows to specify a callback for that.
-    virtual void writeFileUsingCustomWriteObject(
-        const String & path,
-        WriteMode mode,
-        std::function<size_t(const StoredObject & object, WriteMode mode, const std::optional<ObjectAttributes> & object_attributes)>
-            custom_write_object_function);
-
     /// Remove file. Throws exception if file doesn't exists or it's a directory.
     /// Return whether file was finally removed. (For remote disks it is not always removed).
     virtual void removeFile(const String & path) = 0;
@@ -247,6 +238,34 @@ public:
     /// Second bool param is a flag to remove (true) or keep (false) shared data on S3
     virtual void removeSharedFileIfExists(const String & path, bool /* keep_shared_data */) { removeFileIfExists(path); }
 
+    /// Returns the path to a blob representing a specified file.
+    /// The meaning of the returned path depends on disk's type.
+    /// E.g. for DiskLocal it's the absolute path to the file and for DiskObjectStorage it's
+    /// StoredObject::remote_path for each stored object combined with the name of the objects' namespace.
+    virtual Strings getBlobPath(const String & path) const = 0;
+
+    using WriteBlobFunction = std::function<size_t(const Strings & blob_path, WriteMode mode, const std::optional<ObjectAttributes> & object_attributes)>;
+
+    /// Write a file using a custom function to write a blob representing the file.
+    /// This method is alternative to writeFile(), the difference is that for example for DiskObjectStorage
+    /// writeFile() calls IObjectStorage::writeObject() to write an object to the object storage while
+    /// this method allows to specify a callback for that.
+    virtual void writeFileUsingBlobWritingFunction(const String & path, WriteMode mode, WriteBlobFunction && write_blob_function) = 0;
+
+    /// Reads a file from an encrypted disk without decrypting it (only for encrypted disks).
+    virtual std::unique_ptr<ReadBufferFromFileBase> readEncryptedFile(const String & path, const ReadSettings & settings) const;
+
+    /// Writes an already encrypted file to the disk (only for encrypted disks).
+    virtual std::unique_ptr<WriteBufferFromFileBase> writeEncryptedFile(
+        const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) const;
+
+    /// Returns the size of an encrypted file (only for encrypted disks).
+    virtual size_t getEncryptedFileSize(const String & path) const;
+    virtual size_t getEncryptedFileSize(size_t unencrypted_size) const;
+
+    /// Returns IV of an encrypted file (only for encrypted disks).
+    virtual UInt128 getEncryptedFileIV(const String & path) const;
+
     virtual const String & getCacheName() const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "There is no cache"); }
 
     virtual bool supportsCache() const { return false; }
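A hedged sketch of how backup code might drive the encrypted-file API declared above; `DiskT` stands for `IDisk`, `Sink` for any destination writer, and none of this mirrors the actual ClickHouse call sites:

```cpp
#include <cstddef>
#include <string>

// Templates compile stand-alone; this only sketches the intended call pattern.
template <typename DiskT, typename Sink, typename ReadSettingsT>
void backupEncryptedFile(DiskT & disk, const std::string & path, Sink & sink, const ReadSettingsT & read_settings)
{
    // Ciphertext (header included) goes into the backup as-is, no decryption on the way out...
    auto in = disk.readEncryptedFile(path, read_settings);
    // ...and bookkeeping uses the on-disk (encrypted) size, cf. getEncryptedFileSize() above.
    size_t stored_size = disk.getEncryptedFileSize(path);
    sink.write(*in, stored_size);
}
```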

View File

@@ -68,12 +68,10 @@ public:
         const WriteSettings & settings = {},
         bool autocommit = true) = 0;
 
+    using WriteBlobFunction = std::function<size_t(const Strings & blob_path, WriteMode mode, const std::optional<ObjectAttributes> & object_attributes)>;
+
     /// Write a file using a custom function to write an object to the disk's object storage.
-    virtual void writeFileUsingCustomWriteObject(
-        const String & path,
-        WriteMode mode,
-        std::function<size_t(const StoredObject & object, WriteMode mode, const std::optional<ObjectAttributes> & object_attributes)>
-            custom_write_object_function) = 0;
+    virtual void writeFileUsingBlobWritingFunction(const String & path, WriteMode mode, WriteBlobFunction && write_blob_function) = 0;
 
     /// Remove file. Throws exception if file doesn't exists or it's a directory.
     virtual void removeFile(const std::string & path) = 0;

View File

@@ -579,15 +579,24 @@ std::unique_ptr<WriteBufferFromFileBase> DiskObjectStorage::writeFile(
     return result;
 }
 
-void DiskObjectStorage::writeFileUsingCustomWriteObject(
-    const String & path,
-    WriteMode mode,
-    std::function<size_t(const StoredObject & object, WriteMode mode, const std::optional<ObjectAttributes> & object_attributes)>
-        custom_write_object_function)
+Strings DiskObjectStorage::getBlobPath(const String & path) const
+{
+    auto objects = getStorageObjects(path);
+    Strings res;
+    res.reserve(objects.size() + 1);
+    for (const auto & object : objects)
+        res.emplace_back(object.remote_path);
+    String objects_namespace = object_storage->getObjectsNamespace();
+    if (!objects_namespace.empty())
+        res.emplace_back(objects_namespace);
+    return res;
+}
+
+void DiskObjectStorage::writeFileUsingBlobWritingFunction(const String & path, WriteMode mode, WriteBlobFunction && write_blob_function)
 {
     LOG_TEST(log, "Write file: {}", path);
 
     auto transaction = createObjectStorageTransaction();
-    return transaction->writeFileUsingCustomWriteObject(path, mode, std::move(custom_write_object_function));
+    return transaction->writeFileUsingBlobWritingFunction(path, mode, std::move(write_blob_function));
 }
 
 void DiskObjectStorage::applyNewSettings(

View File

@@ -149,11 +149,8 @@ public:
         WriteMode mode,
         const WriteSettings & settings) override;
 
-    void writeFileUsingCustomWriteObject(
-        const String & path,
-        WriteMode mode,
-        std::function<size_t(const StoredObject & object, WriteMode mode, const std::optional<ObjectAttributes> & object_attributes)>
-            custom_write_object_function) override;
+    Strings getBlobPath(const String & path) const override;
+    void writeFileUsingBlobWritingFunction(const String & path, WriteMode mode, WriteBlobFunction && write_blob_function) override;
 
     void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;

View File

@@ -669,11 +669,8 @@ std::unique_ptr<WriteBufferFromFileBase> DiskObjectStorageTransaction::writeFile
 }
 
-void DiskObjectStorageTransaction::writeFileUsingCustomWriteObject(
-    const String & path,
-    WriteMode mode,
-    std::function<size_t(const StoredObject & object, WriteMode mode, const std::optional<ObjectAttributes> & object_attributes)>
-        custom_write_object_function)
+void DiskObjectStorageTransaction::writeFileUsingBlobWritingFunction(
+    const String & path, WriteMode mode, WriteBlobFunction && write_blob_function)
 {
     /// This function is a simplified and adapted version of DiskObjectStorageTransaction::writeFile().
     auto blob_name = object_storage.generateBlobNameForPath(path);
@@ -694,8 +691,16 @@ void DiskObjectStorageTransaction::writeFileUsingCustomWriteObject(
     operations_to_execute.emplace_back(std::move(write_operation));
 
+    /// See DiskObjectStorage::getBlobPath().
+    Strings blob_path;
+    blob_path.reserve(2);
+    blob_path.emplace_back(object.remote_path);
+    String objects_namespace = object_storage.getObjectsNamespace();
+    if (!objects_namespace.empty())
+        blob_path.emplace_back(objects_namespace);
+
     /// We always use mode Rewrite because we simulate append using metadata and different files
-    size_t object_size = std::move(custom_write_object_function)(object, WriteMode::Rewrite, object_attributes);
+    size_t object_size = std::move(write_blob_function)(blob_path, WriteMode::Rewrite, object_attributes);
 
     /// Create metadata (see create_metadata_callback in DiskObjectStorageTransaction::writeFile()).
     if (mode == WriteMode::Rewrite)

View File

@@ -100,11 +100,7 @@ public:
         bool autocommit = true) override;
 
     /// Write a file using a custom function to write an object to the disk's object storage.
-    void writeFileUsingCustomWriteObject(
-        const String & path,
-        WriteMode mode,
-        std::function<size_t(const StoredObject & object, WriteMode mode, const std::optional<ObjectAttributes> & object_attributes)>
-            custom_write_object_function) override;
+    void writeFileUsingBlobWritingFunction(const String & path, WriteMode mode, WriteBlobFunction && write_blob_function) override;
 
     void removeFile(const std::string & path) override;
     void removeFileIfExists(const std::string & path) override;

View File

@@ -1,69 +0,0 @@
#include <Disks/ObjectStorages/S3/copyS3FileToDisk.h>
#if USE_AWS_S3
#include <IO/S3/getObjectInfo.h>
#include <IO/ReadBufferFromS3.h>
#include <IO/WriteBufferFromFileBase.h>
#include <IO/copyData.h>
#include <IO/S3/copyS3File.h>
namespace DB
{
void copyS3FileToDisk(
const std::shared_ptr<const S3::Client> & s3_client,
const String & src_bucket,
const String & src_key,
const std::optional<String> & version_id,
std::optional<size_t> src_offset,
std::optional<size_t> src_size,
DiskPtr destination_disk,
const String & destination_path,
WriteMode write_mode,
const ReadSettings & read_settings,
const WriteSettings & write_settings,
const S3Settings::RequestSettings & request_settings,
ThreadPoolCallbackRunner<void> scheduler)
{
if (!src_offset)
src_offset = 0;
if (!src_size)
src_size = S3::getObjectSize(*s3_client, src_bucket, src_key, version_id.value_or(""), request_settings) - *src_offset;
auto destination_data_source_description = destination_disk->getDataSourceDescription();
if (destination_data_source_description != DataSourceDescription{DataSourceType::S3, s3_client->getInitialEndpoint(), false, false})
{
LOG_TRACE(&Poco::Logger::get("copyS3FileToDisk"), "Copying {} to disk {} through buffers", src_key, destination_disk->getName());
ReadBufferFromS3 read_buffer{s3_client, src_bucket, src_key, {}, request_settings, read_settings};
if (*src_offset)
read_buffer.seek(*src_offset, SEEK_SET);
auto write_buffer = destination_disk->writeFile(destination_path, std::min<size_t>(*src_size, DBMS_DEFAULT_BUFFER_SIZE), write_mode, write_settings);
copyData(read_buffer, *write_buffer, *src_size);
write_buffer->finalize();
return;
}
LOG_TRACE(&Poco::Logger::get("copyS3FileToDisk"), "Copying {} to disk {} using native copy", src_key, destination_disk->getName());
String dest_bucket = destination_disk->getObjectStorage()->getObjectsNamespace();
auto custom_write_object = [&](const StoredObject & object_, WriteMode write_mode_, const std::optional<ObjectAttributes> & object_attributes_) -> size_t
{
/// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files.
chassert(write_mode_ == WriteMode::Rewrite);
copyS3File(s3_client, src_bucket, src_key, *src_offset, *src_size, dest_bucket, /* dest_key= */ object_.remote_path,
request_settings, object_attributes_, scheduler, /* for_disk_s3= */ true);
return *src_size;
};
destination_disk->writeFileUsingCustomWriteObject(destination_path, write_mode, custom_write_object);
}
}
#endif

View File

@ -1,36 +0,0 @@
#pragma once
#include "config.h"
#if USE_AWS_S3
#include <Disks/IDisk.h>
#include <Storages/StorageS3Settings.h>
#include <Interpreters/threadPoolCallbackRunner.h>
namespace DB
{
/// Copies an object from S3 bucket to a disk of any type.
/// Depending on the disk the function can either do copying through buffers
/// (i.e. download the object by portions and then write those portions to the specified disk),
/// or perform a server-side copy.
void copyS3FileToDisk(
const std::shared_ptr<const S3::Client> & s3_client,
const String & src_bucket,
const String & src_key,
const std::optional<String> & version_id,
std::optional<size_t> src_offset,
std::optional<size_t> src_size,
DiskPtr destination_disk,
const String & destination_path,
WriteMode write_mode = WriteMode::Rewrite,
const ReadSettings & read_settings = {},
const WriteSettings & write_settings = {},
const S3Settings::RequestSettings & request_settings = {},
ThreadPoolCallbackRunner<void> scheduler = {});
}
#endif

View File

@ -573,8 +573,6 @@ bool FileCache::tryReserve(FileSegment & file_segment, size_t size)
else else
queue_size += 1; queue_size += 1;
size_t removed_size = 0;
class EvictionCandidates final : public std::vector<FileSegmentMetadataPtr> class EvictionCandidates final : public std::vector<FileSegmentMetadataPtr>
{ {
public: public:
@ -600,6 +598,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, size_t size)
std::unordered_map<Key, EvictionCandidates> to_delete; std::unordered_map<Key, EvictionCandidates> to_delete;
size_t removed_size = 0;
auto iterate_func = [&](LockedKey & locked_key, FileSegmentMetadataPtr segment_metadata) auto iterate_func = [&](LockedKey & locked_key, FileSegmentMetadataPtr segment_metadata)
{ {
chassert(segment_metadata->file_segment->assertCorrectness()); chassert(segment_metadata->file_segment->assertCorrectness());
@ -655,8 +654,18 @@ bool FileCache::tryReserve(FileSegment & file_segment, size_t size)
{ {
/// max_size == 0 means unlimited cache size, /// max_size == 0 means unlimited cache size,
/// max_element_size means unlimited number of cache elements. /// max_element_size means unlimited number of cache elements.
return (main_priority->getSizeLimit() != 0 && main_priority->getSize(cache_lock) + size - removed_size > main_priority->getSizeLimit()) const bool is_overflow = (main_priority->getSizeLimit() != 0
&& main_priority->getSize(cache_lock) + size - removed_size > main_priority->getSizeLimit())
|| (main_priority->getElementsLimit() != 0 && queue_size > main_priority->getElementsLimit()); || (main_priority->getElementsLimit() != 0 && queue_size > main_priority->getElementsLimit());
LOG_TEST(
log, "Overflow: {}, size: {}, ready to remove: {}, current cache size: {}/{}, elements: {}/{}, while reserving for {}:{}",
is_overflow, size, removed_size,
main_priority->getSize(cache_lock), main_priority->getSizeLimit(),
main_priority->getElementsCount(cache_lock), main_priority->getElementsLimit(),
file_segment.key(), file_segment.offset());
return is_overflow;
}; };
main_priority->iterate( main_priority->iterate(
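The new `is_overflow` predicate reads compactly; a hedged, self-contained restatement with assumed stand-in values (not the real `FileCache` types) shows how a limit of 0 disables a check and how bytes already chosen for eviction (`removed_size`) are credited against the current size:

```cpp
// Hedged restatement of the overflow predicate with assumed stand-in values.
#include <cstdio>

int main()
{
    const size_t size_limit = 100;     // getSizeLimit(); 0 would mean unlimited
    const size_t elements_limit = 10;  // getElementsLimit(); 0 would mean unlimited
    size_t current_size = 95;          // getSize(cache_lock)
    size_t size = 20;                  // bytes being reserved
    size_t removed_size = 30;          // bytes already picked for eviction
    size_t queue_size = 8;             // elements after this reservation

    const bool is_overflow =
        (size_limit != 0 && current_size + size - removed_size > size_limit)
        || (elements_limit != 0 && queue_size > elements_limit);

    std::printf("overflow: %s\n", is_overflow ? "true" : "false");  // false: 95 + 20 - 30 = 85 <= 100
}
```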

View File

@ -44,7 +44,7 @@ IFileCachePriority::Iterator LRUFileCachePriority::add(
throw Exception( throw Exception(
ErrorCodes::LOGICAL_ERROR, ErrorCodes::LOGICAL_ERROR,
"Not enough space to add {}:{} with size {}: current size: {}/{}", "Not enough space to add {}:{} with size {}: current size: {}/{}",
key, offset, size, current_size, getSizeLimit()); key, offset, size, current_size, size_limit);
} }
auto iter = queue.insert(queue.end(), Entry(key, offset, size, key_metadata)); auto iter = queue.insert(queue.end(), Entry(key, offset, size, key_metadata));
@ -161,6 +161,11 @@ void LRUFileCachePriority::LRUFileCacheIterator::annul()
void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size) void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size)
{ {
LOG_TEST(
cache_priority->log,
"Update size with {} in LRU queue for key: {}, offset: {}, previous size: {}",
size, queue_iter->key, queue_iter->offset, queue_iter->size);
cache_priority->current_size += size; cache_priority->current_size += size;
queue_iter->size += size; queue_iter->size += size;

View File

@ -201,6 +201,14 @@ void DistributedAsyncInsertBatch::sendBatch()
{ {
for (const auto & file : files) for (const auto & file : files)
{ {
/// In case of recovery it is possible that some of the files will be
/// missing, if the server had been restarted abnormally
if (recovered && !fs::exists(file))
{
LOG_WARNING(parent.log, "File {} does not exist, likely due to abnormal shutdown", file);
continue;
}
ReadBufferFromFile in(file); ReadBufferFromFile in(file);
const auto & distributed_header = DistributedAsyncInsertHeader::read(in, parent.log); const auto & distributed_header = DistributedAsyncInsertHeader::read(in, parent.log);

View File

@ -9,6 +9,8 @@
#include <Storages/MergeTree/localBackup.h> #include <Storages/MergeTree/localBackup.h>
#include <Backups/BackupEntryFromSmallFile.h> #include <Backups/BackupEntryFromSmallFile.h>
#include <Backups/BackupEntryFromImmutableFile.h> #include <Backups/BackupEntryFromImmutableFile.h>
#include <Backups/BackupEntryWrappedWith.h>
#include <Backups/BackupSettings.h>
#include <Disks/SingleDiskVolume.h> #include <Disks/SingleDiskVolume.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h> #include <Storages/MergeTree/IMergeTreeDataPart.h>
@ -318,12 +320,12 @@ DataPartStorageOnDiskBase::getReplicatedFilesDescriptionForRemoteDisk(const Name
} }
void DataPartStorageOnDiskBase::backup( void DataPartStorageOnDiskBase::backup(
const ReadSettings & read_settings,
const MergeTreeDataPartChecksums & checksums, const MergeTreeDataPartChecksums & checksums,
const NameSet & files_without_checksums, const NameSet & files_without_checksums,
const String & path_in_backup, const String & path_in_backup,
BackupEntries & backup_entries, const BackupSettings & backup_settings,
bool make_temporary_hard_links, bool make_temporary_hard_links,
BackupEntries & backup_entries,
TemporaryFilesOnDisks * temp_dirs) const TemporaryFilesOnDisks * temp_dirs) const
{ {
fs::path part_path_on_disk = fs::path{root_path} / part_dir; fs::path part_path_on_disk = fs::path{root_path} / part_dir;
@ -364,6 +366,8 @@ void DataPartStorageOnDiskBase::backup(
files_to_backup = getActualFileNamesOnDisk(files_to_backup); files_to_backup = getActualFileNamesOnDisk(files_to_backup);
bool copy_encrypted = !backup_settings.decrypt_files_from_encrypted_disks;
for (const auto & filepath : files_to_backup) for (const auto & filepath : files_to_backup)
{ {
auto filepath_on_disk = part_path_on_disk / filepath; auto filepath_on_disk = part_path_on_disk / filepath;
@ -371,7 +375,7 @@ void DataPartStorageOnDiskBase::backup(
if (files_without_checksums.contains(filepath)) if (files_without_checksums.contains(filepath))
{ {
backup_entries.emplace_back(filepath_in_backup, std::make_unique<BackupEntryFromSmallFile>(disk, filepath_on_disk)); backup_entries.emplace_back(filepath_in_backup, std::make_unique<BackupEntryFromSmallFile>(disk, filepath_on_disk, copy_encrypted));
continue; continue;
} }
@ -392,9 +396,12 @@ void DataPartStorageOnDiskBase::backup(
file_hash = {it->second.file_hash.first, it->second.file_hash.second}; file_hash = {it->second.file_hash.first, it->second.file_hash.second};
} }
backup_entries.emplace_back( BackupEntryPtr backup_entry = std::make_unique<BackupEntryFromImmutableFile>(disk, filepath_on_disk, copy_encrypted, file_size, file_hash);
filepath_in_backup,
std::make_unique<BackupEntryFromImmutableFile>(disk, filepath_on_disk, read_settings, file_size, file_hash, temp_dir_owner)); if (temp_dir_owner)
backup_entry = wrapBackupEntryWith(std::move(backup_entry), temp_dir_owner);
backup_entries.emplace_back(filepath_in_backup, std::move(backup_entry));
} }
} }
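The `wrapBackupEntryWith()` call above is purely lifetime management: the temporary directory holding the hard links must outlive every backup entry that references files inside it. A hedged sketch of that ownership pattern, with stand-in types:

```cpp
// Hedged sketch: keep a resource alive by bundling its owner with the entry.
#include <cstdio>
#include <memory>
#include <utility>

struct TempDir { ~TempDir() { std::printf("temp dir removed\n"); } };
struct BackupEntry { virtual ~BackupEntry() = default; };
struct FileEntry : BackupEntry {};

// Wrapper that holds the entry plus whatever owns its backing storage.
struct WrappedEntry : BackupEntry
{
    std::unique_ptr<BackupEntry> inner;
    std::shared_ptr<TempDir> owner;  // the hard links live here
    WrappedEntry(std::unique_ptr<BackupEntry> e, std::shared_ptr<TempDir> o)
        : inner(std::move(e)), owner(std::move(o)) {}
};

int main()
{
    auto temp_dir_owner = std::make_shared<TempDir>();
    std::unique_ptr<BackupEntry> entry = std::make_unique<FileEntry>();
    if (temp_dir_owner)
        entry = std::make_unique<WrappedEntry>(std::move(entry), temp_dir_owner);
    temp_dir_owner.reset();  // the entry still keeps the temp dir alive
    std::printf("writing backup...\n");
}   // temp dir removed only after the entry is gone
```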

View File

@ -50,12 +50,12 @@ public:
ReplicatedFilesDescription getReplicatedFilesDescriptionForRemoteDisk(const NameSet & file_names) const override; ReplicatedFilesDescription getReplicatedFilesDescriptionForRemoteDisk(const NameSet & file_names) const override;
void backup( void backup(
const ReadSettings & read_settings,
const MergeTreeDataPartChecksums & checksums, const MergeTreeDataPartChecksums & checksums,
const NameSet & files_without_checksums, const NameSet & files_without_checksums,
const String & path_in_backup, const String & path_in_backup,
BackupEntries & backup_entries, const BackupSettings & backup_settings,
bool make_temporary_hard_links, bool make_temporary_hard_links,
BackupEntries & backup_entries,
TemporaryFilesOnDisks * temp_dirs) const override; TemporaryFilesOnDisks * temp_dirs) const override;
MutableDataPartStoragePtr freeze( MutableDataPartStoragePtr freeze(

View File

@ -65,6 +65,7 @@ using SyncGuardPtr = std::unique_ptr<ISyncGuard>;
class IBackupEntry; class IBackupEntry;
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>; using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>; using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>;
struct BackupSettings;
struct WriteSettings; struct WriteSettings;
@ -197,12 +198,12 @@ public:
/// Also creates a new tmp_dir for internal disk (if disk is mentioned the first time). /// Also creates a new tmp_dir for internal disk (if disk is mentioned the first time).
using TemporaryFilesOnDisks = std::map<DiskPtr, std::shared_ptr<TemporaryFileOnDisk>>; using TemporaryFilesOnDisks = std::map<DiskPtr, std::shared_ptr<TemporaryFileOnDisk>>;
virtual void backup( virtual void backup(
const ReadSettings & read_settings,
const MergeTreeDataPartChecksums & checksums, const MergeTreeDataPartChecksums & checksums,
const NameSet & files_without_checksums, const NameSet & files_without_checksums,
const String & path_in_backup, const String & path_in_backup,
BackupEntries & backup_entries, const BackupSettings & backup_settings,
bool make_temporary_hard_links, bool make_temporary_hard_links,
BackupEntries & backup_entries,
TemporaryFilesOnDisks * temp_dirs) const = 0; TemporaryFilesOnDisks * temp_dirs) const = 0;
/// Creates hardlinks into 'to/dir_path' for every file in data part. /// Creates hardlinks into 'to/dir_path' for every file in data part.

View File

@ -19,6 +19,7 @@
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <Common/CurrentMetrics.h> #include <Common/CurrentMetrics.h>
#include <Common/ThreadFuzzer.h> #include <Common/ThreadFuzzer.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Compression/CompressedReadBuffer.h> #include <Compression/CompressedReadBuffer.h>
#include <Core/QueryProcessingStage.h> #include <Core/QueryProcessingStage.h>
#include <DataTypes/DataTypeEnum.h> #include <DataTypes/DataTypeEnum.h>
@ -2429,9 +2430,13 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t
} }
/// Parallel parts removal. /// Parallel parts removal.
size_t num_threads = std::min<size_t>(settings->max_part_removal_threads, parts_to_remove.size()); size_t num_threads = settings->max_part_removal_threads;
if (!num_threads)
num_threads = getNumberOfPhysicalCPUCores() * 2;
num_threads = std::min<size_t>(num_threads, parts_to_remove.size());
std::mutex part_names_mutex; std::mutex part_names_mutex;
ThreadPool pool(CurrentMetrics::MergeTreePartsCleanerThreads, CurrentMetrics::MergeTreePartsCleanerThreadsActive, num_threads); ThreadPool pool(CurrentMetrics::MergeTreePartsCleanerThreads, CurrentMetrics::MergeTreePartsCleanerThreadsActive,
num_threads, num_threads, /* unlimited queue size */ 0);
/// This flag disallows straightforward concurrent parts removal. It's required only in case /// This flag disallows straightforward concurrent parts removal. It's required only in case
/// when we have parts on zero-copy disk + at least some of them were mutated. /// when we have parts on zero-copy disk + at least some of them were mutated.
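A quick worked sketch of the new thread-count heuristic (all values assumed for illustration): `max_part_removal_threads = 0` now means auto, i.e. twice the physical core count, clamped to the number of parts to remove.

```cpp
// Hedged sketch of the heuristic above; 8 cores and 5 parts are assumed values.
#include <algorithm>
#include <cstdio>

int main()
{
    size_t max_part_removal_threads = 0;  // 0 = auto, per the new behaviour
    size_t physical_cores = 8;            // stand-in for getNumberOfPhysicalCPUCores()
    size_t parts_to_remove = 5;

    size_t num_threads = max_part_removal_threads;
    if (!num_threads)
        num_threads = physical_cores * 2;                         // 16
    num_threads = std::min<size_t>(num_threads, parts_to_remove); // 5

    std::printf("num_threads = %zu\n", num_threads);
}
```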
@ -2490,9 +2495,22 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t
/// We remove disjoint subsets of parts in parallel. /// We remove disjoint subsets of parts in parallel.
/// The problem is that it's not trivial to divide Outdated parts into disjoint subsets, /// The problem is that it's not trivial to divide Outdated parts into disjoint subsets,
/// because Outdated parts legally can be intersecting (but intersecting parts must be separated by a DROP_RANGE). /// because Outdated parts legally can be intersecting (but intersecting parts must be separated by a DROP_RANGE).
/// So we ignore level and version and use block numbers only. /// So we ignore level and version and use block numbers only (they cannot intersect by block numbers unless we have a bug).
struct RemovalRanges
{
std::vector<MergeTreePartInfo> infos;
std::vector<DataPartsVector> parts;
std::vector<UInt64> split_times;
};
auto split_into_independent_ranges = [this](const DataPartsVector & parts_to_remove_, size_t split_times) -> RemovalRanges
{
if (parts_to_remove_.empty())
return {};
ActiveDataPartSet independent_ranges_set(format_version); ActiveDataPartSet independent_ranges_set(format_version);
for (const auto & part : parts_to_remove) for (const auto & part : parts_to_remove_)
{ {
MergeTreePartInfo range_info = part->info; MergeTreePartInfo range_info = part->info;
range_info.level = static_cast<UInt32>(range_info.max_block - range_info.min_block); range_info.level = static_cast<UInt32>(range_info.max_block - range_info.min_block);
@ -2500,19 +2518,39 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t
independent_ranges_set.add(range_info, range_info.getPartNameV1()); independent_ranges_set.add(range_info, range_info.getPartNameV1());
} }
auto independent_ranges_infos = independent_ranges_set.getPartInfos(); RemovalRanges independent_ranges;
independent_ranges.infos = independent_ranges_set.getPartInfos();
size_t num_ranges = independent_ranges.infos.size();
independent_ranges.parts.resize(num_ranges);
independent_ranges.split_times.resize(num_ranges, split_times);
size_t avg_range_size = parts_to_remove_.size() / num_ranges;
size_t sum_of_ranges = 0; size_t sum_of_ranges = 0;
for (auto range : independent_ranges_infos) for (size_t i = 0; i < num_ranges; ++i)
{ {
MergeTreePartInfo & range = independent_ranges.infos[i];
DataPartsVector & parts_in_range = independent_ranges.parts[i];
range.level = MergeTreePartInfo::MAX_LEVEL; range.level = MergeTreePartInfo::MAX_LEVEL;
range.mutation = MergeTreePartInfo::MAX_BLOCK_NUMBER; range.mutation = MergeTreePartInfo::MAX_BLOCK_NUMBER;
DataPartsVector parts_in_range; parts_in_range.reserve(avg_range_size * 2);
for (const auto & part : parts_to_remove) for (const auto & part : parts_to_remove_)
if (range.contains(part->info)) if (range.contains(part->info))
parts_in_range.push_back(part); parts_in_range.push_back(part);
sum_of_ranges += parts_in_range.size(); sum_of_ranges += parts_in_range.size();
}
if (parts_to_remove_.size() != sum_of_ranges)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of removed parts is not equal to number of parts in independent ranges "
"({} != {}), it's a bug", parts_to_remove_.size(), sum_of_ranges);
return independent_ranges;
};
auto schedule_parts_removal = [this, &pool, &part_names_mutex, part_names_succeed](
const MergeTreePartInfo & range, DataPartsVector && parts_in_range)
{
/// Below, range should be captured by copy to avoid use-after-scope on exception from pool
pool.scheduleOrThrowOnError( pool.scheduleOrThrowOnError(
[this, range, &part_names_mutex, part_names_succeed, thread_group = CurrentThread::getGroup(), batch = std::move(parts_in_range)] [this, range, &part_names_mutex, part_names_succeed, thread_group = CurrentThread::getGroup(), batch = std::move(parts_in_range)]
{ {
@ -2535,13 +2573,83 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t
} }
} }
}); });
};
RemovalRanges independent_ranges = split_into_independent_ranges(parts_to_remove, /* split_times */ 0);
DataPartsVector excluded_parts;
size_t num_ranges = independent_ranges.infos.size();
size_t sum_of_ranges = 0;
for (size_t i = 0; i < num_ranges; ++i)
{
MergeTreePartInfo & range = independent_ranges.infos[i];
DataPartsVector & parts_in_range = independent_ranges.parts[i];
UInt64 split_times = independent_ranges.split_times[i];
/// It may happen that we have a huge part covering thousands of small parts.
/// In this case, we will get a huge range that will be processed by only one thread, causing a really long tail latency.
/// Let's try to exclude such parts in order to get smaller tasks for the thread pool and a more uniform distribution.
if (settings->concurrent_part_removal_threshold < parts_in_range.size() &&
split_times < settings->zero_copy_concurrent_part_removal_max_split_times)
{
auto smaller_parts_pred = [&range](const DataPartPtr & part)
{
return !(part->info.min_block == range.min_block && part->info.max_block == range.max_block);
};
size_t covered_parts_count = std::count_if(parts_in_range.begin(), parts_in_range.end(), smaller_parts_pred);
size_t top_level_count = parts_in_range.size() - covered_parts_count;
chassert(top_level_count);
Float32 parts_to_exclude_ratio = static_cast<Float32>(top_level_count) / parts_in_range.size();
if (settings->zero_copy_concurrent_part_removal_max_postpone_ratio < parts_to_exclude_ratio)
{
/// Most likely we have a long mutations chain here
LOG_DEBUG(log, "Block range {} contains {} parts including {} top-level parts, will not try to split it",
range.getPartNameForLogs(), parts_in_range.size(), top_level_count);
}
else
{
auto new_end_it = std::partition(parts_in_range.begin(), parts_in_range.end(), smaller_parts_pred);
std::move(new_end_it, parts_in_range.end(), std::back_inserter(excluded_parts));
parts_in_range.erase(new_end_it, parts_in_range.end());
RemovalRanges subranges = split_into_independent_ranges(parts_in_range, split_times + 1);
LOG_DEBUG(log, "Block range {} contained {} parts, it was split into {} independent subranges after excluding {} top-level parts",
range.getPartNameForLogs(), parts_in_range.size() + top_level_count, subranges.infos.size(), top_level_count);
std::move(subranges.infos.begin(), subranges.infos.end(), std::back_inserter(independent_ranges.infos));
std::move(subranges.parts.begin(), subranges.parts.end(), std::back_inserter(independent_ranges.parts));
std::move(subranges.split_times.begin(), subranges.split_times.end(), std::back_inserter(independent_ranges.split_times));
num_ranges += subranges.infos.size();
continue;
}
}
sum_of_ranges += parts_in_range.size();
schedule_parts_removal(range, std::move(parts_in_range));
}
/// Remove excluded parts as well. They were reordered, so sort them again
std::sort(excluded_parts.begin(), excluded_parts.end(), [](const auto & x, const auto & y) { return x->info < y->info; });
LOG_TRACE(log, "Will remove {} big parts separately: {}", excluded_parts.size(), fmt::join(excluded_parts, ", "));
independent_ranges = split_into_independent_ranges(excluded_parts, /* split_times */ 0);
pool.wait();
for (size_t i = 0; i < independent_ranges.infos.size(); ++i)
{
MergeTreePartInfo & range = independent_ranges.infos[i];
DataPartsVector & parts_in_range = independent_ranges.parts[i];
schedule_parts_removal(range, std::move(parts_in_range));
} }
pool.wait(); pool.wait();
if (parts_to_remove.size() != sum_of_ranges) if (parts_to_remove.size() != sum_of_ranges + excluded_parts.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of removed parts is not equal to number of parts in independent ranges " throw Exception(ErrorCodes::LOGICAL_ERROR,
"({} != {}), it's a bug", parts_to_remove.size(), sum_of_ranges); "Number of parts to remove was not equal to number of parts in independent ranges and excluded parts"
"({} != {} + {}), it's a bug", parts_to_remove.size(), sum_of_ranges, excluded_parts.size());
} }
size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory() size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory()
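Since the split-and-postpone heuristic above is the densest part of this change, here is a hedged, self-contained sketch of it with illustrative stand-in types and thresholds (the recursion-depth cap via `split_times` is omitted): a range dominated by covered parts sheds its top-level part(s) and is re-split, while a range with too many top-level parts is kept whole.

```cpp
// Hedged, self-contained sketch of the split-and-postpone heuristic above.
// All types, names, and thresholds here are illustrative stand-ins.
#include <algorithm>
#include <cstdio>
#include <vector>

struct PartInfo { int min_block; int max_block; };

int main()
{
    // One independent range covering blocks [0, 21]: a single huge top-level
    // part plus 21 small parts it covers.
    PartInfo range{0, 21};
    std::vector<PartInfo> parts_in_range{{0, 21}};
    for (int i = 0; i <= 20; ++i)
        parts_in_range.push_back({i, i});

    const size_t removal_threshold = 3;     // concurrent_part_removal_threshold (assumed)
    const float max_postpone_ratio = 0.05f; // zero_copy_concurrent_part_removal_max_postpone_ratio

    auto is_covered = [&](const PartInfo & p)
    { return !(p.min_block == range.min_block && p.max_block == range.max_block); };

    size_t covered = std::count_if(parts_in_range.begin(), parts_in_range.end(), is_covered);
    size_t top_level = parts_in_range.size() - covered;
    float exclude_ratio = float(top_level) / parts_in_range.size();  // 1/22 ~ 0.045

    if (parts_in_range.size() > removal_threshold && exclude_ratio <= max_postpone_ratio)
    {
        // Set the top-level part(s) aside and re-split the covered parts into
        // subranges, mirroring the recursive split_into_independent_ranges() call.
        auto mid = std::partition(parts_in_range.begin(), parts_in_range.end(), is_covered);
        std::printf("excluded %zu top-level part(s); re-splitting %zu covered parts\n",
                    static_cast<size_t>(parts_in_range.end() - mid),
                    static_cast<size_t>(mid - parts_in_range.begin()));
    }
    else
    {
        std::printf("range kept whole (%zu parts, exclude ratio %.3f)\n",
                    parts_in_range.size(), exclude_ratio);
    }
}
```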
@ -5051,7 +5159,11 @@ Pipe MergeTreeData::alterPartition(
} }
BackupEntries MergeTreeData::backupParts(const DataPartsVector & data_parts, const String & data_path_in_backup, const ContextPtr & local_context) BackupEntries MergeTreeData::backupParts(
const DataPartsVector & data_parts,
const String & data_path_in_backup,
const BackupSettings & backup_settings,
const ContextPtr & local_context)
{ {
BackupEntries backup_entries; BackupEntries backup_entries;
std::map<DiskPtr, std::shared_ptr<TemporaryFileOnDisk>> temp_dirs; std::map<DiskPtr, std::shared_ptr<TemporaryFileOnDisk>> temp_dirs;
@ -5086,24 +5198,24 @@ BackupEntries MergeTreeData::backupParts(const DataPartsVector & data_parts, con
BackupEntries backup_entries_from_part; BackupEntries backup_entries_from_part;
part->getDataPartStorage().backup( part->getDataPartStorage().backup(
read_settings,
part->checksums, part->checksums,
part->getFileNamesWithoutChecksums(), part->getFileNamesWithoutChecksums(),
data_path_in_backup, data_path_in_backup,
backup_entries_from_part, backup_settings,
make_temporary_hard_links, make_temporary_hard_links,
backup_entries_from_part,
&temp_dirs); &temp_dirs);
auto projection_parts = part->getProjectionParts(); auto projection_parts = part->getProjectionParts();
for (const auto & [projection_name, projection_part] : projection_parts) for (const auto & [projection_name, projection_part] : projection_parts)
{ {
projection_part->getDataPartStorage().backup( projection_part->getDataPartStorage().backup(
read_settings,
projection_part->checksums, projection_part->checksums,
projection_part->getFileNamesWithoutChecksums(), projection_part->getFileNamesWithoutChecksums(),
fs::path{data_path_in_backup} / part->name, fs::path{data_path_in_backup} / part->name,
backup_entries_from_part, backup_settings,
make_temporary_hard_links, make_temporary_hard_links,
backup_entries_from_part,
&temp_dirs); &temp_dirs);
} }

View File

@ -1322,7 +1322,7 @@ protected:
MovePartsOutcome movePartsToSpace(const DataPartsVector & parts, SpacePtr space); MovePartsOutcome movePartsToSpace(const DataPartsVector & parts, SpacePtr space);
/// Makes backup entries to backup the parts of this table. /// Makes backup entries to backup the parts of this table.
BackupEntries backupParts(const DataPartsVector & data_parts, const String & data_path_in_backup, const ContextPtr & local_context); BackupEntries backupParts(const DataPartsVector & data_parts, const String & data_path_in_backup, const BackupSettings & backup_settings, const ContextPtr & local_context);
class RestoredPartsHolder; class RestoredPartsHolder;

View File

@ -146,6 +146,8 @@ struct Settings;
M(MaxThreads, max_part_loading_threads, 0, "The number of threads to load data parts at startup.", 0) \ M(MaxThreads, max_part_loading_threads, 0, "The number of threads to load data parts at startup.", 0) \
M(MaxThreads, max_part_removal_threads, 0, "The number of threads for concurrent removal of inactive data parts. One is usually enough, but in 'Google Compute Environment SSD Persistent Disks' file removal (unlink) operation is extraordinarily slow and you probably have to increase this number (recommended is up to 16).", 0) \ M(MaxThreads, max_part_removal_threads, 0, "The number of threads for concurrent removal of inactive data parts. One is usually enough, but in 'Google Compute Environment SSD Persistent Disks' file removal (unlink) operation is extraordinarily slow and you probably have to increase this number (recommended is up to 16).", 0) \
M(UInt64, concurrent_part_removal_threshold, 100, "Activate concurrent part removal (see 'max_part_removal_threads') only if the number of inactive data parts is at least this.", 0) \ M(UInt64, concurrent_part_removal_threshold, 100, "Activate concurrent part removal (see 'max_part_removal_threads') only if the number of inactive data parts is at least this.", 0) \
M(UInt64, zero_copy_concurrent_part_removal_max_split_times, 5, "Max recursion depth for splitting independent Outdated parts ranges into smaller subranges (not recommended to change)", 0) \
M(Float, zero_copy_concurrent_part_removal_max_postpone_ratio, static_cast<Float32>(0.05), "Max ratio of top-level parts to postpone removal in order to get smaller independent ranges (not recommended to change)", 0) \
M(String, storage_policy, "default", "Name of storage disk policy", 0) \ M(String, storage_policy, "default", "Name of storage disk policy", 0) \
M(String, disk, "", "Name of storage disk. Can be specified instead of storage policy.", 0) \ M(String, disk, "", "Name of storage disk. Can be specified instead of storage policy.", 0) \
M(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \ M(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \

View File

@ -60,7 +60,6 @@ public:
~MergeTreeWriteAheadLog(); ~MergeTreeWriteAheadLog();
void addPart(DataPartInMemoryPtr & part);
void dropPart(const String & part_name); void dropPart(const String & part_name);
std::vector<MergeTreeMutableDataPartPtr> restore( std::vector<MergeTreeMutableDataPartPtr> restore(
const StorageMetadataPtr & metadata_snapshot, const StorageMetadataPtr & metadata_snapshot,
@ -77,7 +76,6 @@ public:
private: private:
void init(); void init();
void rotate(const std::unique_lock<std::mutex> & lock); void rotate(const std::unique_lock<std::mutex> & lock);
void sync(std::unique_lock<std::mutex> & lock);
const MergeTreeData & storage; const MergeTreeData & storage;
DiskPtr disk; DiskPtr disk;

View File

@ -27,7 +27,9 @@
#include <Backups/BackupEntriesCollector.h> #include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupEntryFromAppendOnlyFile.h> #include <Backups/BackupEntryFromAppendOnlyFile.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/BackupEntryFromSmallFile.h> #include <Backups/BackupEntryFromSmallFile.h>
#include <Backups/BackupEntryWrappedWith.h>
#include <Backups/IBackup.h> #include <Backups/IBackup.h>
#include <Backups/RestorerFromBackup.h> #include <Backups/RestorerFromBackup.h>
#include <Disks/TemporaryFileOnDisk.h> #include <Disks/TemporaryFileOnDisk.h>
@ -926,10 +928,8 @@ std::optional<UInt64> StorageLog::totalBytes(const Settings &) const
void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional<ASTs> & /* partitions */) void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional<ASTs> & /* partitions */)
{ {
auto local_context = backup_entries_collector.getContext(); auto lock_timeout = getLockTimeout(backup_entries_collector.getContext());
ReadSettings read_settings = local_context->getBackupReadSettings();
auto lock_timeout = getLockTimeout(local_context);
loadMarks(lock_timeout); loadMarks(lock_timeout);
ReadLock lock{rwlock, lock_timeout}; ReadLock lock{rwlock, lock_timeout};
@ -944,6 +944,8 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c
fs::path temp_dir = temp_dir_owner->getPath(); fs::path temp_dir = temp_dir_owner->getPath();
disk->createDirectories(temp_dir); disk->createDirectories(temp_dir);
bool copy_encrypted = !backup_entries_collector.getBackupSettings().decrypt_files_from_encrypted_disks;
/// *.bin /// *.bin
for (const auto & data_file : data_files) for (const auto & data_file : data_files)
{ {
@ -951,10 +953,10 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c
String data_file_name = fileName(data_file.path); String data_file_name = fileName(data_file.path);
String hardlink_file_path = temp_dir / data_file_name; String hardlink_file_path = temp_dir / data_file_name;
disk->createHardLink(data_file.path, hardlink_file_path); disk->createHardLink(data_file.path, hardlink_file_path);
backup_entries_collector.addBackupEntry( BackupEntryPtr backup_entry = std::make_unique<BackupEntryFromAppendOnlyFile>(
data_path_in_backup_fs / data_file_name, disk, hardlink_file_path, copy_encrypted, file_checker.getFileSize(data_file.path));
std::make_unique<BackupEntryFromAppendOnlyFile>( backup_entry = wrapBackupEntryWith(std::move(backup_entry), temp_dir_owner);
disk, hardlink_file_path, read_settings, file_checker.getFileSize(data_file.path), std::nullopt, temp_dir_owner)); backup_entries_collector.addBackupEntry(data_path_in_backup_fs / data_file_name, std::move(backup_entry));
} }
/// __marks.mrk /// __marks.mrk
@ -964,16 +966,16 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c
String marks_file_name = fileName(marks_file_path); String marks_file_name = fileName(marks_file_path);
String hardlink_file_path = temp_dir / marks_file_name; String hardlink_file_path = temp_dir / marks_file_name;
disk->createHardLink(marks_file_path, hardlink_file_path); disk->createHardLink(marks_file_path, hardlink_file_path);
backup_entries_collector.addBackupEntry( BackupEntryPtr backup_entry = std::make_unique<BackupEntryFromAppendOnlyFile>(
data_path_in_backup_fs / marks_file_name, disk, hardlink_file_path, copy_encrypted, file_checker.getFileSize(marks_file_path));
std::make_unique<BackupEntryFromAppendOnlyFile>( backup_entry = wrapBackupEntryWith(std::move(backup_entry), temp_dir_owner);
disk, hardlink_file_path, read_settings, file_checker.getFileSize(marks_file_path), std::nullopt, temp_dir_owner)); backup_entries_collector.addBackupEntry(data_path_in_backup_fs / marks_file_name, std::move(backup_entry));
} }
/// sizes.json /// sizes.json
String files_info_path = file_checker.getPath(); String files_info_path = file_checker.getPath();
backup_entries_collector.addBackupEntry( backup_entries_collector.addBackupEntry(
data_path_in_backup_fs / fileName(files_info_path), std::make_unique<BackupEntryFromSmallFile>(disk, files_info_path)); data_path_in_backup_fs / fileName(files_info_path), std::make_unique<BackupEntryFromSmallFile>(disk, files_info_path, copy_encrypted));
/// columns.txt /// columns.txt
backup_entries_collector.addBackupEntry( backup_entries_collector.addBackupEntry(

View File

@ -23,7 +23,8 @@
#include <Compression/CompressedReadBufferFromFile.h> #include <Compression/CompressedReadBufferFromFile.h>
#include <Compression/CompressedWriteBuffer.h> #include <Compression/CompressedWriteBuffer.h>
#include <Backups/BackupEntriesCollector.h> #include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupEntryFromImmutableFile.h> #include <Backups/BackupEntryFromAppendOnlyFile.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/BackupEntryFromSmallFile.h> #include <Backups/BackupEntryFromSmallFile.h>
#include <Backups/IBackup.h> #include <Backups/IBackup.h>
#include <Backups/IBackupEntriesLazyBatch.h> #include <Backups/IBackupEntriesLazyBatch.h>
@ -307,8 +308,6 @@ namespace
BackupEntries generate() override BackupEntries generate() override
{ {
ReadSettings read_settings = context->getBackupReadSettings();
BackupEntries backup_entries; BackupEntries backup_entries;
backup_entries.resize(file_paths.size()); backup_entries.resize(file_paths.size());
@ -321,20 +320,28 @@ namespace
{ {
auto data_file_path = temp_dir / fs::path{file_paths[data_bin_pos]}.filename(); auto data_file_path = temp_dir / fs::path{file_paths[data_bin_pos]}.filename();
auto data_out_compressed = temp_disk->writeFile(data_file_path); auto data_out_compressed = temp_disk->writeFile(data_file_path);
CompressedWriteBuffer data_out{*data_out_compressed, CompressionCodecFactory::instance().getDefaultCodec(), max_compress_block_size}; auto data_out = std::make_unique<CompressedWriteBuffer>(*data_out_compressed, CompressionCodecFactory::instance().getDefaultCodec(), max_compress_block_size);
NativeWriter block_out{data_out, 0, metadata_snapshot->getSampleBlock(), false, &index}; NativeWriter block_out{*data_out, 0, metadata_snapshot->getSampleBlock(), false, &index};
for (const auto & block : *blocks) for (const auto & block : *blocks)
block_out.write(block); block_out.write(block);
backup_entries[data_bin_pos] = {file_paths[data_bin_pos], std::make_shared<BackupEntryFromImmutableFile>(temp_disk, data_file_path, read_settings)}; data_out->finalize();
data_out.reset();
data_out_compressed->finalize();
data_out_compressed.reset();
backup_entries[data_bin_pos] = {file_paths[data_bin_pos], std::make_shared<BackupEntryFromAppendOnlyFile>(temp_disk, data_file_path)};
} }
/// Writing index.mrk /// Writing index.mrk
{ {
auto index_mrk_path = temp_dir / fs::path{file_paths[index_mrk_pos]}.filename(); auto index_mrk_path = temp_dir / fs::path{file_paths[index_mrk_pos]}.filename();
auto index_mrk_out_compressed = temp_disk->writeFile(index_mrk_path); auto index_mrk_out_compressed = temp_disk->writeFile(index_mrk_path);
CompressedWriteBuffer index_mrk_out{*index_mrk_out_compressed}; auto index_mrk_out = std::make_unique<CompressedWriteBuffer>(*index_mrk_out_compressed);
index.write(index_mrk_out); index.write(*index_mrk_out);
backup_entries[index_mrk_pos] = {file_paths[index_mrk_pos], std::make_shared<BackupEntryFromImmutableFile>(temp_disk, index_mrk_path, read_settings)}; index_mrk_out->finalize();
index_mrk_out.reset();
index_mrk_out_compressed->finalize();
index_mrk_out_compressed.reset();
backup_entries[index_mrk_pos] = {file_paths[index_mrk_pos], std::make_shared<BackupEntryFromAppendOnlyFile>(temp_disk, index_mrk_path)};
} }
/// Writing columns.txt /// Writing columns.txt
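The reordering above matters: both the `CompressedWriteBuffer` and the underlying file buffer must be finalized and released before the file is handed to a backup entry, or the entry could snapshot a partially flushed file. A hedged sketch of that ordering with a stand-in `Buffer` type:

```cpp
// Hedged sketch of the finalize-before-snapshot ordering; Buffer is a stand-in,
// not the real CompressedWriteBuffer / WriteBufferFromFileBase.
#include <cstdio>
#include <memory>
#include <string>

struct Buffer
{
    std::string name;
    bool finalized = false;
    explicit Buffer(std::string n) : name(std::move(n)) {}
    void finalize() { finalized = true; std::printf("%s flushed\n", name.c_str()); }
    ~Buffer() { if (!finalized) std::printf("%s destroyed unflushed!\n", name.c_str()); }
};

int main()
{
    auto file_out = std::make_unique<Buffer>("file buffer");
    auto compressed_out = std::make_unique<Buffer>("compressed buffer");

    // ... write blocks through compressed_out ...

    compressed_out->finalize();  // flush compressed frames into the file buffer
    compressed_out.reset();
    file_out->finalize();        // flush the file itself
    file_out.reset();

    std::printf("file is now complete; safe to create a backup entry from it\n");
}
```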

View File

@ -2142,6 +2142,7 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_
void StorageMergeTree::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional<ASTs> & partitions) void StorageMergeTree::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional<ASTs> & partitions)
{ {
const auto & backup_settings = backup_entries_collector.getBackupSettings();
auto local_context = backup_entries_collector.getContext(); auto local_context = backup_entries_collector.getContext();
DataPartsVector data_parts; DataPartsVector data_parts;
@ -2154,7 +2155,7 @@ void StorageMergeTree::backupData(BackupEntriesCollector & backup_entries_collec
for (const auto & data_part : data_parts) for (const auto & data_part : data_parts)
min_data_version = std::min(min_data_version, data_part->info.getDataVersion() + 1); min_data_version = std::min(min_data_version, data_part->info.getDataVersion() + 1);
backup_entries_collector.addBackupEntries(backupParts(data_parts, data_path_in_backup, local_context)); backup_entries_collector.addBackupEntries(backupParts(data_parts, data_path_in_backup, backup_settings, local_context));
backup_entries_collector.addBackupEntries(backupMutations(min_data_version, data_path_in_backup)); backup_entries_collector.addBackupEntries(backupMutations(min_data_version, data_path_in_backup));
} }

View File

@ -9252,6 +9252,8 @@ void StorageReplicatedMergeTree::backupData(
/// First we generate backup entries in the same way as an ordinary MergeTree does. /// First we generate backup entries in the same way as an ordinary MergeTree does.
/// But then we don't add them to the BackupEntriesCollector right away, /// But then we don't add them to the BackupEntriesCollector right away,
/// because we need to coordinate them with other replicas (other replicas can have better parts). /// because we need to coordinate them with other replicas (other replicas can have better parts).
const auto & backup_settings = backup_entries_collector.getBackupSettings();
auto local_context = backup_entries_collector.getContext(); auto local_context = backup_entries_collector.getContext();
DataPartsVector data_parts; DataPartsVector data_parts;
@ -9260,7 +9262,7 @@ void StorageReplicatedMergeTree::backupData(
else else
data_parts = getVisibleDataPartsVector(local_context); data_parts = getVisibleDataPartsVector(local_context);
auto backup_entries = backupParts(data_parts, /* data_path_in_backup */ "", local_context); auto backup_entries = backupParts(data_parts, /* data_path_in_backup */ "", backup_settings, local_context);
auto coordination = backup_entries_collector.getBackupCoordination(); auto coordination = backup_entries_collector.getBackupCoordination();
String shared_id = getTableSharedID(); String shared_id = getTableSharedID();
@ -9278,10 +9280,9 @@ void StorageReplicatedMergeTree::backupData(
auto & hash = part_names_with_hashes_calculating[part_name]; auto & hash = part_names_with_hashes_calculating[part_name];
if (relative_path.ends_with(".bin")) if (relative_path.ends_with(".bin"))
{ {
auto checksum = backup_entry->getChecksum();
hash.update(relative_path); hash.update(relative_path);
hash.update(backup_entry->getSize()); hash.update(backup_entry->getSize());
hash.update(*checksum); hash.update(backup_entry->getChecksum());
} }
continue; continue;
} }

View File

@ -31,7 +31,9 @@
#include <Backups/BackupEntriesCollector.h> #include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupEntryFromAppendOnlyFile.h> #include <Backups/BackupEntryFromAppendOnlyFile.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/BackupEntryFromSmallFile.h> #include <Backups/BackupEntryFromSmallFile.h>
#include <Backups/BackupEntryWrappedWith.h>
#include <Backups/IBackup.h> #include <Backups/IBackup.h>
#include <Backups/RestorerFromBackup.h> #include <Backups/RestorerFromBackup.h>
#include <Disks/TemporaryFileOnDisk.h> #include <Disks/TemporaryFileOnDisk.h>
@ -527,10 +529,8 @@ std::optional<UInt64> StorageStripeLog::totalBytes(const Settings &) const
void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional<ASTs> & /* partitions */) void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional<ASTs> & /* partitions */)
{ {
auto local_context = backup_entries_collector.getContext(); auto lock_timeout = getLockTimeout(backup_entries_collector.getContext());
ReadSettings read_settings = local_context->getBackupReadSettings();
auto lock_timeout = getLockTimeout(local_context);
loadIndices(lock_timeout); loadIndices(lock_timeout);
ReadLock lock{rwlock, lock_timeout}; ReadLock lock{rwlock, lock_timeout};
@ -545,16 +545,18 @@ void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collec
fs::path temp_dir = temp_dir_owner->getPath(); fs::path temp_dir = temp_dir_owner->getPath();
disk->createDirectories(temp_dir); disk->createDirectories(temp_dir);
bool copy_encrypted = !backup_entries_collector.getBackupSettings().decrypt_files_from_encrypted_disks;
/// data.bin /// data.bin
{ {
/// We make a copy of the data file because it can be changed later in write() or in truncate(). /// We make a copy of the data file because it can be changed later in write() or in truncate().
String data_file_name = fileName(data_file_path); String data_file_name = fileName(data_file_path);
String hardlink_file_path = temp_dir / data_file_name; String hardlink_file_path = temp_dir / data_file_name;
disk->createHardLink(data_file_path, hardlink_file_path); disk->createHardLink(data_file_path, hardlink_file_path);
backup_entries_collector.addBackupEntry( BackupEntryPtr backup_entry = std::make_unique<BackupEntryFromAppendOnlyFile>(
data_path_in_backup_fs / data_file_name, disk, hardlink_file_path, copy_encrypted, file_checker.getFileSize(data_file_path));
std::make_unique<BackupEntryFromAppendOnlyFile>( backup_entry = wrapBackupEntryWith(std::move(backup_entry), temp_dir_owner);
disk, hardlink_file_path, read_settings, file_checker.getFileSize(data_file_path), std::nullopt, temp_dir_owner)); backup_entries_collector.addBackupEntry(data_path_in_backup_fs / data_file_name, std::move(backup_entry));
} }
/// index.mrk /// index.mrk
@ -563,16 +565,16 @@ void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collec
String index_file_name = fileName(index_file_path); String index_file_name = fileName(index_file_path);
String hardlink_file_path = temp_dir / index_file_name; String hardlink_file_path = temp_dir / index_file_name;
disk->createHardLink(index_file_path, hardlink_file_path); disk->createHardLink(index_file_path, hardlink_file_path);
backup_entries_collector.addBackupEntry( BackupEntryPtr backup_entry = std::make_unique<BackupEntryFromAppendOnlyFile>(
data_path_in_backup_fs / index_file_name, disk, hardlink_file_path, copy_encrypted, file_checker.getFileSize(index_file_path));
std::make_unique<BackupEntryFromAppendOnlyFile>( backup_entry = wrapBackupEntryWith(std::move(backup_entry), temp_dir_owner);
disk, hardlink_file_path, read_settings, file_checker.getFileSize(index_file_path), std::nullopt, temp_dir_owner)); backup_entries_collector.addBackupEntry(data_path_in_backup_fs / index_file_name, std::move(backup_entry));
} }
/// sizes.json /// sizes.json
String files_info_path = file_checker.getPath(); String files_info_path = file_checker.getPath();
backup_entries_collector.addBackupEntry( backup_entries_collector.addBackupEntry(
data_path_in_backup_fs / fileName(files_info_path), std::make_unique<BackupEntryFromSmallFile>(disk, files_info_path)); data_path_in_backup_fs / fileName(files_info_path), std::make_unique<BackupEntryFromSmallFile>(disk, files_info_path, copy_encrypted));
/// columns.txt /// columns.txt
backup_entries_collector.addBackupEntry( backup_entries_collector.addBackupEntry(

View File

@ -125,6 +125,7 @@ TRUSTED_CONTRIBUTORS = {
"thevar1able", # ClickHouse Employee "thevar1able", # ClickHouse Employee
"aalexfvk", "aalexfvk",
"MikhailBurdukov", "MikhailBurdukov",
"tsolodov", # ClickHouse Employee
] ]
} }

View File

@ -158,8 +158,6 @@ def test_restore_table(engine):
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"
instance.query(f"BACKUP TABLE test.table TO {backup_name}") instance.query(f"BACKUP TABLE test.table TO {backup_name}")
assert instance.contains_in_log("using native copy")
instance.query("DROP TABLE test.table") instance.query("DROP TABLE test.table")
assert instance.query("EXISTS test.table") == "0\n" assert instance.query("EXISTS test.table") == "0\n"
@ -200,8 +198,6 @@ def test_restore_table_under_another_name():
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"
instance.query(f"BACKUP TABLE test.table TO {backup_name}") instance.query(f"BACKUP TABLE test.table TO {backup_name}")
assert instance.contains_in_log("using native copy")
assert instance.query("EXISTS test.table2") == "0\n" assert instance.query("EXISTS test.table2") == "0\n"
instance.query(f"RESTORE TABLE test.table AS test.table2 FROM {backup_name}") instance.query(f"RESTORE TABLE test.table AS test.table2 FROM {backup_name}")
@ -215,8 +211,6 @@ def test_backup_table_under_another_name():
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"
instance.query(f"BACKUP TABLE test.table AS test.table2 TO {backup_name}") instance.query(f"BACKUP TABLE test.table AS test.table2 TO {backup_name}")
assert instance.contains_in_log("using native copy")
assert instance.query("EXISTS test.table2") == "0\n" assert instance.query("EXISTS test.table2") == "0\n"
instance.query(f"RESTORE TABLE test.table2 FROM {backup_name}") instance.query(f"RESTORE TABLE test.table2 FROM {backup_name}")
@ -245,8 +239,6 @@ def test_incremental_backup():
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"
instance.query(f"BACKUP TABLE test.table TO {backup_name}") instance.query(f"BACKUP TABLE test.table TO {backup_name}")
assert instance.contains_in_log("using native copy")
instance.query("INSERT INTO test.table VALUES (65, 'a'), (66, 'b')") instance.query("INSERT INTO test.table VALUES (65, 'a'), (66, 'b')")
assert instance.query("SELECT count(), sum(x) FROM test.table") == "102\t5081\n" assert instance.query("SELECT count(), sum(x) FROM test.table") == "102\t5081\n"
@ -524,8 +516,6 @@ def test_file_engine():
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"
instance.query(f"BACKUP TABLE test.table TO {backup_name}") instance.query(f"BACKUP TABLE test.table TO {backup_name}")
assert instance.contains_in_log("using native copy")
instance.query("DROP TABLE test.table") instance.query("DROP TABLE test.table")
assert instance.query("EXISTS test.table") == "0\n" assert instance.query("EXISTS test.table") == "0\n"
@ -540,8 +530,6 @@ def test_database():
instance.query(f"BACKUP DATABASE test TO {backup_name}") instance.query(f"BACKUP DATABASE test TO {backup_name}")
assert instance.contains_in_log("using native copy")
instance.query("DROP DATABASE test") instance.query("DROP DATABASE test")
instance.query(f"RESTORE DATABASE test FROM {backup_name}") instance.query(f"RESTORE DATABASE test FROM {backup_name}")

View File

@ -6,7 +6,6 @@ import concurrent
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV, assert_eq_with_retry from helpers.test_tools import TSV, assert_eq_with_retry
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
num_nodes = 10 num_nodes = 10

View File

@ -141,8 +141,8 @@ def test_backup_to_s3_native_copy():
f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')"
) )
check_backup_and_restore(storage_policy, backup_destination) check_backup_and_restore(storage_policy, backup_destination)
assert node.contains_in_log("BackupImpl.*using native copy") assert node.contains_in_log("BackupWriterS3.*using native copy")
assert node.contains_in_log("copyS3FileToDisk.*using native copy") assert node.contains_in_log("BackupReaderS3.*using native copy")
assert node.contains_in_log( assert node.contains_in_log(
f"copyS3File: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" f"copyS3File: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}"
) )
@ -155,8 +155,8 @@ def test_backup_to_s3_native_copy_other_bucket():
f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')"
) )
check_backup_and_restore(storage_policy, backup_destination) check_backup_and_restore(storage_policy, backup_destination)
assert node.contains_in_log("BackupImpl.*using native copy") assert node.contains_in_log("BackupWriterS3.*using native copy")
assert node.contains_in_log("copyS3FileToDisk.*using native copy") assert node.contains_in_log("BackupReaderS3.*using native copy")
assert node.contains_in_log( assert node.contains_in_log(
f"copyS3File: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" f"copyS3File: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}"
) )
@ -167,8 +167,8 @@ def test_backup_to_s3_native_copy_multipart():
backup_name = new_backup_name() backup_name = new_backup_name()
backup_destination = f"S3('http://minio1:9001/root/data/backups/multipart/{backup_name}', 'minio', 'minio123')" backup_destination = f"S3('http://minio1:9001/root/data/backups/multipart/{backup_name}', 'minio', 'minio123')"
check_backup_and_restore(storage_policy, backup_destination, size=1000000) check_backup_and_restore(storage_policy, backup_destination, size=1000000)
assert node.contains_in_log("BackupImpl.*using native copy") assert node.contains_in_log("BackupWriterS3.*using native copy")
assert node.contains_in_log("copyS3FileToDisk.*using native copy") assert node.contains_in_log("BackupReaderS3.*using native copy")
assert node.contains_in_log( assert node.contains_in_log(
f"copyS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}/" f"copyS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}/"
) )

View File

@ -0,0 +1,5 @@
<clickhouse>
<backups>
<allowed_path>/backups/</allowed_path>
</backups>
</clickhouse>

View File

@ -1,6 +1,7 @@
import pytest import pytest
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
from helpers.client import QueryRuntimeException from helpers.client import QueryRuntimeException
import os.path
from helpers.test_tools import assert_eq_with_retry from helpers.test_tools import assert_eq_with_retry
@ -9,8 +10,9 @@ FIRST_PART_NAME = "all_1_1_0"
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node = cluster.add_instance( node = cluster.add_instance(
"node", "node",
main_configs=["configs/storage.xml"], main_configs=["configs/storage.xml", "configs/allow_backup_path.xml"],
tmpfs=["/disk:size=100M"], tmpfs=["/disk:size=100M"],
external_dirs=["/backups/"],
with_minio=True, with_minio=True,
stay_alive=True, stay_alive=True,
) )
@ -33,6 +35,15 @@ def cleanup_after_test():
node.query("DROP TABLE IF EXISTS encrypted_test SYNC") node.query("DROP TABLE IF EXISTS encrypted_test SYNC")
backup_id_counter = 0
def new_backup_name():
global backup_id_counter
backup_id_counter += 1
return f"backup{backup_id_counter}"
@pytest.mark.parametrize( @pytest.mark.parametrize(
"policy", "policy",
["encrypted_policy", "encrypted_policy_key192b", "local_policy", "s3_policy"], ["encrypted_policy", "encrypted_policy_key192b", "local_policy", "s3_policy"],
@ -295,3 +306,105 @@ def test_restart():
assert node.query(select_query) == "(0,'data'),(1,'data')" assert node.query(select_query) == "(0,'data'),(1,'data')"
node.query("DROP TABLE encrypted_test SYNC;") node.query("DROP TABLE encrypted_test SYNC;")
@pytest.mark.parametrize(
"backup_type,old_storage_policy,new_storage_policy,decrypt_files_from_encrypted_disks",
[
("S3", "encrypted_policy", "encrypted_policy", False),
("S3", "encrypted_policy", "s3_encrypted_default_path", False),
("S3", "s3_encrypted_default_path", "s3_encrypted_default_path", False),
("S3", "s3_encrypted_default_path", "encrypted_policy", False),
("File", "s3_encrypted_default_path", "encrypted_policy", False),
("File", "local_policy", "encrypted_policy", False),
("File", "encrypted_policy", "local_policy", False),
("File", "encrypted_policy", "local_policy", True),
],
)
def test_backup_restore(
backup_type,
old_storage_policy,
new_storage_policy,
decrypt_files_from_encrypted_disks,
):
node.query(
f"""
CREATE TABLE encrypted_test (
id Int64,
data String
) ENGINE=MergeTree()
ORDER BY id
SETTINGS storage_policy='{old_storage_policy}'
"""
)
node.query("INSERT INTO encrypted_test VALUES (0,'data'),(1,'data')")
select_query = "SELECT * FROM encrypted_test ORDER BY id FORMAT Values"
assert node.query(select_query) == "(0,'data'),(1,'data')"
backup_name = new_backup_name()
if backup_type == "S3":
backup_destination = (
f"S3('http://minio1:9001/root/backups/{backup_name}', 'minio', 'minio123')"
)
elif backup_type == "File":
backup_destination = f"File('/backups/{backup_name}/')"
node.query(
f"BACKUP TABLE encrypted_test TO {backup_destination} SETTINGS decrypt_files_from_encrypted_disks={int(decrypt_files_from_encrypted_disks)}"
)
storage_policy_changed = old_storage_policy != new_storage_policy
old_disk_encrypted = old_storage_policy.find("encrypted") != -1
new_disk_encrypted = new_storage_policy.find("encrypted") != -1
if backup_type == "File":
root_path = os.path.join(node.cluster.instances_dir, "backups", backup_name)
expect_encrypted_in_backup = (
old_disk_encrypted and not decrypt_files_from_encrypted_disks
)
with open(f"{root_path}/metadata/default/encrypted_test.sql") as file:
assert file.read().startswith("CREATE TABLE default.encrypted_test")
with open(f"{root_path}/.backup") as file:
found_encrypted_in_backup = (
file.read().find("<encrypted_by_disk>true</encrypted_by_disk>") != -1
)
assert found_encrypted_in_backup == expect_encrypted_in_backup
with open(
f"{root_path}/data/default/encrypted_test/all_1_1_0/data.bin", "rb"
) as file:
found_encrypted_in_backup = file.read().startswith(b"ENC")
assert found_encrypted_in_backup == expect_encrypted_in_backup
node.query(f"DROP TABLE encrypted_test SYNC")
if storage_policy_changed:
node.query(
f"""
CREATE TABLE encrypted_test (
id Int64,
data String
) ENGINE=MergeTree()
ORDER BY id
SETTINGS storage_policy='{new_storage_policy}'
"""
)
restore_command = f"RESTORE TABLE encrypted_test FROM {backup_destination} SETTINGS allow_different_table_def={int(storage_policy_changed)}"
expect_error = None
if (
old_disk_encrypted
and not new_disk_encrypted
and not decrypt_files_from_encrypted_disks
):
expect_error = "can be restored only to an encrypted disk"
if expect_error:
assert expect_error in node.query_and_get_error(restore_command)
else:
node.query(restore_command)
assert node.query(select_query) == "(0,'data'),(1,'data')"

View File

@ -225,7 +225,9 @@ def select(
def rename_column( def rename_column(
node, table_name, name, new_name, iterations=1, ignore_exception=False node, table_name, name, new_name, iterations=1, ignore_exception=False
): ):
for i in range(iterations): i = 0
while True:
i += 1
try: try:
node.query( node.query(
"ALTER TABLE {table_name} RENAME COLUMN {name} to {new_name}".format( "ALTER TABLE {table_name} RENAME COLUMN {name} to {new_name}".format(
@ -233,14 +235,22 @@ def rename_column(
) )
) )
except QueryRuntimeException as ex: except QueryRuntimeException as ex:
if "Coordination::Exception" in str(ex):
continue
if not ignore_exception: if not ignore_exception:
raise raise
if i >= iterations:
break
def rename_column_on_cluster( def rename_column_on_cluster(
node, table_name, name, new_name, iterations=1, ignore_exception=False node, table_name, name, new_name, iterations=1, ignore_exception=False
): ):
for i in range(iterations): i = 0
while True:
i += 1
try: try:
node.query( node.query(
"ALTER TABLE {table_name} ON CLUSTER test_cluster RENAME COLUMN {name} to {new_name}".format( "ALTER TABLE {table_name} ON CLUSTER test_cluster RENAME COLUMN {name} to {new_name}".format(
@ -248,12 +258,20 @@ def rename_column_on_cluster(
) )
) )
except QueryRuntimeException as ex: except QueryRuntimeException as ex:
if "Coordination::Exception" in str(ex):
continue
if not ignore_exception: if not ignore_exception:
raise raise
if i >= iterations:
break
def alter_move(node, table_name, iterations=1, ignore_exception=False): def alter_move(node, table_name, iterations=1, ignore_exception=False):
for i in range(iterations): i = 0
while True:
i += 1
move_part = random.randint(0, 99) move_part = random.randint(0, 99)
move_volume = "external" move_volume = "external"
try: try:
@ -263,9 +281,15 @@ def alter_move(node, table_name, iterations=1, ignore_exception=False):
) )
) )
except QueryRuntimeException as ex: except QueryRuntimeException as ex:
if "Coordination::Exception" in str(ex):
continue
if not ignore_exception: if not ignore_exception:
raise raise
if i >= iterations:
break
def test_rename_parallel_same_node(started_cluster): def test_rename_parallel_same_node(started_cluster):
table_name = "test_rename_parallel_same_node" table_name = "test_rename_parallel_same_node"

View File

@ -3,7 +3,7 @@
1 1
1 1
[[1],[-1]] [[1],[-1]]
[] \N
1 1
42 42 42 42
[NULL,'','',NULL] [NULL,'','',NULL]

View File

@ -1 +1 @@
['\0','\0','\0'] [0,0,0]

View File

@ -1,3 +0,0 @@
[1]
[0,1,2,3,4,5,6,7,8,9]
[8,9]

View File

@ -1,5 +0,0 @@
SET aggregate_functions_null_for_empty = 1;
SELECT groupArray(1);
SELECT groupArray(number) FROM numbers(10);
SELECT groupArrayLast(2)(number) FROM numbers(10);

View File

@ -0,0 +1,2 @@
SET allow_experimental_query_cache = 0;
SET allow_experimental_query_cache = 1;