Merge branch 'zvonand-implicit-tz' of github.com:zvonand/ClickHouse into zvonand-implicit-tz
Commit 4e59b07c18

.gitmodules (vendored): 6 lines changed
@ -268,9 +268,6 @@
[submodule "contrib/vectorscan"]
    path = contrib/vectorscan
    url = https://github.com/VectorCamp/vectorscan.git
[submodule "contrib/c-ares"]
    path = contrib/c-ares
    url = https://github.com/ClickHouse/c-ares
[submodule "contrib/llvm-project"]
    path = contrib/llvm-project
    url = https://github.com/ClickHouse/llvm-project
@ -344,3 +341,6 @@
[submodule "contrib/isa-l"]
    path = contrib/isa-l
    url = https://github.com/ClickHouse/isa-l.git
[submodule "contrib/c-ares"]
    path = contrib/c-ares
    url = https://github.com/c-ares/c-ares.git
@ -22,11 +22,10 @@ curl https://clickhouse.com/ | sh

## Upcoming Events

* [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - May 31 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Barcelona**](https://www.meetup.com/clickhouse-barcelona-user-group/events/292892669) - May 25
* [**ClickHouse Meetup in London**](https://www.meetup.com/clickhouse-london-user-group/events/292892824) - May 25
* [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - Jun 8 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Bangalore**](https://www.meetup.com/clickhouse-bangalore-user-group/events/293740066/) - Jun 7
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/293426725/) - Jun 7
* [**ClickHouse Meetup in Stockholm**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - Jun 13

Also, keep an eye out for upcoming meetups in Amsterdam, Boston, NYC, Beijing, and Toronto. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.
contrib/c-ares (vendored): 2 lines changed
@ -1 +1 @@
Subproject commit afee6748b0b99acf4509d42fa37ac8422262f91b
Subproject commit 6360e96b5cf8e5980c887ce58ef727e53d77243a
@ -48,6 +48,7 @@ SET(SRCS
    "${LIBRARY_DIR}/src/lib/ares_platform.c"
    "${LIBRARY_DIR}/src/lib/ares_process.c"
    "${LIBRARY_DIR}/src/lib/ares_query.c"
    "${LIBRARY_DIR}/src/lib/ares_rand.c"
    "${LIBRARY_DIR}/src/lib/ares_search.c"
    "${LIBRARY_DIR}/src/lib/ares_send.c"
    "${LIBRARY_DIR}/src/lib/ares_strcasecmp.c"
contrib/libgsasl (vendored): 2 lines changed
@ -1 +1 @@
Subproject commit f4e7bf0bb068030d57266f87ccac4c8c012fb5c4
Subproject commit 0fb79e7609ae5a5e015a41d24bcbadd48f8f5469
contrib/libxml2 (vendored): 2 lines changed
@ -1 +1 @@
Subproject commit f507d167f1755b7eaea09fb1a44d29aab828b6d1
Subproject commit 223cb03a5d27b1b2393b266a8657443d046139d6
@ -1,6 +0,0 @@
# ARM (AArch64) build works on Amazon Graviton, Oracle Cloud, Huawei Cloud ARM machines.
# The support for AArch64 is pre-production ready.

wget 'https://builds.clickhouse.com/master/aarch64/clickhouse'
chmod a+x ./clickhouse
sudo ./clickhouse install
@ -1,3 +0,0 @@
fetch 'https://builds.clickhouse.com/master/freebsd/clickhouse'
chmod a+x ./clickhouse
su -m root -c './clickhouse install'
@ -1,3 +0,0 @@
wget 'https://builds.clickhouse.com/master/macos-aarch64/clickhouse'
chmod a+x ./clickhouse
./clickhouse
@ -1,3 +0,0 @@
wget 'https://builds.clickhouse.com/master/macos/clickhouse'
chmod a+x ./clickhouse
./clickhouse
@ -43,7 +43,7 @@ sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
For other Linux distributions, check the availability of LLVM's [prebuilt packages](https://releases.llvm.org/download.html).

As of April 2023, any version of Clang >= 15 will work.
GCC as a compiler is not supported
GCC as a compiler is not supported.
To build with a specific Clang version:

:::tip
@ -114,18 +114,3 @@ mkdir build
cmake -S . -B build
cmake --build build
```

## You Don’t Have to Build ClickHouse {#you-dont-have-to-build-clickhouse}

ClickHouse is available in pre-built binaries and packages. Binaries are portable and can be run on any Linux flavour.

The CI checks build the binaries on each commit to [ClickHouse](https://github.com/clickhouse/clickhouse/). To download them:

1. Open the [commits list](https://github.com/ClickHouse/ClickHouse/commits/master)
1. Choose a **Merge pull request** commit that includes the new feature, or was added after the new feature
1. Click the status symbol (yellow dot, red x, green check) to open the CI check list
1. Scroll through the list until you find **ClickHouse build check x/x artifact groups are OK**
1. Click **Details**
1. Find the type of package for your operating system that you need and download the files.

![build artifact check](images/find-build-artifact.png)
@ -5,7 +5,7 @@ sidebar_label: Reddit comments

# Reddit comments dataset

This dataset contains publicly-available comments on Reddit from December 2005 to March 2023, with over 7B rows of data. The raw data is in JSON format in compressed `.zst` files and the rows look like the following:
This dataset contains publicly-available comments on Reddit from December 2005 to March 2023, with over 14B rows of data. The raw data is in JSON format in compressed files and the rows look like the following:

```json
{"controversiality":0,"body":"A look at Vietnam and Mexico exposes the myth of market liberalisation.","subreddit_id":"t5_6","link_id":"t3_17863","stickied":false,"subreddit":"reddit.com","score":2,"ups":2,"author_flair_css_class":null,"created_utc":1134365188,"author_flair_text":null,"author":"frjo","id":"c13","edited":false,"parent_id":"t3_17863","gilded":0,"distinguished":null,"retrieved_on":1473738411}
@ -18,7 +18,7 @@ This dataset contains publicly-available comments on Reddit that go back to Dece
A shoutout to Percona for the [motivation behind ingesting this dataset](https://www.percona.com/blog/big-data-set-reddit-comments-analyzing-clickhouse/), which we have downloaded and stored in an S3 bucket.

:::note
The following commands were executed on ClickHouse Cloud. To run this on your own cluster, replace `default` in the `s3Cluster` function call with the name of your cluster. If you do not have a cluster, then replace the `s3Cluster` function with the `s3` function.
The following commands were executed on a Production instance of ClickHouse Cloud with the minimum memory set to 720GB. To run this on your own cluster, replace `default` in the `s3Cluster` function call with the name of your cluster. If you do not have a cluster, then replace the `s3Cluster` function with the `s3` function.
:::

1. Let's create a table for the Reddit data:
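The table definition itself falls outside this diff hunk. Purely for orientation, a minimal sketch of a schema matching the JSON sample above might look like the following; the column names come from that sample, while the types, the `MergeTree` ordering key, and the `LowCardinality` tuning are assumptions rather than the tutorial's actual DDL:

```sql
-- Hypothetical sketch; the real tutorial's schema may differ in types and keys.
CREATE TABLE reddit
(
    subreddit LowCardinality(String),
    subreddit_id String,
    id String,
    author String,
    body String,
    created_utc DateTime,
    parent_id String,
    link_id String,
    score Int32,
    ups Int32,
    gilded UInt8,
    controversiality UInt8,
    stickied Bool,
    edited Bool,
    retrieved_on DateTime
)
ENGINE = MergeTree
ORDER BY (subreddit, created_utc);
```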
@ -75,18 +75,6 @@ The names of the files in S3 start with `RC_YYYY-MM` where `YYYY-MM` goes from `
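Before bulk-loading anything, it can also help to peek at a few raw rows straight from S3. This is not part of the original walkthrough; the sketch below simply reuses the December 2017 file referenced in the next step:

```sql
-- Preview a handful of raw rows without touching any table.
SELECT *
FROM s3(
    'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz',
    'JSONEachRow'
)
LIMIT 3;
```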
2. We are going to start with one month of data, but if you want to simply insert every row - skip ahead to step 8 below. The following file has 86M records from December, 2017:
|
||||
|
||||
```sql
|
||||
INSERT INTO reddit
|
||||
SELECT *
|
||||
FROM s3Cluster(
|
||||
'default',
|
||||
'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz',
|
||||
'JSONEachRow'
|
||||
);
|
||||
```
|
||||
|
||||
If you do not have a cluster, use `s3` instead of `s3Cluster`:
|
||||
|
||||
```sql
|
||||
INSERT INTO reddit
|
||||
SELECT *
|
||||
@ -94,6 +82,7 @@ INSERT INTO reddit
|
||||
'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz',
|
||||
'JSONEachRow'
|
||||
);
|
||||
|
||||
```
|
||||
|
||||
3. It will take a while depending on your resources, but when it's done verify it worked:
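The verification query is cut off by the hunk; a minimal check along the lines the step describes (a plain row count, which should come to roughly 86M for December 2017) could be:

```sql
-- Sanity check after the single-month insert.
SELECT formatReadableQuantity(count()) AS row_count
FROM reddit;
```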
@ -198,26 +187,81 @@ LIMIT 10;
|
||||
TRUNCATE TABLE reddit;
|
||||
```
|
||||
|
||||
8. This is a fun dataset and it looks like we can find some great information, so let's go ahead and insert the entire dataset from 2005 to 2023. When you're ready, run this command to insert all the rows. (It takes a while - up to 17 hours!)
|
||||
8. This is a fun dataset and it looks like we can find some great information, so let's go ahead and insert the entire dataset from 2005 to 2023. For practical reasons, it works well to insert the data by years starting with...
|
||||
|
||||
|
||||
```sql
|
||||
INSERT INTO reddit
|
||||
SELECT *
|
||||
FROM s3Cluster(
|
||||
'default',
|
||||
'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2005*',
|
||||
'JSONEachRow'
|
||||
)
|
||||
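-- Explanatory note, not part of the original statement: zstd_window_log_max raises the ceiling on the zstd decompression window, which some of these archive files appear to need.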
SETTINGS zstd_window_log_max = 31;
|
||||
```
|
||||
|
||||
...and ending with:
|
||||
|
||||
```sql
|
||||
INSERT INTO reddit
|
||||
SELECT *
|
||||
FROM s3Cluster(
|
||||
'default',
|
||||
'https://clickhouse-public-datasets.s3.amazonaws.com/reddit/original/RC*',
|
||||
'https://clickhouse-public-datasets.s3.amazonaws.com/reddit/original/RC_2023*',
|
||||
'JSONEachRow'
|
||||
)
|
||||
SETTINGS zstd_window_log_max = 31;
|
||||
```
|
||||
|
||||
The response looks like:
|
||||
If you do not have a cluster, use `s3` instead of `s3Cluster`:
|
||||
|
||||
```response
|
||||
0 rows in set. Elapsed: 61187.839 sec. Processed 6.74 billion rows, 2.06 TB (110.17 thousand rows/s., 33.68 MB/s.)
|
||||
```sql
|
||||
INSERT INTO reddit
|
||||
SELECT *
|
||||
FROM s3(
|
||||
'https://clickhouse-public-datasets.s3.amazonaws.com/reddit/original/RC_2005*',
|
||||
'JSONEachRow'
|
||||
)
|
||||
SETTINGS zstd_window_log_max = 31;
|
||||
```
|
||||
|
||||
8. Let's see how many rows were inserted and how much disk space the table is using:
|
||||
8. To verify it worked, here are the row counts per year (as of February 2023):
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toYear(created_utc) AS year,
|
||||
formatReadableQuantity(count())
|
||||
FROM reddit
|
||||
GROUP BY year;
|
||||
```
|
||||
|
||||
```response
|
||||
|
||||
┌─year─┬─formatReadableQuantity(count())─┐
|
||||
│ 2005 │ 1.07 thousand │
|
||||
│ 2006 │ 417.18 thousand │
|
||||
│ 2007 │ 2.46 million │
|
||||
│ 2008 │ 7.24 million │
|
||||
│ 2009 │ 18.86 million │
|
||||
│ 2010 │ 42.93 million │
|
||||
│ 2011 │ 28.91 million │
|
||||
│ 2012 │ 260.31 million │
|
||||
│ 2013 │ 402.21 million │
|
||||
│ 2014 │ 531.80 million │
|
||||
│ 2015 │ 667.76 million │
|
||||
│ 2016 │ 799.90 million │
|
||||
│ 2017 │ 972.86 million │
|
||||
│ 2018 │ 1.24 billion │
|
||||
│ 2019 │ 1.66 billion │
|
||||
│ 2020 │ 2.16 billion │
|
||||
│ 2021 │ 2.59 billion │
|
||||
│ 2022 │ 2.82 billion │
|
||||
│ 2023 │ 474.86 million │
|
||||
└──────┴─────────────────────────────────┘
|
||||
```
|
||||
|
||||
9. Let's see how many rows were inserted and how much disk space the table is using:
|
||||
|
||||
|
||||
```sql
|
||||
@ -227,17 +271,17 @@ SELECT
|
||||
formatReadableSize(sum(bytes)) AS disk_size,
|
||||
formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size
|
||||
FROM system.parts
|
||||
WHERE (table = 'reddit') AND active
|
||||
WHERE (table = 'reddit') AND active;
|
||||
```
|
||||
|
||||
Notice the compression of disk storage is about 1/3 of the uncompressed size:
|
||||
|
||||
```response
|
||||
┌──────count─┬─formatReadableQuantity(sum(rows))─┬─disk_size──┬─uncompressed_size─┐
|
||||
│ 6739503568 │ 6.74 billion │ 501.10 GiB │ 1.51 TiB │
|
||||
└────────────┴───────────────────────────────────┴────────────┴───────────────────┘
|
||||
┌───────count─┬─formatReadableQuantity(sum(rows))─┬─disk_size─┬─uncompressed_size─┐
|
||||
│ 14688534662 │ 14.69 billion │ 1.03 TiB │ 3.26 TiB │
|
||||
└─────────────┴───────────────────────────────────┴───────────┴───────────────────┘
|
||||
|
||||
1 row in set. Elapsed: 0.010 sec.
|
||||
1 row in set. Elapsed: 0.005 sec.
|
||||
```
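As a side note that is not in the original tutorial, the roughly 3:1 compression mentioned above can be computed explicitly from the same `system.parts` columns used in the previous query:

```sql
-- Hedged sketch: compression ratio of the reddit table's active parts.
SELECT round(sum(data_uncompressed_bytes) / sum(bytes), 2) AS compression_ratio
FROM system.parts
WHERE (table = 'reddit') AND active;
```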
9. The following query shows how many comments, authors and subreddits we have for each month:
|
||||
@ -256,185 +300,216 @@ GROUP BY firstOfMonth
|
||||
ORDER BY firstOfMonth ASC;
|
||||
```
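Only the `GROUP BY`/`ORDER BY` tail of this statement survives the diff. A hedged reconstruction of what the full query plausibly looks like is shown below; the column aliases are inferred from the response table, and the `bar()` bounds are illustrative guesses rather than the tutorial's exact values:

```sql
-- Sketch only: aliases follow the response columns; the bar() scales are assumptions.
SELECT
    toStartOfMonth(created_utc) AS firstOfMonth,
    count() AS c,
    bar(c, 0, 50000000, 25) AS bar_count,
    uniq(author) AS authors,
    bar(authors, 0, 5000000, 25) AS bar_authors,
    uniq(subreddit) AS subreddits,
    bar(subreddits, 0, 100000, 25) AS bar_subreddits
FROM reddit
GROUP BY firstOfMonth
ORDER BY firstOfMonth ASC;
```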
This is a substantial query that has to process all 6.74 billion rows, but we still get an impressive response time (about 3 minutes):
|
||||
This is a substantial query that has to process all 14.69 billion rows, but we still get an impressive response time (about 48 seconds):
|
||||
|
||||
```response
|
||||
┌─firstOfMonth─┬─────────c─┬─bar_count─────────────────┬─authors─┬─bar_authors───────────────┬─subreddits─┬─bar_subreddits────────────┐
|
||||
│ 2005-12-01 │ 1075 │ │ 394 │ │ 1 │ │
|
||||
│ 2006-01-01 │ 3666 │ │ 791 │ │ 2 │ │
|
||||
│ 2006-02-01 │ 9095 │ │ 1464 │ │ 18 │ │
|
||||
│ 2006-03-01 │ 13859 │ │ 1958 │ │ 15 │ │
|
||||
│ 2006-04-01 │ 19090 │ │ 2334 │ │ 21 │ │
|
||||
│ 2006-05-01 │ 26859 │ │ 2698 │ │ 21 │ │
|
||||
│ 2006-06-01 │ 29163 │ │ 3043 │ │ 19 │ │
|
||||
│ 2006-07-01 │ 37031 │ │ 3532 │ │ 22 │ │
|
||||
│ 2006-08-01 │ 50559 │ │ 4750 │ │ 24 │ │
|
||||
│ 2006-09-01 │ 50675 │ │ 4908 │ │ 21 │ │
|
||||
│ 2006-10-01 │ 54148 │ │ 5654 │ │ 31 │ │
|
||||
│ 2006-11-01 │ 62021 │ │ 6490 │ │ 23 │ │
|
||||
│ 2006-12-01 │ 61018 │ │ 6707 │ │ 24 │ │
|
||||
│ 2007-01-01 │ 81341 │ │ 7931 │ │ 23 │ │
|
||||
│ 2007-02-01 │ 95634 │ │ 9020 │ │ 21 │ │
|
||||
│ 2007-03-01 │ 112444 │ │ 10842 │ │ 23 │ │
|
||||
│ 2007-04-01 │ 126773 │ │ 10701 │ │ 26 │ │
|
||||
│ 2007-05-01 │ 170097 │ │ 11365 │ │ 25 │ │
|
||||
│ 2007-06-01 │ 178800 │ │ 11267 │ │ 22 │ │
|
||||
│ 2007-07-01 │ 203319 │ │ 12482 │ │ 25 │ │
|
||||
│ 2007-08-01 │ 225111 │ │ 14124 │ │ 30 │ │
|
||||
│ 2007-09-01 │ 259497 │ ▏ │ 15416 │ │ 33 │ │
|
||||
│ 2007-10-01 │ 274170 │ ▏ │ 15302 │ │ 36 │ │
|
||||
│ 2007-11-01 │ 372983 │ ▏ │ 15134 │ │ 43 │ │
|
||||
│ 2007-12-01 │ 363390 │ ▏ │ 15915 │ │ 31 │ │
|
||||
│ 2008-01-01 │ 452990 │ ▏ │ 18857 │ │ 126 │ │
|
||||
│ 2008-02-01 │ 441768 │ ▏ │ 18266 │ │ 173 │ │
|
||||
│ 2008-03-01 │ 463728 │ ▏ │ 18947 │ │ 292 │ │
|
||||
│ 2008-04-01 │ 468317 │ ▏ │ 18590 │ │ 323 │ │
|
||||
│ 2008-05-01 │ 536380 │ ▎ │ 20861 │ │ 375 │ │
|
||||
│ 2008-06-01 │ 577684 │ ▎ │ 22557 │ │ 575 │ ▏ │
|
||||
│ 2008-07-01 │ 592610 │ ▎ │ 23123 │ │ 657 │ ▏ │
|
||||
│ 2008-08-01 │ 595959 │ ▎ │ 23729 │ │ 707 │ ▏ │
|
||||
│ 2008-09-01 │ 680892 │ ▎ │ 26374 │ ▏ │ 801 │ ▏ │
|
||||
│ 2008-10-01 │ 789874 │ ▍ │ 28970 │ ▏ │ 893 │ ▏ │
|
||||
│ 2008-11-01 │ 792310 │ ▍ │ 30272 │ ▏ │ 1024 │ ▎ │
|
||||
│ 2008-12-01 │ 850359 │ ▍ │ 34073 │ ▏ │ 1103 │ ▎ │
|
||||
│ 2009-01-01 │ 1051649 │ ▌ │ 38978 │ ▏ │ 1316 │ ▎ │
|
||||
│ 2009-02-01 │ 944711 │ ▍ │ 43390 │ ▏ │ 1132 │ ▎ │
|
||||
│ 2009-03-01 │ 1048643 │ ▌ │ 46516 │ ▏ │ 1203 │ ▎ │
|
||||
│ 2009-04-01 │ 1094599 │ ▌ │ 48284 │ ▏ │ 1334 │ ▎ │
|
||||
│ 2009-05-01 │ 1201257 │ ▌ │ 52512 │ ▎ │ 1395 │ ▎ │
|
||||
│ 2009-06-01 │ 1258750 │ ▋ │ 57728 │ ▎ │ 1473 │ ▎ │
|
||||
│ 2009-07-01 │ 1470290 │ ▋ │ 60098 │ ▎ │ 1686 │ ▍ │
|
||||
│ 2009-08-01 │ 1750688 │ ▉ │ 67347 │ ▎ │ 1777 │ ▍ │
|
||||
│ 2009-09-01 │ 2032276 │ █ │ 78051 │ ▍ │ 1784 │ ▍ │
|
||||
│ 2009-10-01 │ 2242017 │ █ │ 93409 │ ▍ │ 2071 │ ▌ │
|
||||
│ 2009-11-01 │ 2207444 │ █ │ 95940 │ ▍ │ 2141 │ ▌ │
|
||||
│ 2009-12-01 │ 2560510 │ █▎ │ 104239 │ ▌ │ 2141 │ ▌ │
|
||||
│ 2010-01-01 │ 2884096 │ █▍ │ 114314 │ ▌ │ 2313 │ ▌ │
|
||||
│ 2010-02-01 │ 2687779 │ █▎ │ 115683 │ ▌ │ 2522 │ ▋ │
|
||||
│ 2010-03-01 │ 3228254 │ █▌ │ 125775 │ ▋ │ 2890 │ ▋ │
|
||||
│ 2010-04-01 │ 3209898 │ █▌ │ 128936 │ ▋ │ 3170 │ ▊ │
|
||||
│ 2010-05-01 │ 3267363 │ █▋ │ 131851 │ ▋ │ 3166 │ ▊ │
|
||||
│ 2010-06-01 │ 3532867 │ █▊ │ 139522 │ ▋ │ 3301 │ ▊ │
|
||||
│ 2010-07-01 │ 4032737 │ ██ │ 153451 │ ▊ │ 3662 │ ▉ │
|
||||
│ 2010-08-01 │ 4247982 │ ██ │ 164071 │ ▊ │ 3653 │ ▉ │
|
||||
│ 2010-09-01 │ 4704069 │ ██▎ │ 186613 │ ▉ │ 4009 │ █ │
|
||||
│ 2010-10-01 │ 5032368 │ ██▌ │ 203800 │ █ │ 4154 │ █ │
|
||||
│ 2010-11-01 │ 5689002 │ ██▊ │ 226134 │ █▏ │ 4383 │ █ │
|
||||
│ 2010-12-01 │ 5972642 │ ██▉ │ 245824 │ █▏ │ 4692 │ █▏ │
|
||||
│ 2011-01-01 │ 6603329 │ ███▎ │ 270025 │ █▎ │ 5141 │ █▎ │
|
||||
│ 2011-02-01 │ 6363114 │ ███▏ │ 277593 │ █▍ │ 5202 │ █▎ │
|
||||
│ 2011-03-01 │ 7556165 │ ███▊ │ 314748 │ █▌ │ 5445 │ █▎ │
|
||||
│ 2011-04-01 │ 7571398 │ ███▊ │ 329920 │ █▋ │ 6128 │ █▌ │
|
||||
│ 2011-05-01 │ 8803949 │ ████▍ │ 365013 │ █▊ │ 6834 │ █▋ │
|
||||
│ 2011-06-01 │ 9766511 │ ████▉ │ 393945 │ █▉ │ 7519 │ █▉ │
|
||||
│ 2011-07-01 │ 10557466 │ █████▎ │ 424235 │ ██ │ 8293 │ ██ │
|
||||
│ 2011-08-01 │ 12316144 │ ██████▏ │ 475326 │ ██▍ │ 9657 │ ██▍ │
|
||||
│ 2011-09-01 │ 12150412 │ ██████ │ 503142 │ ██▌ │ 10278 │ ██▌ │
|
||||
│ 2011-10-01 │ 13470278 │ ██████▋ │ 548801 │ ██▋ │ 10922 │ ██▋ │
|
||||
│ 2011-11-01 │ 13621533 │ ██████▊ │ 574435 │ ██▊ │ 11572 │ ██▉ │
|
||||
│ 2011-12-01 │ 14509469 │ ███████▎ │ 622849 │ ███ │ 12335 │ ███ │
|
||||
│ 2012-01-01 │ 16350205 │ ████████▏ │ 696110 │ ███▍ │ 14281 │ ███▌ │
|
||||
│ 2012-02-01 │ 16015695 │ ████████ │ 722892 │ ███▌ │ 14949 │ ███▋ │
|
||||
│ 2012-03-01 │ 17881943 │ ████████▉ │ 789664 │ ███▉ │ 15795 │ ███▉ │
|
||||
│ 2012-04-01 │ 19044534 │ █████████▌ │ 842491 │ ████▏ │ 16440 │ ████ │
|
||||
│ 2012-05-01 │ 20388260 │ ██████████▏ │ 886176 │ ████▍ │ 16974 │ ████▏ │
|
||||
│ 2012-06-01 │ 21897913 │ ██████████▉ │ 946798 │ ████▋ │ 17952 │ ████▍ │
|
||||
│ 2012-07-01 │ 24087517 │ ████████████ │ 1018636 │ █████ │ 19069 │ ████▊ │
|
||||
│ 2012-08-01 │ 25703326 │ ████████████▊ │ 1094445 │ █████▍ │ 20553 │ █████▏ │
|
||||
│ 2012-09-01 │ 23419524 │ ███████████▋ │ 1088491 │ █████▍ │ 20831 │ █████▏ │
|
||||
│ 2012-10-01 │ 24788236 │ ████████████▍ │ 1131885 │ █████▋ │ 21868 │ █████▍ │
|
||||
│ 2012-11-01 │ 24648302 │ ████████████▎ │ 1167608 │ █████▊ │ 21791 │ █████▍ │
|
||||
│ 2012-12-01 │ 26080276 │ █████████████ │ 1218402 │ ██████ │ 22622 │ █████▋ │
|
||||
│ 2013-01-01 │ 30365867 │ ███████████████▏ │ 1341703 │ ██████▋ │ 24696 │ ██████▏ │
|
||||
│ 2013-02-01 │ 27213960 │ █████████████▌ │ 1304756 │ ██████▌ │ 24514 │ ██████▏ │
|
||||
│ 2013-03-01 │ 30771274 │ ███████████████▍ │ 1391703 │ ██████▉ │ 25730 │ ██████▍ │
|
||||
│ 2013-04-01 │ 33259557 │ ████████████████▋ │ 1485971 │ ███████▍ │ 27294 │ ██████▊ │
|
||||
│ 2013-05-01 │ 33126225 │ ████████████████▌ │ 1506473 │ ███████▌ │ 27299 │ ██████▊ │
|
||||
│ 2013-06-01 │ 32648247 │ ████████████████▎ │ 1506650 │ ███████▌ │ 27450 │ ██████▊ │
|
||||
│ 2013-07-01 │ 34922133 │ █████████████████▍ │ 1561771 │ ███████▊ │ 28294 │ ███████ │
|
||||
│ 2013-08-01 │ 34766579 │ █████████████████▍ │ 1589781 │ ███████▉ │ 28943 │ ███████▏ │
|
||||
│ 2013-09-01 │ 31990369 │ ███████████████▉ │ 1570342 │ ███████▊ │ 29408 │ ███████▎ │
|
||||
│ 2013-10-01 │ 35940040 │ █████████████████▉ │ 1683770 │ ████████▍ │ 30273 │ ███████▌ │
|
||||
│ 2013-11-01 │ 37396497 │ ██████████████████▋ │ 1757467 │ ████████▊ │ 31173 │ ███████▊ │
|
||||
│ 2013-12-01 │ 39810216 │ ███████████████████▉ │ 1846204 │ █████████▏ │ 32326 │ ████████ │
|
||||
│ 2014-01-01 │ 42420655 │ █████████████████████▏ │ 1927229 │ █████████▋ │ 35603 │ ████████▉ │
|
||||
│ 2014-02-01 │ 38703362 │ ███████████████████▎ │ 1874067 │ █████████▎ │ 37007 │ █████████▎ │
|
||||
│ 2014-03-01 │ 42459956 │ █████████████████████▏ │ 1959888 │ █████████▊ │ 37948 │ █████████▍ │
|
||||
│ 2014-04-01 │ 42440735 │ █████████████████████▏ │ 1951369 │ █████████▊ │ 38362 │ █████████▌ │
|
||||
│ 2014-05-01 │ 42514094 │ █████████████████████▎ │ 1970197 │ █████████▊ │ 39078 │ █████████▊ │
|
||||
│ 2014-06-01 │ 41990650 │ ████████████████████▉ │ 1943850 │ █████████▋ │ 38268 │ █████████▌ │
|
||||
│ 2014-07-01 │ 46868899 │ ███████████████████████▍ │ 2059346 │ ██████████▎ │ 40634 │ ██████████▏ │
|
||||
│ 2014-08-01 │ 46990813 │ ███████████████████████▍ │ 2117335 │ ██████████▌ │ 41764 │ ██████████▍ │
|
||||
│ 2014-09-01 │ 44992201 │ ██████████████████████▍ │ 2124708 │ ██████████▌ │ 41890 │ ██████████▍ │
|
||||
│ 2014-10-01 │ 47497520 │ ███████████████████████▋ │ 2206535 │ ███████████ │ 43109 │ ██████████▊ │
|
||||
│ 2014-11-01 │ 46118074 │ ███████████████████████ │ 2239747 │ ███████████▏ │ 43718 │ ██████████▉ │
|
||||
│ 2014-12-01 │ 48807699 │ ████████████████████████▍ │ 2372945 │ ███████████▊ │ 43823 │ ██████████▉ │
|
||||
│ 2015-01-01 │ 53851542 │ █████████████████████████ │ 2499536 │ ████████████▍ │ 47172 │ ███████████▊ │
|
||||
│ 2015-02-01 │ 48342747 │ ████████████████████████▏ │ 2448496 │ ████████████▏ │ 47229 │ ███████████▊ │
|
||||
│ 2015-03-01 │ 54564441 │ █████████████████████████ │ 2550534 │ ████████████▊ │ 48156 │ ████████████ │
|
||||
│ 2015-04-01 │ 55005780 │ █████████████████████████ │ 2609443 │ █████████████ │ 49865 │ ████████████▍ │
|
||||
│ 2015-05-01 │ 54504410 │ █████████████████████████ │ 2585535 │ ████████████▉ │ 50137 │ ████████████▌ │
|
||||
│ 2015-06-01 │ 54258492 │ █████████████████████████ │ 2595129 │ ████████████▉ │ 49598 │ ████████████▍ │
|
||||
│ 2015-07-01 │ 58451788 │ █████████████████████████ │ 2720026 │ █████████████▌ │ 55022 │ █████████████▊ │
|
||||
│ 2015-08-01 │ 58075327 │ █████████████████████████ │ 2743994 │ █████████████▋ │ 55302 │ █████████████▊ │
|
||||
│ 2015-09-01 │ 55574825 │ █████████████████████████ │ 2672793 │ █████████████▎ │ 53960 │ █████████████▍ │
|
||||
│ 2015-10-01 │ 59494045 │ █████████████████████████ │ 2816426 │ ██████████████ │ 70210 │ █████████████████▌ │
|
||||
│ 2015-11-01 │ 57117500 │ █████████████████████████ │ 2847146 │ ██████████████▏ │ 71363 │ █████████████████▊ │
|
||||
│ 2015-12-01 │ 58523312 │ █████████████████████████ │ 2854840 │ ██████████████▎ │ 94559 │ ███████████████████████▋ │
|
||||
│ 2016-01-01 │ 61991732 │ █████████████████████████ │ 2920366 │ ██████████████▌ │ 108438 │ █████████████████████████ │
|
||||
│ 2016-02-01 │ 59189875 │ █████████████████████████ │ 2854683 │ ██████████████▎ │ 109916 │ █████████████████████████ │
|
||||
│ 2016-03-01 │ 63918864 │ █████████████████████████ │ 2969542 │ ██████████████▊ │ 84787 │ █████████████████████▏ │
|
||||
│ 2016-04-01 │ 64271256 │ █████████████████████████ │ 2999086 │ ██████████████▉ │ 61647 │ ███████████████▍ │
|
||||
│ 2016-05-01 │ 65212004 │ █████████████████████████ │ 3034674 │ ███████████████▏ │ 67465 │ ████████████████▊ │
|
||||
│ 2016-06-01 │ 65867743 │ █████████████████████████ │ 3057604 │ ███████████████▎ │ 75170 │ ██████████████████▊ │
|
||||
│ 2016-07-01 │ 66974735 │ █████████████████████████ │ 3199374 │ ███████████████▉ │ 77732 │ ███████████████████▍ │
|
||||
│ 2016-08-01 │ 69654819 │ █████████████████████████ │ 3239957 │ ████████████████▏ │ 63080 │ ███████████████▊ │
|
||||
│ 2016-09-01 │ 67024973 │ █████████████████████████ │ 3190864 │ ███████████████▉ │ 62324 │ ███████████████▌ │
|
||||
│ 2016-10-01 │ 71826553 │ █████████████████████████ │ 3284340 │ ████████████████▍ │ 62549 │ ███████████████▋ │
|
||||
│ 2016-11-01 │ 71022319 │ █████████████████████████ │ 3300822 │ ████████████████▌ │ 69718 │ █████████████████▍ │
|
||||
│ 2016-12-01 │ 72942967 │ █████████████████████████ │ 3430324 │ █████████████████▏ │ 71705 │ █████████████████▉ │
|
||||
│ 2017-01-01 │ 78946585 │ █████████████████████████ │ 3572093 │ █████████████████▊ │ 78198 │ ███████████████████▌ │
|
||||
│ 2017-02-01 │ 70609487 │ █████████████████████████ │ 3421115 │ █████████████████ │ 69823 │ █████████████████▍ │
|
||||
│ 2017-03-01 │ 79723106 │ █████████████████████████ │ 3638122 │ ██████████████████▏ │ 73865 │ ██████████████████▍ │
|
||||
│ 2017-04-01 │ 77478009 │ █████████████████████████ │ 3620591 │ ██████████████████ │ 74387 │ ██████████████████▌ │
|
||||
│ 2017-05-01 │ 79810360 │ █████████████████████████ │ 3650820 │ ██████████████████▎ │ 74356 │ ██████████████████▌ │
|
||||
│ 2017-06-01 │ 79901711 │ █████████████████████████ │ 3737614 │ ██████████████████▋ │ 72114 │ ██████████████████ │
|
||||
│ 2017-07-01 │ 81798725 │ █████████████████████████ │ 3872330 │ ███████████████████▎ │ 76052 │ ███████████████████ │
|
||||
│ 2017-08-01 │ 84658503 │ █████████████████████████ │ 3960093 │ ███████████████████▊ │ 77798 │ ███████████████████▍ │
|
||||
│ 2017-09-01 │ 83165192 │ █████████████████████████ │ 3880501 │ ███████████████████▍ │ 78402 │ ███████████████████▌ │
|
||||
│ 2017-10-01 │ 85828912 │ █████████████████████████ │ 3980335 │ ███████████████████▉ │ 80685 │ ████████████████████▏ │
|
||||
│ 2017-11-01 │ 84965681 │ █████████████████████████ │ 4026749 │ ████████████████████▏ │ 82659 │ ████████████████████▋ │
|
||||
│ 2017-12-01 │ 85973810 │ █████████████████████████ │ 4196354 │ ████████████████████▉ │ 91984 │ ██████████████████████▉ │
|
||||
│ 2018-01-01 │ 91558594 │ █████████████████████████ │ 4364443 │ █████████████████████▊ │ 102577 │ █████████████████████████ │
|
||||
│ 2018-02-01 │ 86467179 │ █████████████████████████ │ 4277899 │ █████████████████████▍ │ 104610 │ █████████████████████████ │
|
||||
│ 2018-03-01 │ 96490262 │ █████████████████████████ │ 4422470 │ ██████████████████████ │ 112559 │ █████████████████████████ │
|
||||
│ 2018-04-01 │ 98101232 │ █████████████████████████ │ 4572434 │ ██████████████████████▊ │ 105284 │ █████████████████████████ │
|
||||
│ 2018-05-01 │ 100109100 │ █████████████████████████ │ 4698908 │ ███████████████████████▍ │ 103910 │ █████████████████████████ │
|
||||
│ 2018-06-01 │ 100009462 │ █████████████████████████ │ 4697426 │ ███████████████████████▍ │ 101107 │ █████████████████████████ │
|
||||
│ 2018-07-01 │ 108151359 │ █████████████████████████ │ 5099492 │ █████████████████████████ │ 106184 │ █████████████████████████ │
|
||||
│ 2018-08-01 │ 107330940 │ █████████████████████████ │ 5084082 │ █████████████████████████ │ 109985 │ █████████████████████████ │
|
||||
│ 2018-09-01 │ 104473929 │ █████████████████████████ │ 5011953 │ █████████████████████████ │ 109710 │ █████████████████████████ │
|
||||
│ 2018-10-01 │ 112346556 │ █████████████████████████ │ 5320405 │ █████████████████████████ │ 112533 │ █████████████████████████ │
|
||||
│ 2018-11-01 │ 112573001 │ █████████████████████████ │ 5353282 │ █████████████████████████ │ 112211 │ █████████████████████████ │
|
||||
│ 2018-12-01 │ 121953600 │ █████████████████████████ │ 5611543 │ █████████████████████████ │ 118291 │ █████████████████████████ │
|
||||
│ 2019-01-01 │ 129386587 │ █████████████████████████ │ 6016687 │ █████████████████████████ │ 125725 │ █████████████████████████ │
|
||||
│ 2019-02-01 │ 120645639 │ █████████████████████████ │ 5974488 │ █████████████████████████ │ 125420 │ █████████████████████████ │
|
||||
│ 2019-03-01 │ 137650471 │ █████████████████████████ │ 6410197 │ █████████████████████████ │ 135924 │ █████████████████████████ │
|
||||
│ 2019-04-01 │ 138473643 │ █████████████████████████ │ 6416384 │ █████████████████████████ │ 139844 │ █████████████████████████ │
|
||||
│ 2019-05-01 │ 142463421 │ █████████████████████████ │ 6574836 │ █████████████████████████ │ 142012 │ █████████████████████████ │
|
||||
│ 2019-06-01 │ 134172939 │ █████████████████████████ │ 6601267 │ █████████████████████████ │ 140997 │ █████████████████████████ │
|
||||
│ 2019-07-01 │ 145965083 │ █████████████████████████ │ 6901822 │ █████████████████████████ │ 147802 │ █████████████████████████ │
|
||||
│ 2019-08-01 │ 146854393 │ █████████████████████████ │ 6993882 │ █████████████████████████ │ 151888 │ █████████████████████████ │
|
||||
│ 2019-09-01 │ 137540219 │ █████████████████████████ │ 7001362 │ █████████████████████████ │ 148839 │ █████████████████████████ │
|
||||
│ 2019-10-01 │ 129771456 │ █████████████████████████ │ 6825690 │ █████████████████████████ │ 144453 │ █████████████████████████ │
|
||||
│ 2019-11-01 │ 107990259 │ █████████████████████████ │ 6368286 │ █████████████████████████ │ 141768 │ █████████████████████████ │
|
||||
│ 2019-12-01 │ 112895934 │ █████████████████████████ │ 6640902 │ █████████████████████████ │ 148277 │ █████████████████████████ │
|
||||
│ 2020-01-01 │ 54354879 │ █████████████████████████ │ 4782339 │ ███████████████████████▉ │ 111658 │ █████████████████████████ │
|
||||
│ 2020-02-01 │ 22696923 │ ███████████▎ │ 3135175 │ ███████████████▋ │ 79521 │ ███████████████████▉ │
|
||||
│ 2020-03-01 │ 3466677 │ █▋ │ 987960 │ ████▉ │ 40901 │ ██████████▏ │
|
||||
└──────────────┴───────────┴───────────────────────────┴─────────┴───────────────────────────┴────────────┴───────────────────────────┘
|
||||
┌─firstOfMonth─┬─────────c─┬─bar_count─────────────────┬──authors─┬─bar_authors───────────────┬─subreddits─┬─bar_subreddits────────────┐
|
||||
│ 2005-12-01 │ 1075 │ │ 394 │ │ 1 │ │
|
||||
│ 2006-01-01 │ 3666 │ │ 791 │ │ 2 │ │
|
||||
│ 2006-02-01 │ 9095 │ │ 1464 │ │ 18 │ │
|
||||
│ 2006-03-01 │ 13859 │ │ 1958 │ │ 15 │ │
|
||||
│ 2006-04-01 │ 19090 │ │ 2334 │ │ 21 │ │
|
||||
│ 2006-05-01 │ 26859 │ │ 2698 │ │ 21 │ │
|
||||
│ 2006-06-01 │ 29163 │ │ 3043 │ │ 19 │ │
|
||||
│ 2006-07-01 │ 37031 │ │ 3532 │ │ 22 │ │
|
||||
│ 2006-08-01 │ 50559 │ │ 4750 │ │ 24 │ │
|
||||
│ 2006-09-01 │ 50675 │ │ 4908 │ │ 21 │ │
|
||||
│ 2006-10-01 │ 54148 │ │ 5654 │ │ 31 │ │
|
||||
│ 2006-11-01 │ 62021 │ │ 6490 │ │ 23 │ │
|
||||
│ 2006-12-01 │ 61018 │ │ 6707 │ │ 24 │ │
|
||||
│ 2007-01-01 │ 81341 │ │ 7931 │ │ 23 │ │
|
||||
│ 2007-02-01 │ 95634 │ │ 9020 │ │ 21 │ │
|
||||
│ 2007-03-01 │ 112444 │ │ 10842 │ │ 23 │ │
|
||||
│ 2007-04-01 │ 126773 │ │ 10701 │ │ 26 │ │
|
||||
│ 2007-05-01 │ 170097 │ │ 11365 │ │ 25 │ │
|
||||
│ 2007-06-01 │ 178800 │ │ 11267 │ │ 22 │ │
|
||||
│ 2007-07-01 │ 203319 │ │ 12482 │ │ 25 │ │
|
||||
│ 2007-08-01 │ 225111 │ │ 14124 │ │ 30 │ │
|
||||
│ 2007-09-01 │ 259497 │ ▏ │ 15416 │ │ 33 │ │
|
||||
│ 2007-10-01 │ 274170 │ ▏ │ 15302 │ │ 36 │ │
|
||||
│ 2007-11-01 │ 372983 │ ▏ │ 15134 │ │ 43 │ │
|
||||
│ 2007-12-01 │ 363390 │ ▏ │ 15915 │ │ 31 │ │
|
||||
│ 2008-01-01 │ 452990 │ ▏ │ 18857 │ │ 126 │ │
|
||||
│ 2008-02-01 │ 441768 │ ▏ │ 18266 │ │ 173 │ │
|
||||
│ 2008-03-01 │ 463728 │ ▏ │ 18947 │ │ 292 │ │
|
||||
│ 2008-04-01 │ 468317 │ ▏ │ 18590 │ │ 323 │ │
|
||||
│ 2008-05-01 │ 536380 │ ▎ │ 20861 │ │ 375 │ │
|
||||
│ 2008-06-01 │ 577684 │ ▎ │ 22557 │ │ 575 │ ▏ │
|
||||
│ 2008-07-01 │ 592610 │ ▎ │ 23123 │ │ 657 │ ▏ │
|
||||
│ 2008-08-01 │ 595959 │ ▎ │ 23729 │ │ 707 │ ▏ │
|
||||
│ 2008-09-01 │ 680892 │ ▎ │ 26374 │ ▏ │ 801 │ ▏ │
|
||||
│ 2008-10-01 │ 789874 │ ▍ │ 28970 │ ▏ │ 893 │ ▏ │
|
||||
│ 2008-11-01 │ 792310 │ ▍ │ 30272 │ ▏ │ 1024 │ ▎ │
|
||||
│ 2008-12-01 │ 850359 │ ▍ │ 34073 │ ▏ │ 1103 │ ▎ │
|
||||
│ 2009-01-01 │ 1051649 │ ▌ │ 38978 │ ▏ │ 1316 │ ▎ │
|
||||
│ 2009-02-01 │ 944711 │ ▍ │ 43390 │ ▏ │ 1132 │ ▎ │
|
||||
│ 2009-03-01 │ 1048643 │ ▌ │ 46516 │ ▏ │ 1203 │ ▎ │
|
||||
│ 2009-04-01 │ 1094599 │ ▌ │ 48284 │ ▏ │ 1334 │ ▎ │
|
||||
│ 2009-05-01 │ 1201257 │ ▌ │ 52512 │ ▎ │ 1395 │ ▎ │
|
||||
│ 2009-06-01 │ 1258750 │ ▋ │ 57728 │ ▎ │ 1473 │ ▎ │
|
||||
│ 2009-07-01 │ 1470290 │ ▋ │ 60098 │ ▎ │ 1686 │ ▍ │
|
||||
│ 2009-08-01 │ 1750688 │ ▉ │ 67347 │ ▎ │ 1777 │ ▍ │
|
||||
│ 2009-09-01 │ 2032276 │ █ │ 78051 │ ▍ │ 1784 │ ▍ │
|
||||
│ 2009-10-01 │ 2242017 │ █ │ 93409 │ ▍ │ 2071 │ ▌ │
|
||||
│ 2009-11-01 │ 2207444 │ █ │ 95940 │ ▍ │ 2141 │ ▌ │
|
||||
│ 2009-12-01 │ 2560510 │ █▎ │ 104239 │ ▌ │ 2141 │ ▌ │
|
||||
│ 2010-01-01 │ 2884096 │ █▍ │ 114314 │ ▌ │ 2313 │ ▌ │
|
||||
│ 2010-02-01 │ 2687779 │ █▎ │ 115683 │ ▌ │ 2522 │ ▋ │
|
||||
│ 2010-03-01 │ 3228254 │ █▌ │ 125775 │ ▋ │ 2890 │ ▋ │
|
||||
│ 2010-04-01 │ 3209898 │ █▌ │ 128936 │ ▋ │ 3170 │ ▊ │
|
||||
│ 2010-05-01 │ 3267363 │ █▋ │ 131851 │ ▋ │ 3166 │ ▊ │
|
||||
│ 2010-06-01 │ 3532867 │ █▊ │ 139522 │ ▋ │ 3301 │ ▊ │
|
||||
│ 2010-07-01 │ 806612 │ ▍ │ 76486 │ ▍ │ 1955 │ ▍ │
|
||||
│ 2010-08-01 │ 4247982 │ ██ │ 164071 │ ▊ │ 3653 │ ▉ │
|
||||
│ 2010-09-01 │ 4704069 │ ██▎ │ 186613 │ ▉ │ 4009 │ █ │
|
||||
│ 2010-10-01 │ 5032368 │ ██▌ │ 203800 │ █ │ 4154 │ █ │
|
||||
│ 2010-11-01 │ 5689002 │ ██▊ │ 226134 │ █▏ │ 4383 │ █ │
|
||||
│ 2010-12-01 │ 3642690 │ █▊ │ 196847 │ ▉ │ 3914 │ ▉ │
|
||||
│ 2011-01-01 │ 3924540 │ █▉ │ 215057 │ █ │ 4240 │ █ │
|
||||
│ 2011-02-01 │ 3859131 │ █▉ │ 223485 │ █ │ 4371 │ █ │
|
||||
│ 2011-03-01 │ 2877996 │ █▍ │ 208607 │ █ │ 3870 │ ▉ │
|
||||
│ 2011-04-01 │ 3859131 │ █▉ │ 248931 │ █▏ │ 4881 │ █▏ │
|
||||
│ 2011-06-01 │ 3859131 │ █▉ │ 267197 │ █▎ │ 5255 │ █▎ │
|
||||
│ 2011-08-01 │ 2943405 │ █▍ │ 259428 │ █▎ │ 5806 │ █▍ │
|
||||
│ 2011-10-01 │ 3859131 │ █▉ │ 327342 │ █▋ │ 6958 │ █▋ │
|
||||
│ 2011-12-01 │ 3728313 │ █▊ │ 354817 │ █▊ │ 7713 │ █▉ │
|
||||
│ 2012-01-01 │ 16350205 │ ████████▏ │ 696110 │ ███▍ │ 14281 │ ███▌ │
|
||||
│ 2012-02-01 │ 16015695 │ ████████ │ 722892 │ ███▌ │ 14949 │ ███▋ │
|
||||
│ 2012-03-01 │ 17881943 │ ████████▉ │ 789664 │ ███▉ │ 15795 │ ███▉ │
|
||||
│ 2012-04-01 │ 19044534 │ █████████▌ │ 842491 │ ████▏ │ 16440 │ ████ │
|
||||
│ 2012-05-01 │ 20388260 │ ██████████▏ │ 886176 │ ████▍ │ 16974 │ ████▏ │
|
||||
│ 2012-06-01 │ 21897913 │ ██████████▉ │ 946798 │ ████▋ │ 17952 │ ████▍ │
|
||||
│ 2012-07-01 │ 24087517 │ ████████████ │ 1018636 │ █████ │ 19069 │ ████▊ │
|
||||
│ 2012-08-01 │ 25703326 │ ████████████▊ │ 1094445 │ █████▍ │ 20553 │ █████▏ │
|
||||
│ 2012-09-01 │ 23419524 │ ███████████▋ │ 1088491 │ █████▍ │ 20831 │ █████▏ │
|
||||
│ 2012-10-01 │ 24788236 │ ████████████▍ │ 1131885 │ █████▋ │ 21868 │ █████▍ │
|
||||
│ 2012-11-01 │ 24648302 │ ████████████▎ │ 1167608 │ █████▊ │ 21791 │ █████▍ │
|
||||
│ 2012-12-01 │ 26080276 │ █████████████ │ 1218402 │ ██████ │ 22622 │ █████▋ │
|
||||
│ 2013-01-01 │ 30365867 │ ███████████████▏ │ 1341703 │ ██████▋ │ 24696 │ ██████▏ │
|
||||
│ 2013-02-01 │ 27213960 │ █████████████▌ │ 1304756 │ ██████▌ │ 24514 │ ██████▏ │
|
||||
│ 2013-03-01 │ 30771274 │ ███████████████▍ │ 1391703 │ ██████▉ │ 25730 │ ██████▍ │
|
||||
│ 2013-04-01 │ 33259557 │ ████████████████▋ │ 1485971 │ ███████▍ │ 27294 │ ██████▊ │
|
||||
│ 2013-05-01 │ 33126225 │ ████████████████▌ │ 1506473 │ ███████▌ │ 27299 │ ██████▊ │
|
||||
│ 2013-06-01 │ 32648247 │ ████████████████▎ │ 1506650 │ ███████▌ │ 27450 │ ██████▊ │
|
||||
│ 2013-07-01 │ 34922133 │ █████████████████▍ │ 1561771 │ ███████▊ │ 28294 │ ███████ │
|
||||
│ 2013-08-01 │ 34766579 │ █████████████████▍ │ 1589781 │ ███████▉ │ 28943 │ ███████▏ │
|
||||
│ 2013-09-01 │ 31990369 │ ███████████████▉ │ 1570342 │ ███████▊ │ 29408 │ ███████▎ │
|
||||
│ 2013-10-01 │ 35940040 │ █████████████████▉ │ 1683770 │ ████████▍ │ 30273 │ ███████▌ │
|
||||
│ 2013-11-01 │ 37396497 │ ██████████████████▋ │ 1757467 │ ████████▊ │ 31173 │ ███████▊ │
|
||||
│ 2013-12-01 │ 39810216 │ ███████████████████▉ │ 1846204 │ █████████▏ │ 32326 │ ████████ │
|
||||
│ 2014-01-01 │ 42420655 │ █████████████████████▏ │ 1927229 │ █████████▋ │ 35603 │ ████████▉ │
|
||||
│ 2014-02-01 │ 38703362 │ ███████████████████▎ │ 1874067 │ █████████▎ │ 37007 │ █████████▎ │
|
||||
│ 2014-03-01 │ 42459956 │ █████████████████████▏ │ 1959888 │ █████████▊ │ 37948 │ █████████▍ │
|
||||
│ 2014-04-01 │ 42440735 │ █████████████████████▏ │ 1951369 │ █████████▊ │ 38362 │ █████████▌ │
|
||||
│ 2014-05-01 │ 42514094 │ █████████████████████▎ │ 1970197 │ █████████▊ │ 39078 │ █████████▊ │
|
||||
│ 2014-06-01 │ 41990650 │ ████████████████████▉ │ 1943850 │ █████████▋ │ 38268 │ █████████▌ │
|
||||
│ 2014-07-01 │ 46868899 │ ███████████████████████▍ │ 2059346 │ ██████████▎ │ 40634 │ ██████████▏ │
|
||||
│ 2014-08-01 │ 46990813 │ ███████████████████████▍ │ 2117335 │ ██████████▌ │ 41764 │ ██████████▍ │
|
||||
│ 2014-09-01 │ 44992201 │ ██████████████████████▍ │ 2124708 │ ██████████▌ │ 41890 │ ██████████▍ │
|
||||
│ 2014-10-01 │ 47497520 │ ███████████████████████▋ │ 2206535 │ ███████████ │ 43109 │ ██████████▊ │
|
||||
│ 2014-11-01 │ 46118074 │ ███████████████████████ │ 2239747 │ ███████████▏ │ 43718 │ ██████████▉ │
|
||||
│ 2014-12-01 │ 48807699 │ ████████████████████████▍ │ 2372945 │ ███████████▊ │ 43823 │ ██████████▉ │
|
||||
│ 2015-01-01 │ 53851542 │ █████████████████████████ │ 2499536 │ ████████████▍ │ 47172 │ ███████████▊ │
|
||||
│ 2015-02-01 │ 48342747 │ ████████████████████████▏ │ 2448496 │ ████████████▏ │ 47229 │ ███████████▊ │
|
||||
│ 2015-03-01 │ 54564441 │ █████████████████████████ │ 2550534 │ ████████████▊ │ 48156 │ ████████████ │
|
||||
│ 2015-04-01 │ 55005780 │ █████████████████████████ │ 2609443 │ █████████████ │ 49865 │ ████████████▍ │
|
||||
│ 2015-05-01 │ 54504410 │ █████████████████████████ │ 2585535 │ ████████████▉ │ 50137 │ ████████████▌ │
|
||||
│ 2015-06-01 │ 54258492 │ █████████████████████████ │ 2595129 │ ████████████▉ │ 49598 │ ████████████▍ │
|
||||
│ 2015-07-01 │ 58451788 │ █████████████████████████ │ 2720026 │ █████████████▌ │ 55022 │ █████████████▊ │
|
||||
│ 2015-08-01 │ 58075327 │ █████████████████████████ │ 2743994 │ █████████████▋ │ 55302 │ █████████████▊ │
|
||||
│ 2015-09-01 │ 55574825 │ █████████████████████████ │ 2672793 │ █████████████▎ │ 53960 │ █████████████▍ │
|
||||
│ 2015-10-01 │ 59494045 │ █████████████████████████ │ 2816426 │ ██████████████ │ 70210 │ █████████████████▌ │
|
||||
│ 2015-11-01 │ 57117500 │ █████████████████████████ │ 2847146 │ ██████████████▏ │ 71363 │ █████████████████▊ │
|
||||
│ 2015-12-01 │ 58523312 │ █████████████████████████ │ 2854840 │ ██████████████▎ │ 94559 │ ███████████████████████▋ │
|
||||
│ 2016-01-01 │ 61991732 │ █████████████████████████ │ 2920366 │ ██████████████▌ │ 108438 │ █████████████████████████ │
|
||||
│ 2016-02-01 │ 59189875 │ █████████████████████████ │ 2854683 │ ██████████████▎ │ 109916 │ █████████████████████████ │
|
||||
│ 2016-03-01 │ 63918864 │ █████████████████████████ │ 2969542 │ ██████████████▊ │ 84787 │ █████████████████████▏ │
|
||||
│ 2016-04-01 │ 64271256 │ █████████████████████████ │ 2999086 │ ██████████████▉ │ 61647 │ ███████████████▍ │
|
||||
│ 2016-05-01 │ 65212004 │ █████████████████████████ │ 3034674 │ ███████████████▏ │ 67465 │ ████████████████▊ │
|
||||
│ 2016-06-01 │ 65867743 │ █████████████████████████ │ 3057604 │ ███████████████▎ │ 75170 │ ██████████████████▊ │
|
||||
│ 2016-07-01 │ 66974735 │ █████████████████████████ │ 3199374 │ ███████████████▉ │ 77732 │ ███████████████████▍ │
|
||||
│ 2016-08-01 │ 69654819 │ █████████████████████████ │ 3239957 │ ████████████████▏ │ 63080 │ ███████████████▊ │
|
||||
│ 2016-09-01 │ 67024973 │ █████████████████████████ │ 3190864 │ ███████████████▉ │ 62324 │ ███████████████▌ │
|
||||
│ 2016-10-01 │ 71826553 │ █████████████████████████ │ 3284340 │ ████████████████▍ │ 62549 │ ███████████████▋ │
|
||||
│ 2016-11-01 │ 71022319 │ █████████████████████████ │ 3300822 │ ████████████████▌ │ 69718 │ █████████████████▍ │
|
||||
│ 2016-12-01 │ 72942967 │ █████████████████████████ │ 3430324 │ █████████████████▏ │ 71705 │ █████████████████▉ │
|
||||
│ 2017-01-01 │ 78946585 │ █████████████████████████ │ 3572093 │ █████████████████▊ │ 78198 │ ███████████████████▌ │
|
||||
│ 2017-02-01 │ 70609487 │ █████████████████████████ │ 3421115 │ █████████████████ │ 69823 │ █████████████████▍ │
|
||||
│ 2017-03-01 │ 79723106 │ █████████████████████████ │ 3638122 │ ██████████████████▏ │ 73865 │ ██████████████████▍ │
|
||||
│ 2017-04-01 │ 77478009 │ █████████████████████████ │ 3620591 │ ██████████████████ │ 74387 │ ██████████████████▌ │
|
||||
│ 2017-05-01 │ 79810360 │ █████████████████████████ │ 3650820 │ ██████████████████▎ │ 74356 │ ██████████████████▌ │
|
||||
│ 2017-06-01 │ 79901711 │ █████████████████████████ │ 3737614 │ ██████████████████▋ │ 72114 │ ██████████████████ │
|
||||
│ 2017-07-01 │ 81798725 │ █████████████████████████ │ 3872330 │ ███████████████████▎ │ 76052 │ ███████████████████ │
|
||||
│ 2017-08-01 │ 84658503 │ █████████████████████████ │ 3960093 │ ███████████████████▊ │ 77798 │ ███████████████████▍ │
|
||||
│ 2017-09-01 │ 83165192 │ █████████████████████████ │ 3880501 │ ███████████████████▍ │ 78402 │ ███████████████████▌ │
|
||||
│ 2017-10-01 │ 85828912 │ █████████████████████████ │ 3980335 │ ███████████████████▉ │ 80685 │ ████████████████████▏ │
|
||||
│ 2017-11-01 │ 84965681 │ █████████████████████████ │ 4026749 │ ████████████████████▏ │ 82659 │ ████████████████████▋ │
|
||||
│ 2017-12-01 │ 85973810 │ █████████████████████████ │ 4196354 │ ████████████████████▉ │ 91984 │ ██████████████████████▉ │
|
||||
│ 2018-01-01 │ 91558594 │ █████████████████████████ │ 4364443 │ █████████████████████▊ │ 102577 │ █████████████████████████ │
|
||||
│ 2018-02-01 │ 86467179 │ █████████████████████████ │ 4277899 │ █████████████████████▍ │ 104610 │ █████████████████████████ │
|
||||
│ 2018-03-01 │ 96490262 │ █████████████████████████ │ 4422470 │ ██████████████████████ │ 112559 │ █████████████████████████ │
|
||||
│ 2018-04-01 │ 98101232 │ █████████████████████████ │ 4572434 │ ██████████████████████▊ │ 105284 │ █████████████████████████ │
|
||||
│ 2018-05-01 │ 100109100 │ █████████████████████████ │ 4698908 │ ███████████████████████▍ │ 103910 │ █████████████████████████ │
|
||||
│ 2018-06-01 │ 100009462 │ █████████████████████████ │ 4697426 │ ███████████████████████▍ │ 101107 │ █████████████████████████ │
|
||||
│ 2018-07-01 │ 108151359 │ █████████████████████████ │ 5099492 │ █████████████████████████ │ 106184 │ █████████████████████████ │
|
||||
│ 2018-08-01 │ 107330940 │ █████████████████████████ │ 5084082 │ █████████████████████████ │ 109985 │ █████████████████████████ │
|
||||
│ 2018-09-01 │ 104473929 │ █████████████████████████ │ 5011953 │ █████████████████████████ │ 109710 │ █████████████████████████ │
|
||||
│ 2018-10-01 │ 112346556 │ █████████████████████████ │ 5320405 │ █████████████████████████ │ 112533 │ █████████████████████████ │
|
||||
│ 2018-11-01 │ 112573001 │ █████████████████████████ │ 5353282 │ █████████████████████████ │ 112211 │ █████████████████████████ │
|
||||
│ 2018-12-01 │ 121953600 │ █████████████████████████ │ 5611543 │ █████████████████████████ │ 118291 │ █████████████████████████ │
|
||||
│ 2019-01-01 │ 129386587 │ █████████████████████████ │ 6016687 │ █████████████████████████ │ 125725 │ █████████████████████████ │
|
||||
│ 2019-02-01 │ 120645639 │ █████████████████████████ │ 5974488 │ █████████████████████████ │ 125420 │ █████████████████████████ │
|
||||
│ 2019-03-01 │ 137650471 │ █████████████████████████ │ 6410197 │ █████████████████████████ │ 135924 │ █████████████████████████ │
|
||||
│ 2019-04-01 │ 138473643 │ █████████████████████████ │ 6416384 │ █████████████████████████ │ 139844 │ █████████████████████████ │
|
||||
│ 2019-05-01 │ 142463421 │ █████████████████████████ │ 6574836 │ █████████████████████████ │ 142012 │ █████████████████████████ │
|
||||
│ 2019-06-01 │ 134172939 │ █████████████████████████ │ 6601267 │ █████████████████████████ │ 140997 │ █████████████████████████ │
|
||||
│ 2019-07-01 │ 145965083 │ █████████████████████████ │ 6901822 │ █████████████████████████ │ 147802 │ █████████████████████████ │
|
||||
│ 2019-08-01 │ 146854393 │ █████████████████████████ │ 6993882 │ █████████████████████████ │ 151888 │ █████████████████████████ │
|
||||
│ 2019-09-01 │ 137540219 │ █████████████████████████ │ 7001362 │ █████████████████████████ │ 148839 │ █████████████████████████ │
|
||||
│ 2019-10-01 │ 145909884 │ █████████████████████████ │ 7160126 │ █████████████████████████ │ 152075 │ █████████████████████████ │
|
||||
│ 2019-11-01 │ 138512489 │ █████████████████████████ │ 7098723 │ █████████████████████████ │ 164597 │ █████████████████████████ │
|
||||
│ 2019-12-01 │ 146012313 │ █████████████████████████ │ 7438261 │ █████████████████████████ │ 166966 │ █████████████████████████ │
|
||||
│ 2020-01-01 │ 153498208 │ █████████████████████████ │ 7703548 │ █████████████████████████ │ 174390 │ █████████████████████████ │
|
||||
│ 2020-02-01 │ 148386817 │ █████████████████████████ │ 7582031 │ █████████████████████████ │ 170257 │ █████████████████████████ │
|
||||
│ 2020-03-01 │ 166266315 │ █████████████████████████ │ 8339049 │ █████████████████████████ │ 192460 │ █████████████████████████ │
|
||||
│ 2020-04-01 │ 178511581 │ █████████████████████████ │ 8991649 │ █████████████████████████ │ 202334 │ █████████████████████████ │
|
||||
│ 2020-05-01 │ 189993779 │ █████████████████████████ │ 9331358 │ █████████████████████████ │ 217357 │ █████████████████████████ │
|
||||
│ 2020-06-01 │ 187914434 │ █████████████████████████ │ 9085003 │ █████████████████████████ │ 223362 │ █████████████████████████ │
|
||||
│ 2020-07-01 │ 194244994 │ █████████████████████████ │ 9321706 │ █████████████████████████ │ 228222 │ █████████████████████████ │
|
||||
│ 2020-08-01 │ 196099301 │ █████████████████████████ │ 9368408 │ █████████████████████████ │ 230251 │ █████████████████████████ │
|
||||
│ 2020-09-01 │ 182549761 │ █████████████████████████ │ 9271571 │ █████████████████████████ │ 227889 │ █████████████████████████ │
|
||||
│ 2020-10-01 │ 186583890 │ █████████████████████████ │ 9396112 │ █████████████████████████ │ 233715 │ █████████████████████████ │
|
||||
│ 2020-11-01 │ 186083723 │ █████████████████████████ │ 9623053 │ █████████████████████████ │ 234963 │ █████████████████████████ │
|
||||
│ 2020-12-01 │ 191317162 │ █████████████████████████ │ 9898168 │ █████████████████████████ │ 249115 │ █████████████████████████ │
|
||||
│ 2021-01-01 │ 210496207 │ █████████████████████████ │ 10503943 │ █████████████████████████ │ 259805 │ █████████████████████████ │
|
||||
│ 2021-02-01 │ 193510365 │ █████████████████████████ │ 10215033 │ █████████████████████████ │ 253656 │ █████████████████████████ │
|
||||
│ 2021-03-01 │ 207454415 │ █████████████████████████ │ 10365629 │ █████████████████████████ │ 267263 │ █████████████████████████ │
|
||||
│ 2021-04-01 │ 204573086 │ █████████████████████████ │ 10391984 │ █████████████████████████ │ 270543 │ █████████████████████████ │
|
||||
│ 2021-05-01 │ 217655366 │ █████████████████████████ │ 10648130 │ █████████████████████████ │ 288555 │ █████████████████████████ │
|
||||
│ 2021-06-01 │ 208027069 │ █████████████████████████ │ 10397311 │ █████████████████████████ │ 291520 │ █████████████████████████ │
|
||||
│ 2021-07-01 │ 210955954 │ █████████████████████████ │ 10063967 │ █████████████████████████ │ 252061 │ █████████████████████████ │
|
||||
│ 2021-08-01 │ 225681244 │ █████████████████████████ │ 10383556 │ █████████████████████████ │ 254569 │ █████████████████████████ │
|
||||
│ 2021-09-01 │ 220086513 │ █████████████████████████ │ 10298344 │ █████████████████████████ │ 256826 │ █████████████████████████ │
|
||||
│ 2021-10-01 │ 227527379 │ █████████████████████████ │ 10729882 │ █████████████████████████ │ 283328 │ █████████████████████████ │
|
||||
│ 2021-11-01 │ 228289963 │ █████████████████████████ │ 10995197 │ █████████████████████████ │ 302386 │ █████████████████████████ │
|
||||
│ 2021-12-01 │ 235807471 │ █████████████████████████ │ 11312798 │ █████████████████████████ │ 313876 │ █████████████████████████ │
|
||||
│ 2022-01-01 │ 256766679 │ █████████████████████████ │ 12074520 │ █████████████████████████ │ 340407 │ █████████████████████████ │
|
||||
│ 2022-02-01 │ 219927645 │ █████████████████████████ │ 10846045 │ █████████████████████████ │ 293236 │ █████████████████████████ │
|
||||
│ 2022-03-01 │ 236554668 │ █████████████████████████ │ 11330285 │ █████████████████████████ │ 302387 │ █████████████████████████ │
|
||||
│ 2022-04-01 │ 231188077 │ █████████████████████████ │ 11697995 │ █████████████████████████ │ 316303 │ █████████████████████████ │
|
||||
│ 2022-05-01 │ 230492108 │ █████████████████████████ │ 11448584 │ █████████████████████████ │ 323725 │ █████████████████████████ │
|
||||
│ 2022-06-01 │ 218842949 │ █████████████████████████ │ 11400399 │ █████████████████████████ │ 324846 │ █████████████████████████ │
|
||||
│ 2022-07-01 │ 242504279 │ █████████████████████████ │ 12049204 │ █████████████████████████ │ 335621 │ █████████████████████████ │
|
||||
│ 2022-08-01 │ 247215325 │ █████████████████████████ │ 12189276 │ █████████████████████████ │ 337873 │ █████████████████████████ │
|
||||
│ 2022-09-01 │ 234131223 │ █████████████████████████ │ 11674079 │ █████████████████████████ │ 326325 │ █████████████████████████ │
|
||||
│ 2022-10-01 │ 237365072 │ █████████████████████████ │ 11804508 │ █████████████████████████ │ 336063 │ █████████████████████████ │
|
||||
│ 2022-11-01 │ 229478878 │ █████████████████████████ │ 11543020 │ █████████████████████████ │ 323122 │ █████████████████████████ │
|
||||
│ 2022-12-01 │ 238862690 │ █████████████████████████ │ 11967451 │ █████████████████████████ │ 331668 │ █████████████████████████ │
|
||||
│ 2023-01-01 │ 253577512 │ █████████████████████████ │ 12264087 │ █████████████████████████ │ 332711 │ █████████████████████████ │
|
||||
│ 2023-02-01 │ 221285501 │ █████████████████████████ │ 11537091 │ █████████████████████████ │ 317879 │ █████████████████████████ │
|
||||
└──────────────┴───────────┴───────────────────────────┴──────────┴───────────────────────────┴────────────┴───────────────────────────┘
|
||||
|
||||
172 rows in set. Elapsed: 184.809 sec. Processed 6.74 billion rows, 89.56 GB (36.47 million rows/s., 484.62 MB/s.)
|
||||
203 rows in set. Elapsed: 48.492 sec. Processed 14.69 billion rows, 213.35 GB (302.91 million rows/s., 4.40 GB/s.)
|
||||
```
|
||||
|
||||
10. Here are the top 10 subreddits of 2022:
|
||||
@ -450,23 +525,21 @@ ORDER BY count DESC
|
||||
LIMIT 10;
|
||||
```
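The head of this statement is elided by the diff; a hedged sketch consistent with the `ORDER BY count DESC` / `LIMIT 10` tail shown above would be:

```sql
-- Sketch: top subreddits by comment count for 2022.
SELECT
    subreddit,
    count() AS count
FROM reddit
WHERE toYear(created_utc) = 2022
GROUP BY subreddit
ORDER BY count DESC
LIMIT 10;
```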
The response is:
|
||||
|
||||
```response
|
||||
┌─subreddit────────┬───count─┐
|
||||
│ AskReddit │ 3858203 │
|
||||
│ politics │ 1356782 │
|
||||
│ memes │ 1249120 │
|
||||
│ nfl │ 883667 │
|
||||
│ worldnews │ 866065 │
|
||||
│ teenagers │ 777095 │
|
||||
│ AmItheAsshole │ 752720 │
|
||||
│ dankmemes │ 657932 │
|
||||
│ nba │ 514184 │
|
||||
│ unpopularopinion │ 473649 │
|
||||
└──────────────────┴─────────┘
|
||||
┌─subreddit──────┬────count─┐
|
||||
│ AskReddit │ 72312060 │
|
||||
│ AmItheAsshole │ 25323210 │
|
||||
│ teenagers │ 22355960 │
|
||||
│ worldnews │ 17797707 │
|
||||
│ FreeKarma4U │ 15652274 │
|
||||
│ FreeKarma4You │ 14929055 │
|
||||
│ wallstreetbets │ 14235271 │
|
||||
│ politics │ 12511136 │
|
||||
│ memes │ 11610792 │
|
||||
│ nba │ 11586571 │
|
||||
└────────────────┴──────────┘
|
||||
|
||||
10 rows in set. Elapsed: 27.824 sec. Processed 6.74 billion rows, 53.26 GB (242.22 million rows/s., 1.91 GB/s.)
|
||||
10 rows in set. Elapsed: 5.956 sec. Processed 14.69 billion rows, 126.19 GB (2.47 billion rows/s., 21.19 GB/s.)
|
||||
```
|
||||
|
||||
11. Let's see which subreddits had the biggest increase in comments from 2018 to 2019:
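The query body is again cut off by the hunk. One hedged way to express the 2018-to-2019 increase per subreddit (an illustrative formulation, not necessarily the tutorial's exact query) is:

```sql
-- Sketch: growth in comment volume per subreddit between 2018 and 2019.
SELECT
    subreddit,
    countIf(toYear(created_utc) = 2019) - countIf(toYear(created_utc) = 2018) AS diff
FROM reddit
WHERE toYear(created_utc) IN (2018, 2019)
GROUP BY subreddit
ORDER BY diff DESC
LIMIT 50;
```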
@ -502,62 +575,62 @@ It looks like memes and teenagers were busy on Reddit in 2019:
|
||||
|
||||
```response
|
||||
┌─subreddit────────────┬─────diff─┐
|
||||
│ memes │ 15368369 │
|
||||
│ AskReddit │ 14663662 │
|
||||
│ teenagers │ 12266991 │
|
||||
│ AmItheAsshole │ 11561538 │
|
||||
│ dankmemes │ 11305158 │
|
||||
│ unpopularopinion │ 6332772 │
|
||||
│ PewdiepieSubmissions │ 5930818 │
|
||||
│ Market76 │ 5014668 │
|
||||
│ relationship_advice │ 3776383 │
|
||||
│ freefolk │ 3169236 │
|
||||
│ Minecraft │ 3160241 │
|
||||
│ classicwow │ 2907056 │
|
||||
│ Animemes │ 2673398 │
|
||||
│ gameofthrones │ 2402835 │
|
||||
│ PublicFreakout │ 2267605 │
|
||||
│ ShitPostCrusaders │ 2207266 │
|
||||
│ RoastMe │ 2195715 │
|
||||
│ gonewild │ 2148649 │
|
||||
│ AnthemTheGame │ 1803818 │
|
||||
│ entitledparents │ 1706270 │
|
||||
│ MortalKombat │ 1679508 │
|
||||
│ Cringetopia │ 1620555 │
|
||||
│ pokemon │ 1615266 │
|
||||
│ HistoryMemes │ 1608289 │
|
||||
│ Brawlstars │ 1574977 │
|
||||
│ iamatotalpieceofshit │ 1558315 │
|
||||
│ trashy │ 1518549 │
|
||||
│ ChapoTrapHouse │ 1505748 │
|
||||
│ Pikabu │ 1501001 │
|
||||
│ Showerthoughts │ 1475101 │
|
||||
│ cursedcomments │ 1465607 │
|
||||
│ ukpolitics │ 1386043 │
|
||||
│ wallstreetbets │ 1384431 │
|
||||
│ interestingasfuck │ 1378900 │
|
||||
│ wholesomememes │ 1353333 │
|
||||
│ AskOuija │ 1233263 │
|
||||
│ borderlands3 │ 1197192 │
|
||||
│ aww │ 1168257 │
|
||||
│ insanepeoplefacebook │ 1155473 │
|
||||
│ FortniteCompetitive │ 1122778 │
|
||||
│ EpicSeven │ 1117380 │
|
||||
│ FreeKarma4U │ 1116423 │
|
||||
│ YangForPresidentHQ │ 1086700 │
|
||||
│ SquaredCircle │ 1044089 │
|
||||
│ MurderedByWords │ 1042511 │
|
||||
│ AskMen │ 1024434 │
|
||||
│ thedivision │ 1016634 │
|
||||
│ barstoolsports │ 985032 │
|
||||
│ nfl │ 978340 │
|
||||
│ BattlefieldV │ 971408 │
|
||||
│ AskReddit │ 18765909 │
|
||||
│ memes │ 16496996 │
|
||||
│ teenagers │ 13071715 │
|
||||
│ AmItheAsshole │ 12312663 │
|
||||
│ dankmemes │ 12016716 │
|
||||
│ unpopularopinion │ 6809935 │
|
||||
│ PewdiepieSubmissions │ 6330844 │
|
||||
│ Market76 │ 5213690 │
|
||||
│ relationship_advice │ 4060717 │
|
||||
│ Minecraft │ 3328659 │
|
||||
│ freefolk │ 3227970 │
|
||||
│ classicwow │ 3063133 │
|
||||
│ Animemes │ 2866876 │
|
||||
│ gonewild │ 2457680 │
|
||||
│ PublicFreakout │ 2452288 │
|
||||
│ gameofthrones │ 2411661 │
|
||||
│ RoastMe │ 2378781 │
|
||||
│ ShitPostCrusaders │ 2345414 │
|
||||
│ AnthemTheGame │ 1813152 │
|
||||
│ nfl │ 1804407 │
|
||||
│ Showerthoughts │ 1797968 │
|
||||
│ Cringetopia │ 1764034 │
|
||||
│ pokemon │ 1763269 │
|
||||
│ entitledparents │ 1744852 │
|
||||
│ HistoryMemes │ 1721645 │
|
||||
│ MortalKombat │ 1718184 │
|
||||
│ trashy │ 1684357 │
|
||||
│ ChapoTrapHouse │ 1675363 │
|
||||
│ Brawlstars │ 1663763 │
|
||||
│ iamatotalpieceofshit │ 1647381 │
|
||||
│ ukpolitics │ 1599204 │
|
||||
│ cursedcomments │ 1590781 │
|
||||
│ Pikabu │ 1578597 │
|
||||
│ wallstreetbets │ 1535225 │
|
||||
│ AskOuija │ 1533214 │
|
||||
│ interestingasfuck │ 1528910 │
|
||||
│ aww │ 1439008 │
|
||||
│ wholesomememes │ 1436566 │
|
||||
│ SquaredCircle │ 1432172 │
|
||||
│ insanepeoplefacebook │ 1290686 │
|
||||
│ borderlands3 │ 1274462 │
|
||||
│ FreeKarma4U │ 1217769 │
|
||||
│ YangForPresidentHQ │ 1186918 │
|
||||
│ FortniteCompetitive │ 1184508 │
|
||||
│ AskMen │ 1180820 │
|
||||
│ EpicSeven │ 1172061 │
|
||||
│ MurderedByWords │ 1112476 │
|
||||
│ politics │ 1084087 │
|
||||
│ barstoolsports │ 1068020 │
|
||||
│ BattlefieldV │ 1053878 │
|
||||
└──────────────────────┴──────────┘
|
||||
|
||||
50 rows in set. Elapsed: 65.954 sec. Processed 13.48 billion rows, 79.67 GB (204.37 million rows/s., 1.21 GB/s.)
|
||||
50 rows in set. Elapsed: 10.680 sec. Processed 29.38 billion rows, 198.67 GB (2.75 billion rows/s., 18.60 GB/s.)
|
||||
```
|
||||
|
||||
12. One more query: let's compare ClickHouse mentions to other technologies like Snowflake and Postgres. This query is a big one because it has to search all the comments three times for a substring, and unfortunately ClickHouse user are obviously not very active on Reddit yet:
|
||||
12. One more query: let's compare ClickHouse mentions to other technologies like Snowflake and Postgres. This query is a big one because it has to search all 14.69 billion comments three times for a substring, but the performance is actually quite impressive. (Unfortunately ClickHouse users are not very active on Reddit yet):
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
@ -571,7 +644,7 @@ ORDER BY quarter ASC;
|
||||
```
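Only `SELECT` and the trailing `ORDER BY quarter ASC;` of this query appear in the diff. A hedged sketch with the same shape as the response below (the exact string-matching function used by the tutorial is an assumption):

```sql
-- Sketch: comments per quarter that mention each technology at least once.
SELECT
    toStartOfQuarter(created_utc) AS quarter,
    sum(positionCaseInsensitive(body, 'clickhouse') > 0) AS clickhouse,
    sum(positionCaseInsensitive(body, 'snowflake') > 0) AS snowflake,
    sum(positionCaseInsensitive(body, 'postgres') > 0) AS postgres
FROM reddit
GROUP BY quarter
ORDER BY quarter ASC;
```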
```response
|
||||
┌────Quarter─┬─clickhouse─┬─snowflake─┬─postgres─┐
|
||||
┌────quarter─┬─clickhouse─┬─snowflake─┬─postgres─┐
|
||||
│ 2005-10-01 │ 0 │ 0 │ 0 │
|
||||
│ 2006-01-01 │ 0 │ 2 │ 23 │
|
||||
│ 2006-04-01 │ 0 │ 2 │ 24 │
|
||||
@ -591,12 +664,12 @@ ORDER BY quarter ASC;
|
||||
│ 2009-10-01 │ 0 │ 633 │ 589 │
|
||||
│ 2010-01-01 │ 0 │ 555 │ 501 │
|
||||
│ 2010-04-01 │ 0 │ 587 │ 469 │
|
||||
│ 2010-07-01 │ 0 │ 770 │ 821 │
|
||||
│ 2010-10-01 │ 0 │ 1480 │ 550 │
|
||||
│ 2011-01-01 │ 0 │ 1482 │ 568 │
|
||||
│ 2011-04-01 │ 0 │ 1558 │ 406 │
|
||||
│ 2011-07-01 │ 0 │ 2163 │ 628 │
|
||||
│ 2011-10-01 │ 0 │ 4064 │ 566 │
|
||||
│ 2010-07-01 │ 0 │ 601 │ 696 │
|
||||
│ 2010-10-01 │ 0 │ 1246 │ 505 │
|
||||
│ 2011-01-01 │ 0 │ 758 │ 247 │
|
||||
│ 2011-04-01 │ 0 │ 537 │ 113 │
|
||||
│ 2011-07-01 │ 0 │ 173 │ 64 │
|
||||
│ 2011-10-01 │ 0 │ 649 │ 96 │
|
||||
│ 2012-01-01 │ 0 │ 4621 │ 662 │
|
||||
│ 2012-04-01 │ 0 │ 5737 │ 785 │
|
||||
│ 2012-07-01 │ 0 │ 6097 │ 1127 │
|
||||
@ -628,9 +701,21 @@ ORDER BY quarter ASC;
|
||||
│ 2019-01-01 │ 14 │ 80250 │ 4305 │
|
||||
│ 2019-04-01 │ 30 │ 70307 │ 3872 │
|
||||
│ 2019-07-01 │ 33 │ 77149 │ 4164 │
|
||||
│ 2019-10-01 │ 13 │ 76746 │ 3541 │
|
||||
│ 2020-01-01 │ 16 │ 54475 │ 846 │
|
||||
│ 2019-10-01 │ 22 │ 113011 │ 4369 │
|
||||
│ 2020-01-01 │ 34 │ 238273 │ 5133 │
|
||||
│ 2020-04-01 │ 52 │ 454467 │ 6100 │
|
||||
│ 2020-07-01 │ 37 │ 406623 │ 5507 │
|
||||
│ 2020-10-01 │ 49 │ 212143 │ 5385 │
|
||||
│ 2021-01-01 │ 56 │ 151262 │ 5749 │
|
||||
│ 2021-04-01 │ 71 │ 119928 │ 6039 │
|
||||
│ 2021-07-01 │ 53 │ 110342 │ 5765 │
|
||||
│ 2021-10-01 │ 92 │ 121144 │ 6401 │
|
||||
│ 2022-01-01 │ 93 │ 107512 │ 6772 │
|
||||
│ 2022-04-01 │ 120 │ 91560 │ 6687 │
|
||||
│ 2022-07-01 │ 183 │ 99764 │ 7377 │
|
||||
│ 2022-10-01 │ 123 │ 99447 │ 7052 │
|
||||
│ 2023-01-01 │ 126 │ 58733 │ 4891 │
|
||||
└────────────┴────────────┴───────────┴──────────┘
|
||||
|
||||
58 rows in set. Elapsed: 2663.751 sec. Processed 6.74 billion rows, 1.21 TB (2.53 million rows/s., 454.37 MB/s.)
|
||||
70 rows in set. Elapsed: 325.835 sec. Processed 14.69 billion rows, 2.57 TB (45.08 million rows/s., 7.87 GB/s.)
|
||||
```
|
@ -28,23 +28,25 @@ The quickest and easiest way to get up and running with ClickHouse is to create

For production installs of a specific release version see the [installation options](#available-installation-options) down below.
:::

On Linux and macOS:
On Linux, macOS and FreeBSD:

1. If you are just getting started and want to see what ClickHouse can do, the simplest way to download ClickHouse locally is to run the following command. It downloads a single binary for your operating system that can be used to run the ClickHouse server, clickhouse-client, clickhouse-local,
ClickHouse Keeper, and other tools:
1. If you are just getting started and want to see what ClickHouse can do, the simplest way to download ClickHouse locally is to run the
following command. It downloads a single binary for your operating system that can be used to run the ClickHouse server,
clickhouse-client, clickhouse-local, ClickHouse Keeper, and other tools:

```bash
curl https://clickhouse.com/ | sh
```

1. Run the following command to start the ClickHouse server:

```bash
./clickhouse server
```

The first time you run this script, the necessary files and folders are created in the current directory, then the server starts.
The first time you run this script, the necessary files and folders are created in the current directory, then the server starts.

1. Open a new terminal and use the **clickhouse-client** to connect to your service:
1. Open a new terminal and use the **./clickhouse client** to connect to your service:

```bash
./clickhouse client
@ -330,7 +332,9 @@ For production environments, it’s recommended to use the latest `stable`-versi

To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/clickhouse/clickhouse-server/). Those images use official `deb` packages inside.

### From Sources {#from-sources}
## Non-Production Deployments (Advanced)

### Compile From Source {#from-sources}

To manually compile ClickHouse, follow the instructions for [Linux](/docs/en/development/build.md) or [macOS](/docs/en/development/build-osx.md).

@ -346,8 +350,33 @@ You’ll need to create data and metadata folders manually and `chown` them for

On Gentoo, you can just use `emerge clickhouse` to install ClickHouse from sources.

### From CI checks pre-built binaries
ClickHouse binaries are built for each [commit](/docs/en/development/build.md#you-dont-have-to-build-clickhouse).
### Install a CI-generated Binary

ClickHouse's continuous integration (CI) infrastructure produces specialized builds for each commit in the [ClickHouse
repository](https://github.com/clickhouse/clickhouse/), e.g. [sanitized](https://github.com/google/sanitizers) builds, unoptimized (Debug)
builds, cross-compiled builds etc. While such builds are normally only useful during development, they can in certain situations also be
interesting for users.

:::note
Since ClickHouse's CI is evolving over time, the exact steps to download CI-generated builds may vary.
Also, CI may delete build artifacts that are too old, making them unavailable for download.
:::

For example, to download an aarch64 binary for ClickHouse v23.4, follow these steps:

- Find the GitHub pull request for release v23.4: [Release pull request for branch 23.4](https://github.com/ClickHouse/ClickHouse/pull/49238)
- Click "Commits", then click a commit similar to "Update autogenerated version to 23.4.2.1 and contributors" for the particular version you would like to install.
- Click the green check / yellow dot / red cross to open the list of CI checks.
- Click "Details" next to "ClickHouse Build Check" in the list; it will open a page similar to [this page](https://s3.amazonaws.com/clickhouse-test-reports/46793/b460eb70bf29b19eadd19a1f959b15d186705394/clickhouse_build_check/report.html)
- Find the rows with compiler = "clang-*-aarch64" - there are multiple rows.
- Download the artifacts for these builds.

To download binaries for very old x86-64 systems without [SSE3](https://en.wikipedia.org/wiki/SSE3) support or old ARM systems without
[ARMv8.1-A](https://en.wikipedia.org/wiki/AArch64#ARMv8.1-A) support, open a [pull
request](https://github.com/ClickHouse/ClickHouse/commits/master) and find the CI check "BuilderBinAmd64Compat" or, respectively,
"BuilderBinAarch64V80Compat". Then click "Details", open the "Build" fold, scroll to the end, and find the message "Notice: Build URLs
https://s3.amazonaws.com/clickhouse/builds/PRs/.../.../binary_aarch64_v80compat/clickhouse". You can then click the link to download the
build.

## Launch {#launch}

@ -577,7 +577,7 @@ Default value: 20

**Usage**

The value of the `number_of_free_entries_in_pool_to_execute_mutation` setting should be less than the value of the [background_pool_size](/docs/en/operations/server-configuration-parameters/settings#background_pool_size) * [background_pool_size](/docs/en/operations/server-configuration-parameters/settings#background_merges_mutations_concurrency_ratio). Otherwise, ClickHouse throws an exception.
The value of the `number_of_free_entries_in_pool_to_execute_mutation` setting should be less than the value of the [background_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_pool_size) * [background_merges_mutations_concurrency_ratio](/docs/en/operations/server-configuration-parameters/settings.md/#background_merges_mutations_concurrency_ratio). Otherwise, ClickHouse throws an exception.

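To sanity-check this constraint on a running server, the values involved can be read from system tables. This is only a sketch: `system.merge_tree_settings` exposes the MergeTree-level setting, while the exact table that exposes the background pool settings (assumed to be `system.server_settings` here) can differ between ClickHouse versions.

```sql
-- Sketch: inspect the mutation threshold and the pool-related server settings.
SELECT name, value
FROM system.merge_tree_settings
WHERE name = 'number_of_free_entries_in_pool_to_execute_mutation';

SELECT name, value
FROM system.server_settings  -- assumed location; may differ between versions
WHERE name IN ('background_pool_size', 'background_merges_mutations_concurrency_ratio');
```
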
## max_part_loading_threads {#max-part-loading-threads}

@ -840,4 +840,4 @@ Possible values:

- `Always` or `Never`.

Default value: `Never`
Default value: `Never`

@ -3818,8 +3818,8 @@ Result:

## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}

Enables or disables returning results of type:
- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md/#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md/#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md/#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md/#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md/#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md/#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md/#tolastdayofmonth).
- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md/#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md/#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md/#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md/#timeslot).
- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toLastDayOfWeek](../../sql-reference/functions/date-time-functions.md#tolastdayofweek) and [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday).
- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md#timeslot).

Possible values:

@ -430,12 +430,14 @@ from_date32: 1509840000
```

:::note
The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings.md#enable-extended-results-for-datetime-functions) which is `0` by default.
The return type of `toStartOf*`, `toLastDayOf*`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings.md#enable-extended-results-for-datetime-functions) which is `0` by default.

Behavior for
* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results.
* `enable_extended_results_for_datetime_functions = 0`:
    * Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`.
    * Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results.
* `enable_extended_results_for_datetime_functions = 1`:
    * Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and they return `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`.
    * Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and they return `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`.
    * Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime` if their argument is a `Date` or `DateTime`, and they return `DateTime64` if their argument is a `Date32` or `DateTime64`.
:::

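A small illustration of the effect described in the note above; the types in the comments are what the documentation implies, not captured server output.

```sql
-- Default behaviour: the result type collapses to Date even for a Date32 argument.
SET enable_extended_results_for_datetime_functions = 0;
SELECT toTypeName(toStartOfMonth(toDate32('2100-01-15')));  -- expected: Date

-- With the setting enabled, the extended type is preserved.
SET enable_extended_results_for_datetime_functions = 1;
SELECT toTypeName(toStartOfMonth(toDate32('2100-01-15')));  -- expected: Date32
```
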
@ -490,6 +492,18 @@ The mode argument works exactly like the mode argument in function `toWeek()`. I
toStartOfWeek(t[, mode[, timezone]])
```

## toLastDayOfWeek

Rounds a date or date with time up to the nearest Saturday or Sunday.
Returns the date.
The mode argument works exactly like the mode argument in function `toWeek()`. If no mode is specified, mode is assumed as 0.

**Syntax**

``` sql
toLastDayOfWeek(t[, mode[, timezone]])
```

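A brief usage sketch for the two week-rounding functions. 2023-04-21 is a Friday, so with the default mode 0 (weeks run Sunday through Saturday) the expected results are the surrounding Sunday and Saturday; the values in the comments are derived from the mode semantics above rather than copied from server output.

```sql
SELECT
    toStartOfWeek(toDate('2023-04-21')) AS week_start,                   -- expected: 2023-04-16 (Sunday)
    toLastDayOfWeek(toDate('2023-04-21')) AS week_end,                   -- expected: 2023-04-22 (Saturday)
    toLastDayOfWeek(toDate('2023-04-21'), 1) AS week_end_monday_first;   -- expected: 2023-04-23 (Sunday, Monday-first week)
```
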
## toStartOfDay

Rounds down a date with time to the start of the day.

@ -560,6 +560,77 @@ Result:
└───────────────────────────┘
```

## Entropy-learned hashing (experimental)

Entropy-learned hashing is disabled by default; to enable it: `SET allow_experimental_hash_functions=1`.

Entropy-learned hashing is not a standalone hash function like `metroHash64`, `cityHash64`, `sipHash64` etc. Instead, it aims to preprocess
the data to be hashed in a way that a standalone hash function can be computed more efficiently while not compromising the hash quality,
i.e. the randomness of the hashes. For that, entropy-learned hashing chooses a subset of the bytes in a training data set of Strings which has
the same randomness (entropy) as the original Strings. For example, if the Strings are on average 100 bytes long, and we pick a subset of 5
bytes, then a hash function will be 95% less expensive to evaluate. For details of the method, refer to [Entropy-Learned Hashing: Constant
Time Hashing with Controllable Uniformity](https://doi.org/10.1145/3514221.3517894).

Entropy-learned hashing has two phases:

1. A training phase on a representative but typically small set of Strings to be hashed. Training consists of two steps:

   - Function `prepareTrainEntropyLearnedHash(data, id)` caches the training data in a global state under a given `id`. It returns dummy
     value `0` on every row.
   - Function `trainEntropyLearnedHash(id)` computes a minimal partial sub-key of the training data stored under `id` in the global
     state. The cached training data in the global state is replaced by the partial key. Dummy value `0` is returned on every row.

2. An evaluation phase where hashes are computed using the previously calculated partial sub-keys. Function `entropyLearnedHash(data, id)`
   hashes `data` using the partial sub-key stored as `id`. CityHash64 is used as the hash function.

The reason that the training phase comprises two steps is that ClickHouse processes data at chunk granularity but entropy-learned hashing
needs to process the entire training set at once.

Since functions `prepareTrainEntropyLearnedHash()` and `trainEntropyLearnedHash()` access global state, they should not be called in
parallel with the same `id`.

**Syntax**

``` sql
prepareTrainEntropyLearnedHash(data, id);
trainEntropyLearnedHash(id);
entropyLearnedHash(data, id);
```

**Example**

```sql
SET allow_experimental_hash_functions=1;
CREATE TABLE tab (col String) ENGINE=Memory;
INSERT INTO tab VALUES ('aa'), ('ba'), ('ca');

SELECT prepareTrainEntropyLearnedHash(col, 'id1') AS prepared FROM tab;
SELECT trainEntropyLearnedHash('id1') AS trained FROM tab;
SELECT entropyLearnedHash(col, 'id1') as hashes FROM tab;
```

Result:

``` response
┌─prepared─┐
│ 0 │
│ 0 │
│ 0 │
└──────────┘

┌─trained─┐
│ 0 │
│ 0 │
│ 0 │
└─────────┘

┌───────────────hashes─┐
│ 2603192927274642682 │
│ 4947675599669400333 │
│ 10783339242466472992 │
└──────────────────────┘
```

## metroHash64

Produces a 64-bit [MetroHash](http://www.jandrewrogers.com/2015/05/27/metrohash/) hash value.

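A minimal call sketch; the concrete hash value is intentionally not shown here, only the fact that the function returns a `UInt64`.

```sql
SELECT metroHash64('ClickHouse') AS h, toTypeName(h) AS type;  -- type: UInt64
```
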
@ -7,6 +7,18 @@ title: "EXPLAIN Statement"

Shows the execution plan of a statement.

<div class='vimeo-container'>
<iframe src="//www.youtube.com/embed/hP6G2Nlz_cA"
    width="640"
    height="360"
    frameborder="0"
    allow="autoplay;
    fullscreen;
    picture-in-picture"
    allowfullscreen>
</iframe>
</div>

Syntax:

```sql

@ -5,7 +5,7 @@ sidebar_label: ORDER BY

# ORDER BY Clause

The `ORDER BY` clause contains a list of expressions, which can each be attributed with a `DESC` (descending) or `ASC` (ascending) modifier which determines the sorting direction. If the direction is not specified, `ASC` is assumed, so it’s usually omitted. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase`.
The `ORDER BY` clause contains a list of expressions, which can each be attributed with a `DESC` (descending) or `ASC` (ascending) modifier which determines the sorting direction. If the direction is not specified, `ASC` is assumed, so it’s usually omitted. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase`. Sorting is case-sensitive.
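
A short illustration of case-sensitive ordering; the order shown in the comment follows from byte-wise comparison, in which uppercase Latin letters sort before lowercase ones.

```sql
SELECT c
FROM (SELECT arrayJoin(['banana', 'Apple', 'apple']) AS c)
ORDER BY c ASC;
-- expected order: 'Apple', 'apple', 'banana'
```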

If you want to sort by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).

@ -3790,7 +3790,7 @@ SELECT * FROM positional_arguments ORDER BY 2,3;

## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}

Enables or disables returning results of type:
- `Date32` with extended range (compared to type `Date`) for the functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth).
- `Date32` with extended range (compared to type `Date`) for the functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toLastDayOfWeek](../../sql-reference/functions/date-time-functions.md#tolastdayofweek) and [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday).
- `DateTime64` with extended range (compared to type `DateTime`) for the functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md#timeslot).

Possible values:

@ -310,13 +310,15 @@ from_date32: 1509840000
```

:::note
The return type of the `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings.md#enable-extended-results-for-datetime-functions), which is `0` by default.
The return type of the `toStartOf*`, `toLastDayOf*`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings.md#enable-extended-results-for-datetime-functions), which is `0` by default.

Behavior for
* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOf*`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Although these functions can take values of type `Date32` or `DateTime64` as an argument, processing an argument outside the normal range (`1970` - `2148` for `Date` and `1970-01-01 00:00:00`-`2106-02-07 08:28:15` for `DateTime`) produces an incorrect result.
* `enable_extended_results_for_datetime_functions = 0`:
    * Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`.
    * Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Although these functions can take values of the extended types `Date32` and `DateTime64` as an argument, processing an argument outside the normal range (`1970` - `2148` for `Date` and `1970-01-01 00:00:00`-`2106-02-07 08:28:15` for `DateTime`) produces an incorrect result.
* `enable_extended_results_for_datetime_functions = 1`:
    * Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`.
    * Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime` if their argument is a `Date` or `DateTime`, and `DateTime64` if their argument is a `Date32` or `DateTime64`.
    * Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`.
    * Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime` if their argument is of type `Date` or `DateTime`, and `DateTime64` if their argument is of type `Date32` or `DateTime64`.
:::

## toStartOfYear {#tostartofyear}
@ -366,9 +368,15 @@ SELECT toStartOfISOYear(toDate('2017-01-01')) AS ISOYear20170101;
Rounds a date or date with time down to the nearest Monday.
Returns the date.

## toStartOfWeek(t[,mode]) {#tostartofweek}
## toStartOfWeek(t[, mode[, timezone]])

Rounds a date or date with time to the nearest Sunday or Monday, according to mode.
Rounds a date or date with time back, to the nearest Sunday or Monday, according to mode.
Returns the date.
The mode argument works exactly like the mode argument of [toWeek()](#toweek). If the mode argument is omitted, mode 0 is used.

## toLastDayOfWeek(t[, mode[, timezone]])

Rounds a date or date with time forward, to the nearest Saturday or Sunday, according to mode.
Returns the date.
The mode argument works exactly like the mode argument of [toWeek()](#toweek). If the mode argument is omitted, mode 0 is used.

@ -1190,6 +1190,9 @@ void Client::processOptions(const OptionsDescription & options_description,
|
||||
|
||||
void Client::processConfig()
|
||||
{
|
||||
if (config().has("query") && config().has("queries-file"))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");
|
||||
|
||||
/// Batch mode is enabled if one of the following is true:
|
||||
/// - -q (--query) command line option is present.
|
||||
/// The value of the option is used as the text of query (or of multiple queries).
|
||||
|
@ -516,12 +516,12 @@ void LocalServer::updateLoggerLevel(const String & logs_level)
|
||||
|
||||
void LocalServer::processConfig()
|
||||
{
|
||||
if (config().has("query") && config().has("queries-file"))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");
|
||||
|
||||
delayed_interactive = config().has("interactive") && (config().has("query") || config().has("queries-file"));
|
||||
if (is_interactive && !delayed_interactive)
|
||||
{
|
||||
if (config().has("query") && config().has("queries-file"))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specify either `query` or `queries-file` option");
|
||||
|
||||
if (config().has("multiquery"))
|
||||
is_multiquery = true;
|
||||
}
|
||||
|
@ -909,6 +909,11 @@
|
||||
<host>127.0.0.10</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<!-- Unavailable replica -->
|
||||
<replica>
|
||||
<host>127.0.0.11</host>
|
||||
<port>1234</port>
|
||||
</replica>
|
||||
</shard>
|
||||
</parallel_replicas>
|
||||
<test_cluster_two_shards_localhost>
|
||||
|
@ -1205,6 +1205,56 @@ private:
|
||||
|
||||
static std::string rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, const ContextPtr & context);
|
||||
|
||||
static std::optional<JoinTableSide> getColumnSideFromJoinTree(const QueryTreeNodePtr & resolved_identifier, const JoinNode & join_node)
|
||||
{
|
||||
if (resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT)
|
||||
return {};
|
||||
|
||||
if (resolved_identifier->getNodeType() == QueryTreeNodeType::FUNCTION)
|
||||
{
|
||||
const auto & resolved_function = resolved_identifier->as<FunctionNode &>();
|
||||
|
||||
const auto & argument_nodes = resolved_function.getArguments().getNodes();
|
||||
|
||||
std::optional<JoinTableSide> result;
|
||||
for (const auto & argument_node : argument_nodes)
|
||||
{
|
||||
auto table_side = getColumnSideFromJoinTree(argument_node, join_node);
|
||||
if (table_side && result && *table_side != *result)
|
||||
{
|
||||
throw Exception(ErrorCodes::AMBIGUOUS_IDENTIFIER,
|
||||
"Ambiguous identifier {}. In scope {}",
|
||||
resolved_identifier->formatASTForErrorMessage(),
|
||||
join_node.formatASTForErrorMessage());
|
||||
}
|
||||
if (table_side)
|
||||
result = *table_side;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
const auto * column_src = resolved_identifier->as<ColumnNode &>().getColumnSource().get();
|
||||
|
||||
if (join_node.getLeftTableExpression().get() == column_src)
|
||||
return JoinTableSide::Left;
|
||||
if (join_node.getRightTableExpression().get() == column_src)
|
||||
return JoinTableSide::Right;
|
||||
return {};
|
||||
}
|
||||
|
||||
static void convertJoinedColumnTypeToNullIfNeeded(QueryTreeNodePtr & resolved_identifier, const JoinKind & join_kind, std::optional<JoinTableSide> resolved_side)
|
||||
{
|
||||
if (resolved_identifier->getNodeType() == QueryTreeNodeType::COLUMN &&
|
||||
JoinCommon::canBecomeNullable(resolved_identifier->getResultType()) &&
|
||||
(isFull(join_kind) ||
|
||||
(isLeft(join_kind) && resolved_side && *resolved_side == JoinTableSide::Right) ||
|
||||
(isRight(join_kind) && resolved_side && *resolved_side == JoinTableSide::Left)))
|
||||
{
|
||||
auto & resolved_column = resolved_identifier->as<ColumnNode &>();
|
||||
resolved_column.setColumnType(makeNullableOrLowCardinalityNullable(resolved_column.getColumnType()));
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve identifier functions
|
||||
|
||||
static QueryTreeNodePtr tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context);
|
||||
@ -2982,6 +3032,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
|
||||
QueryTreeNodePtr resolved_identifier;
|
||||
|
||||
JoinKind join_kind = from_join_node.getKind();
|
||||
bool join_use_nulls = scope.context->getSettingsRef().join_use_nulls;
|
||||
|
||||
if (left_resolved_identifier && right_resolved_identifier)
|
||||
{
|
||||
@ -3027,19 +3078,31 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
|
||||
*
|
||||
* Otherwise we prefer column from left table.
|
||||
*/
|
||||
if (identifier_path_part == right_column_source_alias)
|
||||
return right_resolved_identifier;
|
||||
else if (!left_column_source_alias.empty() &&
|
||||
right_column_source_alias.empty() &&
|
||||
identifier_path_part != left_column_source_alias)
|
||||
return right_resolved_identifier;
|
||||
bool column_resolved_using_right_alias = identifier_path_part == right_column_source_alias;
|
||||
bool column_resolved_without_using_left_alias = !left_column_source_alias.empty()
|
||||
&& right_column_source_alias.empty()
|
||||
&& identifier_path_part != left_column_source_alias;
|
||||
if (column_resolved_using_right_alias || column_resolved_without_using_left_alias)
|
||||
{
|
||||
resolved_side = JoinTableSide::Right;
|
||||
resolved_identifier = right_resolved_identifier;
|
||||
}
|
||||
else
|
||||
{
|
||||
resolved_side = JoinTableSide::Left;
|
||||
resolved_identifier = left_resolved_identifier;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
resolved_side = JoinTableSide::Left;
|
||||
resolved_identifier = left_resolved_identifier;
|
||||
}
|
||||
|
||||
return left_resolved_identifier;
|
||||
}
|
||||
else if (scope.joins_count == 1 && scope.context->getSettingsRef().single_join_prefer_left_table)
|
||||
{
|
||||
return left_resolved_identifier;
|
||||
resolved_side = JoinTableSide::Left;
|
||||
resolved_identifier = left_resolved_identifier;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -3092,17 +3155,10 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
|
||||
if (join_node_in_resolve_process || !resolved_identifier)
|
||||
return resolved_identifier;
|
||||
|
||||
bool join_use_nulls = scope.context->getSettingsRef().join_use_nulls;
|
||||
|
||||
if (join_use_nulls &&
|
||||
resolved_identifier->getNodeType() == QueryTreeNodeType::COLUMN &&
|
||||
(isFull(join_kind) ||
|
||||
(isLeft(join_kind) && resolved_side && *resolved_side == JoinTableSide::Right) ||
|
||||
(isRight(join_kind) && resolved_side && *resolved_side == JoinTableSide::Left)))
|
||||
if (join_use_nulls)
|
||||
{
|
||||
resolved_identifier = resolved_identifier->clone();
|
||||
auto & resolved_column = resolved_identifier->as<ColumnNode &>();
|
||||
resolved_column.setColumnType(makeNullableOrLowCardinalityNullable(resolved_column.getColumnType()));
|
||||
convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side);
|
||||
}
|
||||
|
||||
return resolved_identifier;
|
||||
@ -4001,6 +4057,27 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
|
||||
else
|
||||
matched_expression_nodes_with_names = resolveUnqualifiedMatcher(matcher_node, scope);
|
||||
|
||||
if (scope.context->getSettingsRef().join_use_nulls)
|
||||
{
|
||||
/** If we are resolving a matcher that came from the result of a JOIN and `join_use_nulls` is set,
|
||||
* we need to convert joined column type to Nullable.
|
||||
* We are taking the nearest JoinNode to check to which table column belongs,
|
||||
* because for LEFT/RIGHT join, we convert only the corresponding side.
|
||||
*/
|
||||
const auto * nearest_query_scope = scope.getNearestQueryScope();
|
||||
const QueryNode * nearest_scope_query_node = nearest_query_scope ? nearest_query_scope->scope_node->as<QueryNode>() : nullptr;
|
||||
const QueryTreeNodePtr & nearest_scope_join_tree = nearest_scope_query_node ? nearest_scope_query_node->getJoinTree() : nullptr;
|
||||
const JoinNode * nearest_scope_join_node = nearest_scope_join_tree ? nearest_scope_join_tree->as<JoinNode>() : nullptr;
|
||||
if (nearest_scope_join_node)
|
||||
{
|
||||
for (auto & [node, node_name] : matched_expression_nodes_with_names)
|
||||
{
|
||||
auto join_identifier_side = getColumnSideFromJoinTree(node, *nearest_scope_join_node);
|
||||
convertJoinedColumnTypeToNullIfNeeded(node, nearest_scope_join_node->getKind(), join_identifier_side);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::unordered_map<const IColumnTransformerNode *, std::unordered_set<std::string>> strict_transformer_to_used_column_names;
|
||||
for (const auto & transformer : matcher_node_typed.getColumnTransformers().getNodes())
|
||||
{
|
||||
|
@ -29,10 +29,15 @@ protected:
|
||||
/// Make encrypted disk.
|
||||
auto settings = std::make_unique<DiskEncryptedSettings>();
|
||||
settings->wrapped_disk = local_disk;
|
||||
settings->current_algorithm = FileEncryption::Algorithm::AES_128_CTR;
|
||||
settings->keys[0] = "1234567890123456";
|
||||
settings->current_key_id = 0;
|
||||
settings->disk_path = "encrypted/";
|
||||
|
||||
settings->current_algorithm = FileEncryption::Algorithm::AES_128_CTR;
|
||||
String key = "1234567890123456";
|
||||
UInt128 fingerprint = FileEncryption::calculateKeyFingerprint(key);
|
||||
settings->all_keys[fingerprint] = key;
|
||||
settings->current_key = key;
|
||||
settings->current_key_fingerprint = fingerprint;
|
||||
|
||||
encrypted_disk = std::make_shared<DiskEncrypted>("encrypted_disk", std::move(settings), true);
|
||||
}
|
||||
|
||||
|
@ -232,12 +232,28 @@ void Connection::disconnect()
|
||||
maybe_compressed_out = nullptr;
|
||||
in = nullptr;
|
||||
last_input_packet_type.reset();
|
||||
out = nullptr; // can write to socket
|
||||
std::exception_ptr finalize_exception;
|
||||
try
|
||||
{
|
||||
// finalize() can write to socket and throw an exception.
|
||||
if (out)
|
||||
out->finalize();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Don't throw an exception here, it will leave Connection in invalid state.
|
||||
finalize_exception = std::current_exception();
|
||||
}
|
||||
out = nullptr;
|
||||
|
||||
if (socket)
|
||||
socket->close();
|
||||
socket = nullptr;
|
||||
connected = false;
|
||||
nonce.reset();
|
||||
|
||||
if (finalize_exception)
|
||||
std::rethrow_exception(finalize_exception);
|
||||
}
|
||||
|
||||
|
||||
|
@ -313,11 +313,6 @@ public:
|
||||
|
||||
/// All functions below are thread-safe; arguments are not checked.
|
||||
|
||||
static ExtendedDayNum toDayNum(ExtendedDayNum d)
|
||||
{
|
||||
return d;
|
||||
}
|
||||
|
||||
static UInt32 saturateMinus(UInt32 x, UInt32 y)
|
||||
{
|
||||
UInt32 res = x - y;
|
||||
@ -325,6 +320,11 @@ public:
|
||||
return res;
|
||||
}
|
||||
|
||||
static ExtendedDayNum toDayNum(ExtendedDayNum d)
|
||||
{
|
||||
return d;
|
||||
}
|
||||
|
||||
static ExtendedDayNum toDayNum(LUTIndex d)
|
||||
{
|
||||
return ExtendedDayNum{static_cast<ExtendedDayNum::UnderlyingType>(d.toUnderType() - daynum_offset_epoch)};
|
||||
@ -365,6 +365,27 @@ public:
|
||||
return toDayNum(LUTIndex(i - (lut[i].day_of_week - 1)));
|
||||
}
|
||||
|
||||
/// Round up to the last day of week.
|
||||
template <typename DateOrTime>
|
||||
inline Time toLastDayOfWeek(DateOrTime v) const
|
||||
{
|
||||
const LUTIndex i = toLUTIndex(v);
|
||||
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
|
||||
return lut_saturated[i + (7 - lut[i].day_of_week)].date;
|
||||
else
|
||||
return lut[i + (7 - lut[i].day_of_week)].date;
|
||||
}
|
||||
|
||||
template <typename DateOrTime>
|
||||
inline auto toLastDayNumOfWeek(DateOrTime v) const
|
||||
{
|
||||
const LUTIndex i = toLUTIndex(v);
|
||||
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
|
||||
return toDayNum(LUTIndexWithSaturation(i + (7 - lut[i].day_of_week)));
|
||||
else
|
||||
return toDayNum(LUTIndex(i + (7 - lut[i].day_of_week)));
|
||||
}
|
||||
|
||||
/// Round down to start of month.
|
||||
template <typename DateOrTime>
|
||||
inline Time toFirstDayOfMonth(DateOrTime v) const
|
||||
@ -858,10 +879,31 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto day_of_week = toDayOfWeek(v);
|
||||
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
|
||||
return (toDayOfWeek(v) != 7) ? DayNum(saturateMinus(v, toDayOfWeek(v))) : toDayNum(v);
|
||||
return (day_of_week != 7) ? DayNum(saturateMinus(v, day_of_week)) : toDayNum(v);
|
||||
else
|
||||
return (toDayOfWeek(v) != 7) ? ExtendedDayNum(v - toDayOfWeek(v)) : toDayNum(v);
|
||||
return (day_of_week != 7) ? ExtendedDayNum(v - day_of_week) : toDayNum(v);
|
||||
}
|
||||
}
|
||||
|
||||
/// Get last day of week with week_mode, return Saturday or Sunday
|
||||
template <typename DateOrTime>
|
||||
inline auto toLastDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
|
||||
{
|
||||
bool monday_first_mode = week_mode & static_cast<UInt8>(WeekModeFlag::MONDAY_FIRST);
|
||||
if (monday_first_mode)
|
||||
{
|
||||
return toLastDayNumOfWeek(v);
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto day_of_week = toDayOfWeek(v);
|
||||
v += 6;
|
||||
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
|
||||
return (day_of_week != 7) ? DayNum(saturateMinus(v, day_of_week)) : toDayNum(v);
|
||||
else
|
||||
return (day_of_week != 7) ? ExtendedDayNum(v - day_of_week) : toDayNum(v);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -138,7 +138,7 @@ void FileChecker::save() const
|
||||
std::string tmp_files_info_path = parentPath(files_info_path) + "tmp_" + fileName(files_info_path);
|
||||
|
||||
{
|
||||
std::unique_ptr<WriteBuffer> out = disk ? disk->writeFile(tmp_files_info_path) : std::make_unique<WriteBufferFromFile>(tmp_files_info_path);
|
||||
std::unique_ptr<WriteBufferFromFileBase> out = disk ? disk->writeFile(tmp_files_info_path) : std::make_unique<WriteBufferFromFile>(tmp_files_info_path);
|
||||
|
||||
/// So complex JSON structure - for compatibility with the old format.
|
||||
writeCString("{\"clickhouse\":{", *out);
|
||||
@ -157,7 +157,9 @@ void FileChecker::save() const
|
||||
}
|
||||
|
||||
writeCString("}}", *out);
|
||||
out->next();
|
||||
|
||||
out->sync();
|
||||
out->finalize();
|
||||
}
|
||||
|
||||
if (disk)
|
||||
|
@ -9,12 +9,13 @@
|
||||
#include "Common/formatReadable.h"
|
||||
#include <Common/TerminalSize.h>
|
||||
#include <Common/UnicodeBar.h>
|
||||
#include "IO/WriteBufferFromString.h"
|
||||
#include <Databases/DatabaseMemory.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
/// http://en.wikipedia.org/wiki/ANSI_escape_code
|
||||
#define CLEAR_TO_END_OF_LINE "\033[K"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
@ -40,14 +40,10 @@ void Pool::Entry::decrementRefCount()
|
||||
{
|
||||
/// We were the last user of this thread, deinitialize it
|
||||
mysql_thread_end();
|
||||
}
|
||||
else if (data->removed_from_pool)
|
||||
{
|
||||
/// data->ref_count == 0 in case we removed connection from pool (see Pool::removeConnection).
|
||||
chassert(ref_count == 0);
|
||||
/// In Pool::Entry::disconnect() we remove connection from the list of pool's connections.
|
||||
/// So now we must deallocate the memory.
|
||||
::delete data;
|
||||
if (data->removed_from_pool)
|
||||
::delete data;
|
||||
}
|
||||
}
|
||||
|
||||
@ -234,11 +230,8 @@ void Pool::removeConnection(Connection* connection)
|
||||
std::lock_guard lock(mutex);
|
||||
if (connection)
|
||||
{
|
||||
if (connection->ref_count > 0)
|
||||
{
|
||||
if (!connection->removed_from_pool)
|
||||
connection->conn.disconnect();
|
||||
connection->ref_count = 0;
|
||||
}
|
||||
connections.remove(connection);
|
||||
connection->removed_from_pool = true;
|
||||
}
|
||||
|
@ -148,6 +148,8 @@ TEST(DateLUTTest, TimeValuesInMiddleOfRange)
|
||||
EXPECT_EQ(lut.addYears(time, 10), 1884270011 /*time_t*/);
|
||||
EXPECT_EQ(lut.timeToString(time), "2019-09-16 19:20:11" /*std::string*/);
|
||||
EXPECT_EQ(lut.dateToString(time), "2019-09-16" /*std::string*/);
|
||||
EXPECT_EQ(lut.toLastDayOfWeek(time), 1569099600 /*time_t*/);
|
||||
EXPECT_EQ(lut.toLastDayNumOfWeek(time), DayNum(18161) /*DayNum*/);
|
||||
EXPECT_EQ(lut.toLastDayOfMonth(time), 1569790800 /*time_t*/);
|
||||
EXPECT_EQ(lut.toLastDayNumOfMonth(time), DayNum(18169) /*DayNum*/);
|
||||
}
|
||||
@ -211,6 +213,8 @@ TEST(DateLUTTest, TimeValuesAtLeftBoderOfRange)
|
||||
EXPECT_EQ(lut.addYears(time, 10), 315532800 /*time_t*/);
|
||||
EXPECT_EQ(lut.timeToString(time), "1970-01-01 00:00:00" /*std::string*/);
|
||||
EXPECT_EQ(lut.dateToString(time), "1970-01-01" /*std::string*/);
|
||||
EXPECT_EQ(lut.toLastDayOfWeek(time), 259200 /*time_t*/);
|
||||
EXPECT_EQ(lut.toLastDayNumOfWeek(time), DayNum(3) /*DayNum*/);
|
||||
EXPECT_EQ(lut.toLastDayOfMonth(time), 2592000 /*time_t*/);
|
||||
EXPECT_EQ(lut.toLastDayNumOfMonth(time), DayNum(30) /*DayNum*/);
|
||||
}
|
||||
@ -276,6 +280,8 @@ TEST(DateLUTTest, TimeValuesAtRightBoderOfRangeOfOldLUT)
|
||||
|
||||
EXPECT_EQ(lut.timeToString(time), "2106-01-31 01:17:53" /*std::string*/);
|
||||
EXPECT_EQ(lut.dateToString(time), "2106-01-31" /*std::string*/);
|
||||
EXPECT_EQ(lut.toLastDayOfWeek(time), 4294339200 /*time_t*/);
|
||||
EXPECT_EQ(lut.toLastDayNumOfWeek(time), DayNum(49703) /*DayNum*/);
|
||||
EXPECT_EQ(lut.toLastDayOfMonth(time), 4294339200 /*time_t*/); // 2106-01-01
|
||||
EXPECT_EQ(lut.toLastDayNumOfMonth(time), DayNum(49703));
|
||||
}
|
||||
|
@ -14,7 +14,6 @@
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Storages/NamedCollectionsHelpers.h>
|
||||
#include <Common/NamedCollections/NamedCollections.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/Macros.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
|
@ -262,9 +262,13 @@ void MaterializedMySQLSyncThread::synchronization()
|
||||
|
||||
try
|
||||
{
|
||||
BinlogEventPtr binlog_event = client.readOneBinlogEvent(std::max(UInt64(1), max_flush_time - watch.elapsedMilliseconds()));
|
||||
if (binlog_event)
|
||||
onEvent(buffers, binlog_event, metadata);
|
||||
UInt64 elapsed_ms = watch.elapsedMilliseconds();
|
||||
if (elapsed_ms < max_flush_time)
|
||||
{
|
||||
BinlogEventPtr binlog_event = client.readOneBinlogEvent(max_flush_time - elapsed_ms);
|
||||
if (binlog_event)
|
||||
onEvent(buffers, binlog_event, metadata);
|
||||
}
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
|
@ -19,7 +19,6 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int INCORRECT_DISK_INDEX;
|
||||
extern const int DATA_ENCRYPTION_ERROR;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
@ -42,87 +41,201 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads encryption keys from the configuration.
|
||||
void getKeysFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix,
|
||||
std::map<UInt64, String> & out_keys_by_id, Strings & out_keys_without_id)
|
||||
{
|
||||
Strings config_keys;
|
||||
config.keys(config_prefix, config_keys);
|
||||
|
||||
for (const std::string & config_key : config_keys)
|
||||
{
|
||||
String key;
|
||||
std::optional<UInt64> key_id;
|
||||
|
||||
if ((config_key == "key") || config_key.starts_with("key["))
|
||||
{
|
||||
String key_path = config_prefix + "." + config_key;
|
||||
key = config.getString(key_path);
|
||||
String key_id_path = key_path + "[@id]";
|
||||
if (config.has(key_id_path))
|
||||
key_id = config.getUInt64(key_id_path);
|
||||
}
|
||||
else if ((config_key == "key_hex") || config_key.starts_with("key_hex["))
|
||||
{
|
||||
String key_path = config_prefix + "." + config_key;
|
||||
key = unhexKey(config.getString(key_path));
|
||||
String key_id_path = key_path + "[@id]";
|
||||
if (config.has(key_id_path))
|
||||
key_id = config.getUInt64(key_id_path);
|
||||
}
|
||||
else
|
||||
continue;
|
||||
|
||||
if (key_id)
|
||||
{
|
||||
if (!out_keys_by_id.contains(*key_id))
|
||||
out_keys_by_id[*key_id] = key;
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple keys specified for same ID {}", *key_id);
|
||||
}
|
||||
else
|
||||
out_keys_without_id.push_back(key);
|
||||
}
|
||||
|
||||
if (out_keys_by_id.empty() && out_keys_without_id.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "No encryption keys found");
|
||||
|
||||
if (out_keys_by_id.empty() && (out_keys_without_id.size() == 1))
|
||||
{
|
||||
out_keys_by_id[0] = out_keys_without_id.front();
|
||||
out_keys_without_id.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads the current encryption key from the configuration.
|
||||
String getCurrentKeyFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix,
|
||||
const std::map<UInt64, String> & keys_by_id, const Strings & keys_without_id)
|
||||
{
|
||||
String key_path = config_prefix + ".current_key";
|
||||
String key_hex_path = config_prefix + ".current_key_hex";
|
||||
String key_id_path = config_prefix + ".current_key_id";
|
||||
|
||||
if (config.has(key_path) + config.has(key_hex_path) + config.has(key_id_path) > 1)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The current key is specified multiple times");
|
||||
|
||||
auto check_current_key_found = [&](const String & current_key_)
|
||||
{
|
||||
for (const auto & [_, key] : keys_by_id)
|
||||
{
|
||||
if (key == current_key_)
|
||||
return;
|
||||
}
|
||||
for (const auto & key : keys_without_id)
|
||||
{
|
||||
if (key == current_key_)
|
||||
return;
|
||||
}
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The current key is not found in keys");
|
||||
};
|
||||
|
||||
if (config.has(key_path))
|
||||
{
|
||||
String current_key = config.getString(key_path);
|
||||
check_current_key_found(current_key);
|
||||
return current_key;
|
||||
}
|
||||
else if (config.has(key_hex_path))
|
||||
{
|
||||
String current_key = unhexKey(config.getString(key_hex_path));
|
||||
check_current_key_found(current_key);
|
||||
return current_key;
|
||||
}
|
||||
else if (config.has(key_id_path))
|
||||
{
|
||||
UInt64 current_key_id = config.getUInt64(key_id_path);
|
||||
auto it = keys_by_id.find(current_key_id);
|
||||
if (it == keys_by_id.end())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not found a key with the current ID {}", current_key_id);
|
||||
return it->second;
|
||||
}
|
||||
else if (keys_by_id.size() == 1 && keys_without_id.empty() && keys_by_id.begin()->first == 0)
|
||||
{
|
||||
/// There is only a single key defined with id=0, so we can choose it as current.
|
||||
return keys_by_id.begin()->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The current key is not specified");
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads the current encryption algorithm from the configuration.
|
||||
Algorithm getCurrentAlgorithmFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
|
||||
{
|
||||
String path = config_prefix + ".algorithm";
|
||||
if (!config.has(path))
|
||||
return DEFAULT_ENCRYPTION_ALGORITHM;
|
||||
return parseAlgorithmFromString(config.getString(path));
|
||||
}
|
||||
|
||||
/// Reads the name of a wrapped disk & the path on the wrapped disk and then finds that disk in a disk map.
|
||||
void getDiskAndPathFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const DisksMap & map,
|
||||
DiskPtr & out_disk, String & out_path)
|
||||
{
|
||||
String disk_name = config.getString(config_prefix + ".disk", "");
|
||||
if (disk_name.empty())
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS, "Name of the wrapped disk must not be empty. Encrypted disk is a wrapper over another disk");
|
||||
|
||||
auto disk_it = map.find(disk_name);
|
||||
if (disk_it == map.end())
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS, "The wrapped disk must have been announced earlier. No disk with name {}", disk_name);
|
||||
|
||||
out_disk = disk_it->second;
|
||||
|
||||
out_path = config.getString(config_prefix + ".path", "");
|
||||
if (!out_path.empty() && (out_path.back() != '/'))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk path must ends with '/', but '{}' doesn't.", quoteString(out_path));
|
||||
}
|
||||
|
||||
/// Parses the settings of an encrypted disk from the configuration.
|
||||
std::unique_ptr<const DiskEncryptedSettings> parseDiskEncryptedSettings(
|
||||
const String & name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const DisksMap & map)
|
||||
const String & disk_name,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix,
|
||||
const DisksMap & disk_map)
|
||||
{
|
||||
try
|
||||
{
|
||||
auto res = std::make_unique<DiskEncryptedSettings>();
|
||||
res->current_algorithm = DEFAULT_ENCRYPTION_ALGORITHM;
|
||||
if (config.has(config_prefix + ".algorithm"))
|
||||
parseFromString(res->current_algorithm, config.getString(config_prefix + ".algorithm"));
|
||||
|
||||
Strings config_keys;
|
||||
config.keys(config_prefix, config_keys);
|
||||
for (const std::string & config_key : config_keys)
|
||||
std::map<UInt64, String> keys_by_id;
|
||||
Strings keys_without_id;
|
||||
getKeysFromConfig(config, config_prefix, keys_by_id, keys_without_id);
|
||||
|
||||
for (const auto & [key_id, key] : keys_by_id)
|
||||
{
|
||||
String key;
|
||||
UInt64 key_id;
|
||||
auto fingerprint = calculateKeyFingerprint(key);
|
||||
res->all_keys[fingerprint] = key;
|
||||
|
||||
if ((config_key == "key") || config_key.starts_with("key["))
|
||||
{
|
||||
key = config.getString(config_prefix + "." + config_key, "");
|
||||
key_id = config.getUInt64(config_prefix + "." + config_key + "[@id]", 0);
|
||||
}
|
||||
else if ((config_key == "key_hex") || config_key.starts_with("key_hex["))
|
||||
{
|
||||
key = unhexKey(config.getString(config_prefix + "." + config_key, ""));
|
||||
key_id = config.getUInt64(config_prefix + "." + config_key + "[@id]", 0);
|
||||
}
|
||||
else
|
||||
continue;
|
||||
|
||||
if (res->keys.contains(key_id))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple keys have the same ID {}", key_id);
|
||||
res->keys[key_id] = key;
|
||||
/// Version 1 used key fingerprints based on the key id.
|
||||
/// We have to add such fingerprints to the map too to support reading files encrypted by version 1.
|
||||
auto v1_fingerprint = calculateV1KeyFingerprint(key, key_id);
|
||||
res->all_keys[v1_fingerprint] = key;
|
||||
}
|
||||
|
||||
if (res->keys.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "No keys, an encrypted disk needs keys to work");
|
||||
|
||||
if (!config.has(config_prefix + ".current_key_id"))
|
||||
for (const auto & key : keys_without_id)
|
||||
{
|
||||
/// In case of multiple keys, current_key_id is mandatory
|
||||
if (res->keys.size() > 1)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There are multiple keys in config. current_key_id is required");
|
||||
|
||||
/// If there is only one key with non zero ID, curren_key_id should be defined.
|
||||
if (res->keys.size() == 1 && !res->keys.contains(0))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Config has one key with non zero id. сurrent_key_id is required");
|
||||
auto fingerprint = calculateKeyFingerprint(key);
|
||||
res->all_keys[fingerprint] = key;
|
||||
}
|
||||
|
||||
res->current_key_id = config.getUInt64(config_prefix + ".current_key_id", 0);
|
||||
if (!res->keys.contains(res->current_key_id))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not found a key with the current ID {}", res->current_key_id);
|
||||
FileEncryption::checkKeySize(res->current_algorithm, res->keys[res->current_key_id].size());
|
||||
String current_key = getCurrentKeyFromConfig(config, config_prefix, keys_by_id, keys_without_id);
|
||||
res->current_key = current_key;
|
||||
res->current_key_fingerprint = calculateKeyFingerprint(current_key);
|
||||
|
||||
String wrapped_disk_name = config.getString(config_prefix + ".disk", "");
|
||||
if (wrapped_disk_name.empty())
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"Name of the wrapped disk must not be empty. Encrypted disk is a wrapper over another disk");
|
||||
res->current_algorithm = getCurrentAlgorithmFromConfig(config, config_prefix);
|
||||
|
||||
auto wrapped_disk_it = map.find(wrapped_disk_name);
|
||||
if (wrapped_disk_it == map.end())
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"The wrapped disk must have been announced earlier. No disk with name {}",
|
||||
wrapped_disk_name);
|
||||
res->wrapped_disk = wrapped_disk_it->second;
|
||||
FileEncryption::checkKeySize(res->current_key.size(), res->current_algorithm);
|
||||
|
||||
res->disk_path = config.getString(config_prefix + ".path", "");
|
||||
if (!res->disk_path.empty() && (res->disk_path.back() != '/'))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk path must ends with '/', but '{}' doesn't.", quoteString(res->disk_path));
|
||||
DiskPtr wrapped_disk;
|
||||
String disk_path;
|
||||
getDiskAndPathFromConfig(config, config_prefix, disk_map, wrapped_disk, disk_path);
|
||||
res->wrapped_disk = wrapped_disk;
|
||||
res->disk_path = disk_path;
|
||||
|
||||
return res;
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("Disk " + name);
|
||||
e.addMessage("Disk " + disk_name);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads the header of an encrypted file.
|
||||
FileEncryption::Header readHeader(ReadBufferFromFileBase & read_buffer)
|
||||
{
|
||||
try
|
||||
@ -138,24 +251,6 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
String getKey(const String & path, const FileEncryption::Header & header, const DiskEncryptedSettings & settings)
|
||||
{
|
||||
auto it = settings.keys.find(header.key_id);
|
||||
if (it == settings.keys.end())
|
||||
throw Exception(
|
||||
ErrorCodes::DATA_ENCRYPTION_ERROR,
|
||||
"Not found a key with ID {} required to decipher file {}",
|
||||
header.key_id,
|
||||
quoteString(path));
|
||||
|
||||
String key = it->second;
|
||||
if (calculateKeyHash(key) != header.key_hash)
|
||||
throw Exception(
|
||||
ErrorCodes::DATA_ENCRYPTION_ERROR, "Wrong key with ID {}, could not decipher file {}", header.key_id, quoteString(path));
|
||||
|
||||
return key;
|
||||
}
|
||||
|
||||
bool inline isSameDiskType(const IDisk & one, const IDisk & another)
|
||||
{
|
||||
return typeid(one) == typeid(another);
|
||||
@ -225,7 +320,7 @@ void DiskEncrypted::copy(const String & from_path, const std::shared_ptr<IDisk>
|
||||
{
|
||||
auto from_settings = current_settings.get();
|
||||
auto to_settings = to_disk_enc->current_settings.get();
|
||||
if (from_settings->keys == to_settings->keys)
|
||||
if (from_settings->all_keys == to_settings->all_keys)
|
||||
{
|
||||
/// Keys are the same so we can simply copy the encrypted file.
|
||||
auto wrapped_from_path = wrappedPath(from_path);
|
||||
@ -252,7 +347,7 @@ void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::sha
|
||||
{
|
||||
auto from_settings = current_settings.get();
|
||||
auto to_settings = to_disk_enc->current_settings.get();
|
||||
if (from_settings->keys == to_settings->keys)
|
||||
if (from_settings->all_keys == to_settings->all_keys)
|
||||
{
|
||||
/// Keys are the same so we can simply copy the encrypted file.
|
||||
auto wrapped_from_path = wrappedPath(from_dir);
|
||||
@ -293,7 +388,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile(
|
||||
}
|
||||
auto encryption_settings = current_settings.get();
|
||||
FileEncryption::Header header = readHeader(*buffer);
|
||||
String key = getKey(path, header, *encryption_settings);
|
||||
String key = encryption_settings->findKeyByFingerprint(header.key_fingerprint, path);
|
||||
return std::make_unique<ReadBufferFromEncryptedFile>(settings.local_fs_buffer_size, std::move(buffer), key, header);
|
||||
}
|
||||
|
||||
|
@ -38,39 +38,21 @@ FileEncryption::Header readHeader(ReadBufferFromFileBase & read_buffer)
|
||||
}
|
||||
}
|
||||
|
||||
String getCurrentKey(const String & path, const DiskEncryptedSettings & settings)
|
||||
}
|
||||
|
||||
String DiskEncryptedSettings::findKeyByFingerprint(UInt128 key_fingerprint, const String & path_for_logs) const
|
||||
{
|
||||
auto it = settings.keys.find(settings.current_key_id);
|
||||
if (it == settings.keys.end())
|
||||
auto it = all_keys.find(key_fingerprint);
|
||||
if (it == all_keys.end())
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::DATA_ENCRYPTION_ERROR,
|
||||
"Not found a key with the current ID {} required to cipher file {}",
|
||||
settings.current_key_id,
|
||||
quoteString(path));
|
||||
|
||||
"Not found an encryption key required to decipher file {}",
|
||||
quoteString(path_for_logs));
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
|
||||
String getKey(const String & path, const FileEncryption::Header & header, const DiskEncryptedSettings & settings)
|
||||
{
|
||||
auto it = settings.keys.find(header.key_id);
|
||||
if (it == settings.keys.end())
|
||||
throw Exception(
|
||||
ErrorCodes::DATA_ENCRYPTION_ERROR,
|
||||
"Not found a key with ID {} required to decipher file {}",
|
||||
header.key_id,
|
||||
quoteString(path));
|
||||
|
||||
String key = it->second;
|
||||
if (FileEncryption::calculateKeyHash(key) != header.key_hash)
|
||||
throw Exception(
|
||||
ErrorCodes::DATA_ENCRYPTION_ERROR, "Wrong key with ID {}, could not decipher file {}", header.key_id, quoteString(path));
|
||||
|
||||
return key;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void DiskEncryptedTransaction::copyFile(const std::string & from_file_path, const std::string & to_file_path)
|
||||
{
|
||||
auto wrapped_from_path = wrappedPath(from_file_path);
|
||||
@ -98,16 +80,15 @@ std::unique_ptr<WriteBufferFromFileBase> DiskEncryptedTransaction::writeFile( //
|
||||
/// Append mode: we continue to use the same header.
|
||||
auto read_buffer = delegate_disk->readFile(wrapped_path, ReadSettings().adjustBufferSize(FileEncryption::Header::kSize));
|
||||
header = readHeader(*read_buffer);
|
||||
key = getKey(path, header, current_settings);
|
||||
key = current_settings.findKeyByFingerprint(header.key_fingerprint, path);
|
||||
}
|
||||
}
|
||||
if (!old_file_size)
|
||||
{
|
||||
/// Rewrite mode: we generate a new header.
|
||||
key = getCurrentKey(path, current_settings);
|
||||
header.algorithm = current_settings.current_algorithm;
|
||||
header.key_id = current_settings.current_key_id;
|
||||
header.key_hash = FileEncryption::calculateKeyHash(key);
|
||||
key = current_settings.current_key;
|
||||
header.key_fingerprint = current_settings.current_key_fingerprint;
|
||||
header.init_vector = FileEncryption::InitVector::random();
|
||||
}
|
||||
auto buffer = delegate_transaction->writeFile(wrapped_path, buf_size, mode, settings, autocommit);
|
||||
|
@ -18,9 +18,13 @@ struct DiskEncryptedSettings
{
    DiskPtr wrapped_disk;
    String disk_path;
    std::unordered_map<UInt64, String> keys;
    UInt64 current_key_id;
    String current_key;
    UInt128 current_key_fingerprint;
    FileEncryption::Algorithm current_algorithm;
    std::unordered_map<UInt128 /* fingerprint */, String /* key */> all_keys;

    /// Returns an encryption key found by its fingerprint.
    String findKeyByFingerprint(UInt128 key_fingerprint, const String & path_for_logs) const;
};
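For orientation, the settings now carry both the current key and a fingerprint-indexed map of all known keys. A minimal sketch of how such an object could be populated, modelled on the unit-test setup further down in this diff (the `local_disk`, `key` and `path` variables and the chosen algorithm are assumed to come from the caller):

    auto settings = std::make_unique<DiskEncryptedSettings>();
    settings->wrapped_disk = local_disk;                       /// the underlying disk being wrapped
    settings->disk_path = path;                                /// sub-path on the wrapped disk, must end with '/'
    settings->current_algorithm = FileEncryption::Algorithm::AES_128_CTR;
    settings->current_key = key;
    settings->current_key_fingerprint = FileEncryption::calculateKeyFingerprint(key);
    settings->all_keys[settings->current_key_fingerprint] = key;   /// every known key, indexed by its fingerprint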
|
||||
|
||||
|
@ -298,7 +298,7 @@ off_t AsynchronousBoundedReadBuffer::seek(off_t offset, int whence)
|
||||
* Lazy ignore. Save number of bytes to ignore and ignore it either for prefetch buffer or current buffer.
|
||||
* Note: we read in range [file_offset_of_buffer_end, read_until_position).
|
||||
*/
|
||||
if (read_until_position && new_pos < *read_until_position
|
||||
if (file_offset_of_buffer_end && read_until_position && new_pos < *read_until_position
|
||||
&& new_pos > file_offset_of_buffer_end
|
||||
&& new_pos < file_offset_of_buffer_end + read_settings.remote_read_min_bytes_for_seek)
|
||||
{
|
||||
|
@ -51,8 +51,8 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile(
|
||||
std::optional<size_t> read_until_position_,
|
||||
std::shared_ptr<FilesystemCacheLog> cache_log_)
|
||||
: ReadBufferFromFileBase(use_external_buffer_ ? 0 : settings_.remote_fs_buffer_size, nullptr, 0, file_size_)
|
||||
#ifndef NDEBUG
|
||||
, log(&Poco::Logger::get("CachedOnDiskReadBufferFromFile(" + source_file_path_ + ")"))
|
||||
#ifdef ABORT_ON_LOGICAL_ERROR
|
||||
, log(&Poco::Logger::get(fmt::format("CachedOnDiskReadBufferFromFile({})", cache_key_)))
|
||||
#else
|
||||
, log(&Poco::Logger::get("CachedOnDiskReadBufferFromFile"))
|
||||
#endif
|
||||
@ -75,6 +75,9 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile(
|
||||
void CachedOnDiskReadBufferFromFile::appendFilesystemCacheLog(
|
||||
const FileSegment::Range & file_segment_range, CachedOnDiskReadBufferFromFile::ReadType type)
|
||||
{
|
||||
if (!cache_log)
|
||||
return;
|
||||
|
||||
FilesystemCacheLogElement elem
|
||||
{
|
||||
.event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()),
|
||||
@ -104,8 +107,7 @@ void CachedOnDiskReadBufferFromFile::appendFilesystemCacheLog(
|
||||
break;
|
||||
}
|
||||
|
||||
if (cache_log)
|
||||
cache_log->add(elem);
|
||||
cache_log->add(elem);
|
||||
}
|
||||
|
||||
void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size)
|
||||
@ -363,8 +365,8 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s
|
||||
else
|
||||
{
|
||||
LOG_TRACE(
|
||||
log,
|
||||
"Bypassing cache because file segment state is `PARTIALLY_DOWNLOADED_NO_CONTINUATION` and downloaded part already used");
|
||||
log, "Bypassing cache because file segment state is "
|
||||
"`PARTIALLY_DOWNLOADED_NO_CONTINUATION` and downloaded part already used");
|
||||
read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE;
|
||||
return getRemoteReadBuffer(file_segment, read_type);
|
||||
}
|
||||
@ -411,7 +413,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme
|
||||
{
|
||||
case ReadType::CACHED:
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
#ifdef ABORT_ON_LOGICAL_ERROR
|
||||
size_t file_size = getFileSizeFromReadBuffer(*read_buffer_for_file_segment);
|
||||
if (file_size == 0 || range.left + file_size <= file_offset_of_buffer_end)
|
||||
throw Exception(
|
||||
@ -456,7 +458,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme
|
||||
{
|
||||
read_buffer_for_file_segment->seek(file_offset_of_buffer_end, SEEK_SET);
|
||||
|
||||
assert(read_buffer_for_file_segment->getFileOffsetOfBufferEnd() == file_offset_of_buffer_end);
|
||||
chassert(read_buffer_for_file_segment->getFileOffsetOfBufferEnd() == file_offset_of_buffer_end);
|
||||
}
|
||||
|
||||
const auto current_write_offset = file_segment.getCurrentWriteOffset(false);
|
||||
@ -464,8 +466,8 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Buffer's offsets mismatch. Cached buffer offset: {}, current_write_offset: {}, implementation buffer position: {}, "
|
||||
"implementation buffer end position: {}, file segment info: {}",
|
||||
"Buffer's offsets mismatch. Cached buffer offset: {}, current_write_offset: {}, "
|
||||
"implementation buffer position: {}, implementation buffer end position: {}, file segment info: {}",
|
||||
file_offset_of_buffer_end,
|
||||
current_write_offset,
|
||||
read_buffer_for_file_segment->getPosition(),
|
||||
@ -707,14 +709,18 @@ bool CachedOnDiskReadBufferFromFile::updateImplementationBufferIfNeeded()
|
||||
}
|
||||
else if (current_write_offset < file_offset_of_buffer_end)
|
||||
{
|
||||
const auto path = file_segment.getPathInLocalCache();
|
||||
size_t file_size = 0;
|
||||
if (fs::exists(path))
|
||||
file_size = fs::file_size(path);
|
||||
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Expected {} >= {} ({})",
|
||||
current_write_offset, file_offset_of_buffer_end, getInfoForLog());
|
||||
"Invariant failed. Expected {} >= {} (size on fs: {}, {})",
|
||||
current_write_offset, file_offset_of_buffer_end, file_size, getInfoForLog());
|
||||
}
|
||||
}
|
||||
|
||||
if (read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE)
|
||||
else if (read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE)
|
||||
{
|
||||
/**
|
||||
* ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE means that on previous getImplementationBuffer() call
|
||||
@ -883,7 +889,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
|
||||
|
||||
if (!result)
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
#ifdef ABORT_ON_LOGICAL_ERROR
|
||||
if (read_type == ReadType::CACHED)
|
||||
{
|
||||
size_t cache_file_size = getFileSizeFromReadBuffer(*implementation_buffer);
|
||||
@ -897,10 +903,9 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(file_offset_of_buffer_end == static_cast<size_t>(implementation_buffer->getFileOffsetOfBufferEnd()));
|
||||
chassert(file_offset_of_buffer_end == static_cast<size_t>(implementation_buffer->getFileOffsetOfBufferEnd()));
|
||||
}
|
||||
|
||||
assert(!implementation_buffer->hasPendingData());
|
||||
chassert(!implementation_buffer->hasPendingData());
|
||||
#endif
|
||||
|
||||
Stopwatch watch(CLOCK_MONOTONIC);
|
||||
@ -918,14 +923,27 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
|
||||
|
||||
LOG_TEST(
|
||||
log,
|
||||
"Read {} bytes, read type {}, position: {}, offset: {}, segment end: {}",
|
||||
size, toString(read_type), implementation_buffer->getPosition(),
|
||||
implementation_buffer->getFileOffsetOfBufferEnd(), file_segment.range().right);
|
||||
"Read {} bytes, read type {}, file offset: {}, impl offset: {}/{}, segment: {}",
|
||||
size, toString(read_type), file_offset_of_buffer_end,
|
||||
implementation_buffer->getFileOffsetOfBufferEnd(), read_until_position, file_segment.range().toString());
|
||||
|
||||
if (read_type == ReadType::CACHED)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromCacheBytes, size);
|
||||
ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromCacheMicroseconds, elapsed);
|
||||
|
||||
#ifdef ABORT_ON_LOGICAL_ERROR
|
||||
const size_t new_file_offset = file_offset_of_buffer_end + size;
|
||||
chassert(new_file_offset - 1 <= file_segment.range().right);
|
||||
const size_t file_segment_write_offset = file_segment.getCurrentWriteOffset(true);
|
||||
if (new_file_offset > file_segment_write_offset)
|
||||
{
|
||||
LOG_TRACE(
|
||||
log, "Read {} bytes, file offset: {}, segment: {}, segment write offset: {}",
|
||||
size, file_offset_of_buffer_end, file_segment.range().toString(), file_segment_write_offset);
|
||||
chassert(false);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1082,8 +1100,8 @@ off_t CachedOnDiskReadBufferFromFile::seek(off_t offset, int whence)
|
||||
if (file_offset_of_buffer_end - working_buffer.size() <= new_pos && new_pos <= file_offset_of_buffer_end)
|
||||
{
|
||||
pos = working_buffer.end() - file_offset_of_buffer_end + new_pos;
|
||||
assert(pos >= working_buffer.begin());
|
||||
assert(pos <= working_buffer.end());
|
||||
chassert(pos >= working_buffer.begin());
|
||||
chassert(pos <= working_buffer.end());
|
||||
return new_pos;
|
||||
}
|
||||
}
|
||||
|
@ -13,19 +13,6 @@ WriteBufferWithFinalizeCallback::WriteBufferWithFinalizeCallback(
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
WriteBufferWithFinalizeCallback::~WriteBufferWithFinalizeCallback()
|
||||
{
|
||||
try
|
||||
{
|
||||
finalize();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
void WriteBufferWithFinalizeCallback::finalizeImpl()
|
||||
{
|
||||
WriteBufferFromFileDecorator::finalizeImpl();
|
||||
|
@ -19,8 +19,6 @@ public:
|
||||
FinalizeCallback && create_callback_,
|
||||
const String & remote_path_);
|
||||
|
||||
~WriteBufferWithFinalizeCallback() override;
|
||||
|
||||
String getFileName() const override { return remote_path; }
|
||||
|
||||
private:
|
||||
|
@ -37,8 +37,10 @@ protected:
|
||||
auto settings = std::make_unique<DiskEncryptedSettings>();
|
||||
settings->wrapped_disk = local_disk;
|
||||
settings->current_algorithm = algorithm;
|
||||
settings->keys[0] = key;
|
||||
settings->current_key_id = 0;
|
||||
auto fingerprint = FileEncryption::calculateKeyFingerprint(key);
|
||||
settings->all_keys[fingerprint] = key;
|
||||
settings->current_key = key;
|
||||
settings->current_key_fingerprint = fingerprint;
|
||||
settings->disk_path = path;
|
||||
encrypted_disk = std::make_shared<DiskEncrypted>("encrypted_disk", std::move(settings), true);
|
||||
}
|
||||
@ -255,7 +257,7 @@ TEST_F(DiskEncryptedTest, RandomIV)
|
||||
|
||||
String bina = getBinaryRepresentation(getDirectory() + "a.txt");
|
||||
String binb = getBinaryRepresentation(getDirectory() + "b.txt");
|
||||
constexpr size_t iv_offset = 16;
|
||||
constexpr size_t iv_offset = 23; /// See the description of the format in the comment for FileEncryption::Header.
|
||||
constexpr size_t iv_size = FileEncryption::InitVector::kSize;
|
||||
EXPECT_EQ(bina.substr(0, iv_offset), binb.substr(0, iv_offset)); /// Part of the header before IV is the same.
|
||||
EXPECT_NE(bina.substr(iv_offset, iv_size), binb.substr(iv_offset, iv_size)); /// IV differs.
|
||||
|
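The new offset of 23 is implied by the version-2 header layout described later in this diff (signature, version, algorithm and key fingerprint precede the IV); as a quick cross-check:

    static_assert(3 /*'ENC'*/ + 2 /*version*/ + 2 /*algorithm*/ + 16 /*key fingerprint*/ == 23);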
@ -1,13 +1,13 @@
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <Formats/ReadSchemaUtils.h>
|
||||
#include <Processors/Formats/ISchemaReader.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Processors/Formats/ISchemaReader.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -20,8 +20,7 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
|
||||
}
|
||||
|
||||
static std::optional<NamesAndTypesList> getOrderedColumnsList(
|
||||
const NamesAndTypesList & columns_list, const Names & columns_order_hint)
|
||||
static std::optional<NamesAndTypesList> getOrderedColumnsList(const NamesAndTypesList & columns_list, const Names & columns_order_hint)
|
||||
{
|
||||
if (columns_list.size() != columns_order_hint.size())
|
||||
return {};
|
||||
@ -65,7 +64,8 @@ ColumnsDescription readSchemaFromFormat(
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage(fmt::format("Cannot extract table structure from {} format file. You can specify the structure manually", format_name));
|
||||
e.addMessage(
|
||||
fmt::format("Cannot extract table structure from {} format file. You can specify the structure manually", format_name));
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@ -73,7 +73,8 @@ ColumnsDescription readSchemaFromFormat(
|
||||
{
|
||||
std::string exception_messages;
|
||||
SchemaReaderPtr schema_reader;
|
||||
size_t max_rows_to_read = format_settings ? format_settings->max_rows_to_read_for_schema_inference : context->getSettingsRef().input_format_max_rows_to_read_for_schema_inference;
|
||||
size_t max_rows_to_read = format_settings ? format_settings->max_rows_to_read_for_schema_inference
|
||||
: context->getSettingsRef().input_format_max_rows_to_read_for_schema_inference;
|
||||
size_t iterations = 0;
|
||||
ColumnsDescription cached_columns;
|
||||
while (true)
|
||||
@ -88,8 +89,8 @@ ColumnsDescription readSchemaFromFormat(
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage(fmt::format(
|
||||
"Cannot extract table structure from {} format file. You can specify the structure manually", format_name));
|
||||
e.addMessage(
|
||||
fmt::format("Cannot extract table structure from {} format file. You can specify the structure manually", format_name));
|
||||
throw;
|
||||
}
|
||||
catch (...)
|
||||
@ -109,7 +110,8 @@ ColumnsDescription readSchemaFromFormat(
|
||||
auto exception_message = fmt::format("Cannot extract table structure from {} format file, file is empty", format_name);
|
||||
|
||||
if (!retry)
|
||||
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "{}. You can specify the structure manually", exception_message);
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "{}. You can specify the structure manually", exception_message);
|
||||
|
||||
exception_messages += "\n" + exception_message;
|
||||
continue;
|
||||
@ -132,7 +134,8 @@ ColumnsDescription readSchemaFromFormat(
|
||||
max_rows_to_read -= schema_reader->getNumRowsRead();
|
||||
if (rows_read != 0 && max_rows_to_read == 0)
|
||||
{
|
||||
exception_message += "\nTo increase the maximum number of rows to read for structure determination, use setting input_format_max_rows_to_read_for_schema_inference";
|
||||
exception_message += "\nTo increase the maximum number of rows to read for structure determination, use setting "
|
||||
"input_format_max_rows_to_read_for_schema_inference";
|
||||
if (iterations > 1)
|
||||
{
|
||||
exception_messages += "\n" + exception_message;
|
||||
@ -150,15 +153,18 @@ ColumnsDescription readSchemaFromFormat(
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage(fmt::format("Cannot extract table structure from {} format file. You can specify the structure manually", format_name));
|
||||
e.addMessage(fmt::format(
|
||||
"Cannot extract table structure from {} format file. You can specify the structure manually", format_name));
|
||||
throw;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
|
||||
"Cannot extract table structure from {} format file. "
|
||||
"Error: {}. You can specify the structure manually",
|
||||
format_name, exception_message);
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
|
||||
"Cannot extract table structure from {} format file. "
|
||||
"Error: {}. You can specify the structure manually",
|
||||
format_name,
|
||||
exception_message);
|
||||
}
|
||||
}
|
||||
|
||||
@ -170,9 +176,11 @@ ColumnsDescription readSchemaFromFormat(
|
||||
return cached_columns;
|
||||
|
||||
if (names_and_types.empty())
|
||||
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
|
||||
"All attempts to extract table structure from files failed. "
|
||||
"Errors:{}\nYou can specify the structure manually", exception_messages);
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
|
||||
"All attempts to extract table structure from files failed. "
|
||||
"Errors:{}\nYou can specify the structure manually",
|
||||
exception_messages);
|
||||
|
||||
/// If we have "INSERT SELECT" query then try to order
|
||||
/// columns as they are ordered in table schema for formats
|
||||
@ -191,20 +199,30 @@ ColumnsDescription readSchemaFromFormat(
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"{} file format doesn't support schema inference. You must specify the structure manually",
|
||||
format_name);
|
||||
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"{} file format doesn't support schema inference. You must specify the structure manually",
|
||||
format_name);
|
||||
/// Some formats like CSVWithNames can contain empty column names. We don't support empty column names and further processing can fail with an exception. Let's just remove columns with empty names from the structure.
|
||||
names_and_types.erase(
|
||||
std::remove_if(names_and_types.begin(), names_and_types.end(), [](const NameAndTypePair & pair) { return pair.name.empty(); }),
|
||||
names_and_types.end());
|
||||
return ColumnsDescription(names_and_types);
|
||||
}
|
||||
|
||||
ColumnsDescription readSchemaFromFormat(const String & format_name, const std::optional<FormatSettings> & format_settings, ReadBufferIterator & read_buffer_iterator, bool retry, ContextPtr & context)
|
||||
ColumnsDescription readSchemaFromFormat(
|
||||
const String & format_name,
|
||||
const std::optional<FormatSettings> & format_settings,
|
||||
ReadBufferIterator & read_buffer_iterator,
|
||||
bool retry,
|
||||
ContextPtr & context)
|
||||
{
|
||||
std::unique_ptr<ReadBuffer> buf_out;
|
||||
return readSchemaFromFormat(format_name, format_settings, read_buffer_iterator, retry, context, buf_out);
|
||||
}
|
||||
|
||||
SchemaCache::Key getKeyForSchemaCache(const String & source, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context)
|
||||
SchemaCache::Key getKeyForSchemaCache(
|
||||
const String & source, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context)
|
||||
{
|
||||
return getKeysForSchemaCache({source}, format, format_settings, context).front();
|
||||
}
|
||||
@ -214,7 +232,8 @@ static SchemaCache::Key makeSchemaCacheKey(const String & source, const String &
|
||||
return SchemaCache::Key{source, format, additional_format_info};
|
||||
}
|
||||
|
||||
SchemaCache::Keys getKeysForSchemaCache(const Strings & sources, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context)
|
||||
SchemaCache::Keys getKeysForSchemaCache(
|
||||
const Strings & sources, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context)
|
||||
{
|
||||
/// For some formats data schema depends on some settings, so it's possible that
|
||||
/// two queries to the same source will get two different schemas. To process this
|
||||
@ -224,7 +243,11 @@ SchemaCache::Keys getKeysForSchemaCache(const Strings & sources, const String &
|
||||
String additional_format_info = FormatFactory::instance().getAdditionalInfoForSchemaCache(format, context, format_settings);
|
||||
SchemaCache::Keys cache_keys;
|
||||
cache_keys.reserve(sources.size());
|
||||
std::transform(sources.begin(), sources.end(), std::back_inserter(cache_keys), [&](const auto & source){ return makeSchemaCacheKey(source, format, additional_format_info); });
|
||||
std::transform(
|
||||
sources.begin(),
|
||||
sources.end(),
|
||||
std::back_inserter(cache_keys),
|
||||
[&](const auto & source) { return makeSchemaCacheKey(source, format, additional_format_info); });
|
||||
return cache_keys;
|
||||
}
|
||||
|
||||
|
@ -88,6 +88,38 @@ struct ToStartOfWeekImpl
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
struct ToLastDayOfWeekImpl
|
||||
{
|
||||
static constexpr auto name = "toLastDayOfWeek";
|
||||
|
||||
static inline UInt16 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toLastDayNumOfWeek(time_zone.toDayNum(t), week_mode);
|
||||
}
|
||||
static inline UInt16 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toLastDayNumOfWeek(time_zone.toDayNum(t), week_mode);
|
||||
}
|
||||
static inline UInt16 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toLastDayNumOfWeek(ExtendedDayNum(d), week_mode);
|
||||
}
|
||||
static inline UInt16 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toLastDayNumOfWeek(DayNum(d), week_mode);
|
||||
}
|
||||
static inline Int64 executeExtendedResult(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toLastDayNumOfWeek(time_zone.toDayNum(t), week_mode);
|
||||
}
|
||||
static inline Int32 executeExtendedResult(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toLastDayNumOfWeek(ExtendedDayNum(d), week_mode);
|
||||
}
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
struct ToWeekImpl
|
||||
{
|
||||
static constexpr auto name = "toWeek";
|
||||
|
395
src/Functions/EntropyLearnedHash.cpp
Normal file
@ -0,0 +1,395 @@
|
||||
#include <base/defines.h>
|
||||
#include <base/types.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
/// Implementation of entropy-learned hashing: https://doi.org/10.1145/3514221.3517894
|
||||
/// If you change something in this file, please don't deviate too much from the pseudocode in the paper!
|
||||
|
||||
/// TODOs for future work:
|
||||
/// - allow to specify an arbitrary hash function (currently always CityHash is used)
|
||||
/// - allow function chaining a la entropyLearnedHash(trainEntropyLearnedHash())
|
||||
/// - support more datatypes for data (besides String)
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
using PartialKeyPositions = std::vector<size_t>;
|
||||
using Entropies = std::vector<size_t>;
|
||||
|
||||
void getPartialKey(std::string_view key, const PartialKeyPositions & partial_key_positions, String & result)
|
||||
{
|
||||
result.clear();
|
||||
result.reserve(partial_key_positions.size());
|
||||
|
||||
for (auto partial_key_position : partial_key_positions)
|
||||
if (partial_key_position < key.size())
|
||||
result.push_back(key[partial_key_position]);
|
||||
}
|
||||
|
||||
bool allPartialKeysAreUnique(const std::vector<std::string_view> & keys, const PartialKeyPositions & partial_key_positions)
|
||||
{
|
||||
std::unordered_set<String> unique_partial_keys;
|
||||
unique_partial_keys.reserve(keys.size());
|
||||
String partial_key;
|
||||
|
||||
for (const auto & key : keys)
|
||||
{
|
||||
getPartialKey(key, partial_key_positions, partial_key);
|
||||
if (!unique_partial_keys.insert(partial_key).second)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// NextByte returns position of byte which adds the most entropy and the new entropy
|
||||
std::pair<size_t, size_t> nextByte(const std::vector<std::string_view> & keys, size_t max_len, PartialKeyPositions & partial_key_positions)
|
||||
{
|
||||
size_t min_collisions = std::numeric_limits<size_t>::max();
|
||||
size_t best_position = 0;
|
||||
|
||||
std::unordered_map<String, size_t> count_table;
|
||||
count_table.reserve(keys.size());
|
||||
|
||||
String partial_key;
|
||||
|
||||
for (size_t i = 0; i < max_len; ++i)
|
||||
{
|
||||
count_table.clear();
|
||||
|
||||
partial_key_positions.push_back(i);
|
||||
size_t collisions = 0;
|
||||
for (const auto & key : keys)
|
||||
{
|
||||
getPartialKey(key, partial_key_positions, partial_key);
|
||||
collisions += count_table[partial_key]++;
|
||||
}
|
||||
|
||||
if (collisions < min_collisions)
|
||||
{
|
||||
min_collisions = collisions;
|
||||
best_position = i;
|
||||
}
|
||||
partial_key_positions.pop_back();
|
||||
}
|
||||
|
||||
return {best_position, min_collisions};
|
||||
}
|
||||
|
||||
std::pair<PartialKeyPositions, Entropies> chooseBytes(const std::vector<std::string_view> & train_data)
{
    if (train_data.size() <= 1)
        return {};

    PartialKeyPositions partial_key_positions;
    Entropies entropies;

    size_t max_len = 0; /// length of the longest key in training data
    for (const auto & key : train_data)
        max_len = std::max(max_len, key.size());

    while (!allPartialKeysAreUnique(train_data, partial_key_positions))
    {
        auto [new_position, new_entropy] = nextByte(train_data, max_len, partial_key_positions);
        if (!entropies.empty() && new_entropy == entropies.back())
            break;
        partial_key_positions.push_back(new_position);
        entropies.push_back(new_entropy);
    }
    return {partial_key_positions, entropies};
}
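To make the greedy selection concrete, here is a small self-contained sketch in plain standard C++ (independent of ClickHouse types; the toy keys and all names are illustrative only). It mirrors getPartialKey, nextByte and chooseBytes in simplified form: keep adding the byte position that causes the fewest partial-key collisions until all partial keys are unique.

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <limits>
    #include <string>
    #include <string_view>
    #include <unordered_map>
    #include <unordered_set>
    #include <utility>
    #include <vector>

    using Positions = std::vector<size_t>;

    static std::string partialKey(std::string_view key, const Positions & positions)
    {
        std::string result;
        for (size_t pos : positions)
            if (pos < key.size())
                result.push_back(key[pos]);
        return result;
    }

    static bool allUnique(const std::vector<std::string_view> & keys, const Positions & positions)
    {
        std::unordered_set<std::string> seen;
        for (auto key : keys)
            if (!seen.insert(partialKey(key, positions)).second)
                return false;
        return true;
    }

    /// Returns the position whose addition produces the fewest collisions, plus that collision count.
    static std::pair<size_t, size_t> nextBestPosition(const std::vector<std::string_view> & keys, size_t max_len, Positions positions)
    {
        size_t best_position = 0;
        size_t min_collisions = std::numeric_limits<size_t>::max();
        for (size_t candidate = 0; candidate < max_len; ++candidate)
        {
            positions.push_back(candidate);
            std::unordered_map<std::string, size_t> counts;
            size_t collisions = 0;
            for (auto key : keys)
                collisions += counts[partialKey(key, positions)]++;
            if (collisions < min_collisions)
            {
                min_collisions = collisions;
                best_position = candidate;
            }
            positions.pop_back();
        }
        return {best_position, min_collisions};
    }

    int main()
    {
        std::vector<std::string_view> keys = {"user_0001", "user_0002", "user_0103", "admin_0004"};

        size_t max_len = 0;
        for (auto key : keys)
            max_len = std::max(max_len, key.size());

        Positions positions;
        while (!allUnique(keys, positions))
        {
            auto [position, collisions] = nextBestPosition(keys, max_len, positions);
            positions.push_back(position);
            std::cout << "picked position " << position << " (collisions now " << collisions << ")\n";
            if (positions.size() >= max_len)
                break; /// safety stop for degenerate data such as exact duplicate keys
        }
        /// The selected positions are the bytes a partial-key hash would look at.
    }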
|
||||
/// Contains global state to convey information between SQL functions
|
||||
/// - prepareTrainEntropyLearnedHash(),
|
||||
/// - trainEntropyLearnedHash() and
|
||||
/// - entropyLearnedHash().
|
||||
///
|
||||
/// The reason this machinery is necessary is that ClickHouse processes data in chunks of unpredictable size, yet the training step of
|
||||
/// entropy-learned hashing needs to process *all* training data in one go. The downside is that the training step becomes quite expensive :-(
|
||||
class EntropyLearnedHashGlobalState
|
||||
{
|
||||
public:
|
||||
static EntropyLearnedHashGlobalState & instance()
|
||||
{
|
||||
static EntropyLearnedHashGlobalState instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
/// Called by prepareTrainEntropyLearnedHash()
|
||||
void cacheTrainingSample(const String & user_name, const String & id, IColumn::MutablePtr column)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto & ids_for_user = global_state[user_name];
|
||||
auto & training_samples_for_id = ids_for_user[id].training_samples;
|
||||
training_samples_for_id.push_back(std::move(column));
|
||||
}
|
||||
|
||||
void train(const String & user_name, const String & id)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto & ids_for_user = global_state[user_name];
|
||||
auto & training_samples = ids_for_user[id].training_samples;
|
||||
|
||||
if (training_samples.empty())
|
||||
return;
|
||||
|
||||
auto & concatenated_training_sample = training_samples[0];
|
||||
for (size_t i = 1; i < training_samples.size(); ++i)
|
||||
{
|
||||
auto & other_training_sample = training_samples[i];
|
||||
concatenated_training_sample->insertRangeFrom(*other_training_sample, 0, other_training_sample->size());
|
||||
}
|
||||
|
||||
const ColumnString * concatenated_training_sample_string = checkAndGetColumn<ColumnString>(*concatenated_training_sample);
|
||||
if (!concatenated_training_sample_string)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column");
|
||||
|
||||
const size_t num_rows = concatenated_training_sample_string->size();
|
||||
std::vector<std::string_view> training_data;
|
||||
for (size_t i = 0; i < num_rows; ++i)
|
||||
{
|
||||
std::string_view string_view = concatenated_training_sample_string->getDataAt(i).toView();
|
||||
training_data.emplace_back(string_view);
|
||||
}
|
||||
|
||||
PartialKeyPositions partial_key_positions = chooseBytes(training_data).first;
|
||||
|
||||
ids_for_user[id].partial_key_positions = partial_key_positions;
|
||||
training_samples.clear();
|
||||
}
|
||||
|
||||
const PartialKeyPositions & getPartialKeyPositions(const String & user_name, const String & id) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto it_user = global_state.find(user_name);
|
||||
if (it_user == global_state.end())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Id {} not registered for user in entropy learned hashing", id);
|
||||
auto it_id = it_user->second.find(id);
|
||||
if (it_id == it_user->second.end())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Id {} not registered for user in entropy learned hashing", id);
|
||||
return it_id->second.partial_key_positions;
|
||||
}
|
||||
|
||||
private:
|
||||
mutable std::mutex mutex;
|
||||
|
||||
/// The state.
|
||||
struct ColumnsAndPartialKeyPositions
|
||||
{
|
||||
/// Caches training data chunks. Filled by prepareTrainEntropyLearnedHash(), cleared by trainEntropyLearnedHash().
|
||||
MutableColumns training_samples;
|
||||
/// The result of the training phase. Filled by trainEntropyLearnedHash().
|
||||
PartialKeyPositions partial_key_positions;
|
||||
};
|
||||
|
||||
/// Maps a state id to the state.
|
||||
using IdToColumnsAndPartialKeyPositions = std::map<String, ColumnsAndPartialKeyPositions>;
|
||||
|
||||
/// Maps the user name to a state id. As a result, the state id is unique at user scope.
|
||||
using UserNameToId = std::map<String, IdToColumnsAndPartialKeyPositions>;
|
||||
|
||||
UserNameToId global_state TSA_GUARDED_BY(mutex);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
/// Copies all chunks of the training sample column into the global state under a given id.
|
||||
class FunctionPrepareTrainEntropyLearnedHash : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "prepareTrainEntropyLearnedHash";
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
if (!context->getSettings().allow_experimental_hash_functions)
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
|
||||
"Entropy-learned hashing is experimental. Set `allow_experimental_hash_functions` setting to enable it");
|
||||
|
||||
return std::make_shared<FunctionPrepareTrainEntropyLearnedHash>(context->getUserName());
|
||||
}
|
||||
explicit FunctionPrepareTrainEntropyLearnedHash(const String & user_name_) : IFunction(), user_name(user_name_) {}
|
||||
|
||||
String getName() const override { return name; }
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
FunctionArgumentDescriptors args{
|
||||
{"data", &isString<IDataType>, nullptr, "String"},
|
||||
{"id", &isString<IDataType>, nullptr, "String"}
|
||||
};
|
||||
|
||||
validateFunctionArgumentTypes(*this, arguments, args);
|
||||
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override
|
||||
{
|
||||
const IColumn * id_col = arguments[1].column.get();
|
||||
const ColumnConst * id_col_const = checkAndGetColumn<ColumnConst>(id_col);
|
||||
const String id = id_col_const->getValue<String>();
|
||||
|
||||
IColumn::Ptr data_col = arguments[0].column;
|
||||
IColumn::MutablePtr data_col_mutable = IColumn::mutate(data_col);
|
||||
|
||||
auto & global_state = EntropyLearnedHashGlobalState::instance();
|
||||
global_state.cacheTrainingSample(user_name, id, std::move(data_col_mutable));
|
||||
|
||||
const size_t num_rows = data_col->size();
|
||||
return result_type->createColumnConst(num_rows, 0u); /// dummy output
|
||||
}
|
||||
private:
|
||||
const String user_name;
|
||||
};
|
||||
|
||||
|
||||
/// 1. Concatenates the training samples of a given id in the global state.
|
||||
/// 2. Computes the partial key positions from the concatenated training samples and stores that in the global state.
|
||||
/// 3. clear()-s the training samples in the global state.
|
||||
class FunctionTrainEntropyLearnedHash : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "trainEntropyLearnedHash";
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
if (!context->getSettings().allow_experimental_hash_functions)
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
|
||||
"Entropy-learned hashing is experimental. Set `allow_experimental_hash_functions` setting to enable it");
|
||||
return std::make_shared<FunctionTrainEntropyLearnedHash>(context->getUserName());
|
||||
}
|
||||
explicit FunctionTrainEntropyLearnedHash(const String & user_name_) : IFunction(), user_name(user_name_) {}
|
||||
|
||||
String getName() const override { return name; }
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
bool useDefaultImplementationForConstants() const override { return false; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
FunctionArgumentDescriptors args{
|
||||
{"id", &isString<IDataType>, nullptr, "String"}
|
||||
};
|
||||
|
||||
validateFunctionArgumentTypes(*this, arguments, args);
|
||||
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override
|
||||
{
|
||||
const IColumn * id_col = arguments[0].column.get();
|
||||
const ColumnConst * id_col_const = checkAndGetColumn<ColumnConst>(id_col);
|
||||
if (!id_col_const)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
|
||||
arguments.begin()->column->getName(), getName());
|
||||
|
||||
auto & global_state = EntropyLearnedHashGlobalState::instance();
|
||||
|
||||
const String id = id_col_const->getValue<String>();
|
||||
global_state.train(user_name, id);
|
||||
|
||||
const size_t num_rows = id_col->size();
|
||||
return result_type->createColumnConst(num_rows, 0u); /// dummy output
|
||||
}
|
||||
private:
|
||||
const String user_name;
|
||||
};
|
||||
|
||||
|
||||
/// Hashes input strings using partial key positions stored in the global state.
|
||||
class FunctionEntropyLearnedHash : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "entropyLearnedHash";
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
if (!context->getSettings().allow_experimental_hash_functions)
|
||||
            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
                "Entropy-learned hashing is experimental. Set `allow_experimental_hash_functions` setting to enable it");
|
||||
return std::make_shared<FunctionEntropyLearnedHash>(context->getUserName());
|
||||
}
|
||||
explicit FunctionEntropyLearnedHash(const String & user_name_) : IFunction(), user_name(user_name_) {}
|
||||
|
||||
String getName() const override { return name; }
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
||||
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
FunctionArgumentDescriptors args{
|
||||
{"data", &isString<IDataType>, nullptr, "String"},
|
||||
{"id", &isString<IDataType>, nullptr, "String"}
|
||||
};
|
||||
|
||||
validateFunctionArgumentTypes(*this, arguments, args);
|
||||
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
|
||||
{
|
||||
const IColumn * id_col = arguments.back().column.get();
|
||||
const ColumnConst * id_col_const = checkAndGetColumn<ColumnConst>(id_col);
|
||||
const String id = id_col_const->getValue<String>();
|
||||
|
||||
const auto & global_state = EntropyLearnedHashGlobalState::instance();
|
||||
const auto & partial_key_positions = global_state.getPartialKeyPositions(user_name, id);
|
||||
|
||||
const auto * data_col = arguments[0].column.get();
|
||||
if (const auto * col_data_string = checkAndGetColumn<ColumnString>(data_col))
|
||||
{
|
||||
const size_t num_rows = col_data_string->size();
|
||||
auto col_res = ColumnUInt64::create(num_rows);
|
||||
|
||||
auto & col_res_vec = col_res->getData();
|
||||
String partial_key;
|
||||
for (size_t i = 0; i < num_rows; ++i)
|
||||
{
|
||||
std::string_view string_ref = col_data_string->getDataAt(i).toView();
|
||||
getPartialKey(string_ref, partial_key_positions, partial_key);
|
||||
col_res_vec[i] = CityHash_v1_0_2::CityHash64(partial_key.data(), partial_key.size());
|
||||
}
|
||||
|
||||
return col_res;
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
|
||||
arguments.begin()->column->getName(), getName());
|
||||
}
|
||||
private:
|
||||
const String user_name;
|
||||
};
|
||||
|
||||
REGISTER_FUNCTION(EntropyLearnedHash)
|
||||
{
|
||||
factory.registerFunction<FunctionPrepareTrainEntropyLearnedHash>();
|
||||
factory.registerFunction<FunctionTrainEntropyLearnedHash>();
|
||||
factory.registerFunction<FunctionEntropyLearnedHash>();
|
||||
}
|
||||
|
||||
}
|
@ -1741,7 +1741,7 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A
|
||||
OpImpl::template processString<OpCase::Vector>(in_vec.data(), col_left->getOffsets().data(), &value, out_vec, out_offsets, 1);
|
||||
}
|
||||
|
||||
return ColumnConst::create(std::move(col_res), col_left->size());
|
||||
return ColumnConst::create(std::move(col_res), col_left_const->size());
|
||||
}
|
||||
else if (!col_left_const && !col_right_const && col_right)
|
||||
{
|
||||
|
@ -11,12 +11,14 @@ namespace DB
|
||||
using FunctionToWeek = FunctionCustomWeekToSomething<DataTypeUInt8, ToWeekImpl>;
|
||||
using FunctionToYearWeek = FunctionCustomWeekToSomething<DataTypeUInt32, ToYearWeekImpl>;
|
||||
using FunctionToStartOfWeek = FunctionCustomWeekToDateOrDate32<ToStartOfWeekImpl>;
|
||||
using FunctionToLastDayOfWeek = FunctionCustomWeekToDateOrDate32<ToLastDayOfWeekImpl>;
|
||||
|
||||
REGISTER_FUNCTION(ToCustomWeek)
|
||||
{
|
||||
factory.registerFunction<FunctionToWeek>();
|
||||
factory.registerFunction<FunctionToYearWeek>();
|
||||
factory.registerFunction<FunctionToStartOfWeek>();
|
||||
factory.registerFunction<FunctionToLastDayOfWeek>();
|
||||
|
||||
/// Compatibility aliases for mysql.
|
||||
factory.registerAlias("week", "toWeek", FunctionFactory::CaseInsensitive);
|
||||
|
@ -191,7 +191,10 @@ namespace
|
||||
explicit StreamFromWriteBuffer(std::unique_ptr<WriteBuffer> write_buffer_)
|
||||
: write_buffer(std::move(write_buffer_)), start_offset(write_buffer->count()) {}
|
||||
|
||||
~StreamFromWriteBuffer() { write_buffer->finalize(); }
|
||||
~StreamFromWriteBuffer()
|
||||
{
|
||||
write_buffer->finalize();
|
||||
}
|
||||
|
||||
static int closeFileFunc(void *, void * stream)
|
||||
{
|
||||
|
@ -34,6 +34,7 @@ namespace
|
||||
case Algorithm::AES_128_CTR: return EVP_aes_128_ctr();
|
||||
case Algorithm::AES_192_CTR: return EVP_aes_192_ctr();
|
||||
case Algorithm::AES_256_CTR: return EVP_aes_256_ctr();
|
||||
case Algorithm::MAX: break;
|
||||
}
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
@ -187,10 +188,14 @@ namespace
|
||||
return plaintext_size;
|
||||
}
|
||||
|
||||
constexpr const char kHeaderSignature[] = "ENC";
|
||||
constexpr const UInt16 kHeaderCurrentVersion = 1;
|
||||
}
|
||||
constexpr const std::string_view kHeaderSignature = "ENC";
|
||||
|
||||
UInt128 calculateV1KeyFingerprint(UInt8 small_key_hash, UInt64 key_id)
|
||||
{
|
||||
/// In the version 1 we stored {key_id, very_small_hash(key)} instead of a fingerprint.
|
||||
return static_cast<UInt128>(key_id) | (static_cast<UInt128>(small_key_hash) << 64);
|
||||
}
|
||||
}
|
||||
|
||||
String toString(Algorithm algorithm)
|
||||
{
|
||||
@ -199,6 +204,7 @@ String toString(Algorithm algorithm)
|
||||
case Algorithm::AES_128_CTR: return "aes_128_ctr";
|
||||
case Algorithm::AES_192_CTR: return "aes_192_ctr";
|
||||
case Algorithm::AES_256_CTR: return "aes_256_ctr";
|
||||
case Algorithm::MAX: break;
|
||||
}
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
@ -206,14 +212,14 @@ String toString(Algorithm algorithm)
|
||||
static_cast<int>(algorithm));
|
||||
}
|
||||
|
||||
void parseFromString(Algorithm & algorithm, const String & str)
|
||||
Algorithm parseAlgorithmFromString(const String & str)
|
||||
{
|
||||
if (boost::iequals(str, "aes_128_ctr"))
|
||||
algorithm = Algorithm::AES_128_CTR;
|
||||
return Algorithm::AES_128_CTR;
|
||||
else if (boost::iequals(str, "aes_192_ctr"))
|
||||
algorithm = Algorithm::AES_192_CTR;
|
||||
return Algorithm::AES_192_CTR;
|
||||
else if (boost::iequals(str, "aes_256_ctr"))
|
||||
algorithm = Algorithm::AES_256_CTR;
|
||||
return Algorithm::AES_256_CTR;
|
||||
else
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
@ -221,7 +227,7 @@ void parseFromString(Algorithm & algorithm, const String & str)
|
||||
str);
|
||||
}
|
||||
|
||||
void checkKeySize(Algorithm algorithm, size_t key_size) { checkKeySize(getCipher(algorithm), key_size); }
|
||||
void checkKeySize(size_t key_size, Algorithm algorithm) { checkKeySize(getCipher(algorithm), key_size); }
|
||||
|
||||
|
||||
String InitVector::toString() const
|
||||
@ -364,54 +370,92 @@ void Encryptor::decrypt(const char * data, size_t size, char * out)
|
||||
|
||||
void Header::read(ReadBuffer & in)
|
||||
{
|
||||
constexpr size_t header_signature_size = std::size(kHeaderSignature) - 1;
|
||||
char signature[std::size(kHeaderSignature)] = {};
|
||||
in.readStrict(signature, header_signature_size);
|
||||
if (strcmp(signature, kHeaderSignature) != 0)
|
||||
char signature[kHeaderSignature.length()];
|
||||
in.readStrict(signature, kHeaderSignature.length());
|
||||
if (memcmp(signature, kHeaderSignature.data(), kHeaderSignature.length()) != 0)
|
||||
throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Wrong signature, this is not an encrypted file");
|
||||
|
||||
UInt16 version;
|
||||
readPODBinary(version, in);
|
||||
if (version != kHeaderCurrentVersion)
|
||||
/// The endianness of how the header is written.
|
||||
/// Starting from version 2 the header is always in little endian.
|
||||
std::endian endian = std::endian::little;
|
||||
|
||||
readBinaryLittleEndian(version, in);
|
||||
|
||||
if (version == 0x0100ULL)
|
||||
{
|
||||
/// Version 1 could write the header of an encrypted file in either little-endian or big-endian.
|
||||
/// So now if we read the version as little-endian and it's 256 that means two things: the version is actually 1 and the whole header is in big endian.
|
||||
endian = std::endian::big;
|
||||
version = 1;
|
||||
}
|
||||
|
||||
if (version < 1 || version > kCurrentVersion)
|
||||
throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Version {} of the header is not supported", version);
|
||||
|
||||
UInt16 algorithm_u16;
|
||||
readPODBinary(algorithm_u16, in);
|
||||
if (std::endian::native != endian)
|
||||
algorithm_u16 = std::byteswap(algorithm_u16);
|
||||
if (algorithm_u16 >= static_cast<UInt16>(Algorithm::MAX))
|
||||
throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Algorithm {} is not supported", algorithm_u16);
|
||||
algorithm = static_cast<Algorithm>(algorithm_u16);
|
||||
|
||||
readPODBinary(key_id, in);
|
||||
readPODBinary(key_hash, in);
|
||||
size_t bytes_to_skip = kSize - kHeaderSignature.length() - sizeof(version) - sizeof(algorithm_u16) - InitVector::kSize;
|
||||
|
||||
if (version < 2)
|
||||
{
|
||||
UInt64 key_id;
|
||||
UInt8 small_key_hash;
|
||||
readPODBinary(key_id, in);
|
||||
readPODBinary(small_key_hash, in);
|
||||
bytes_to_skip -= sizeof(key_id) + sizeof(small_key_hash);
|
||||
if (std::endian::native != endian)
|
||||
key_id = std::byteswap(key_id);
|
||||
key_fingerprint = calculateV1KeyFingerprint(small_key_hash, key_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
readBinaryLittleEndian(key_fingerprint, in);
|
||||
bytes_to_skip -= sizeof(key_fingerprint);
|
||||
}
|
||||
|
||||
init_vector.read(in);
|
||||
|
||||
constexpr size_t reserved_size = kSize - header_signature_size - sizeof(version) - sizeof(algorithm_u16) - sizeof(key_id) - sizeof(key_hash) - InitVector::kSize;
|
||||
static_assert(reserved_size < kSize);
|
||||
in.ignore(reserved_size);
|
||||
chassert(bytes_to_skip < kSize);
|
||||
in.ignore(bytes_to_skip);
|
||||
}
|
||||
|
||||
void Header::write(WriteBuffer & out) const
|
||||
{
|
||||
constexpr size_t header_signature_size = std::size(kHeaderSignature) - 1;
|
||||
out.write(kHeaderSignature, header_signature_size);
|
||||
writeString(kHeaderSignature, out);
|
||||
|
||||
UInt16 version = kHeaderCurrentVersion;
|
||||
writePODBinary(version, out);
|
||||
writeBinaryLittleEndian(version, out);
|
||||
|
||||
UInt16 algorithm_u16 = static_cast<UInt16>(algorithm);
|
||||
writePODBinary(algorithm_u16, out);
|
||||
writeBinaryLittleEndian(algorithm_u16, out);
|
||||
|
||||
writeBinaryLittleEndian(key_fingerprint, out);
|
||||
|
||||
writePODBinary(key_id, out);
|
||||
writePODBinary(key_hash, out);
|
||||
init_vector.write(out);
|
||||
|
||||
constexpr size_t reserved_size = kSize - header_signature_size - sizeof(version) - sizeof(algorithm_u16) - sizeof(key_id) - sizeof(key_hash) - InitVector::kSize;
|
||||
constexpr size_t reserved_size = kSize - kHeaderSignature.length() - sizeof(version) - sizeof(algorithm_u16) - sizeof(key_fingerprint) - InitVector::kSize;
|
||||
static_assert(reserved_size < kSize);
|
||||
char reserved_zero_bytes[reserved_size] = {};
|
||||
out.write(reserved_zero_bytes, reserved_size);
|
||||
char zero_bytes[reserved_size] = {};
|
||||
out.write(zero_bytes, reserved_size);
|
||||
}
|
||||
|
||||
UInt8 calculateKeyHash(const String & key)
UInt128 calculateKeyFingerprint(const String & key)
{
    return static_cast<UInt8>(sipHash64(key.data(), key.size())) & 0x0F;
    const UInt64 seed0 = 0x4368456E63727970ULL; // ChEncryp
    const UInt64 seed1 = 0x7465644469736B46ULL; // tedDiskF
    return sipHash128Keyed(seed0, seed1, key.data(), key.size());
}

UInt128 calculateV1KeyFingerprint(const String & key, UInt64 key_id)
{
    /// In version 1 we stored {key_id, very_small_hash(key)} instead of a fingerprint.
    UInt8 small_key_hash = sipHash64(key.data(), key.size()) & 0x0F;
    return calculateV1KeyFingerprint(small_key_hash, key_id);
}

}
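A short sketch of how the two fingerprint flavours are meant to line up (illustrative values only; the key and key id are the ones used in the unit test later in this diff):

    String key = "1234567812345678";
    UInt64 key_id = 1;

    /// Version 2: the fingerprint depends only on the key itself.
    UInt128 v2_fingerprint = calculateKeyFingerprint(key);

    /// Version 1 headers stored {key_id, very_small_hash(key)}; calculateV1KeyFingerprint packs them
    /// into one UInt128 (key_id in the low 64 bits, the 4-bit hash above them), so files written by
    /// the old format can still be looked up in the same fingerprint-keyed map of keys.
    UInt128 v1_fingerprint = calculateV1KeyFingerprint(key, key_id);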
|
||||
|
@ -23,13 +23,14 @@ enum class Algorithm
|
||||
AES_128_CTR, /// Size of key is 16 bytes.
|
||||
AES_192_CTR, /// Size of key is 24 bytes.
|
||||
AES_256_CTR, /// Size of key is 32 bytes.
|
||||
MAX
|
||||
};
|
||||
|
||||
String toString(Algorithm algorithm);
|
||||
void parseFromString(Algorithm & algorithm, const String & str);
|
||||
Algorithm parseAlgorithmFromString(const String & str);
|
||||
|
||||
/// Throws an exception if a specified key size doesn't correspond a specified encryption algorithm.
|
||||
void checkKeySize(Algorithm algorithm, size_t key_size);
|
||||
void checkKeySize(size_t key_size, Algorithm algorithm);
|
||||
|
||||
|
||||
/// Initialization vector. Its size is always 16 bytes.
|
||||
@ -103,15 +104,34 @@ private:
|
||||
|
||||
|
||||
/// File header which is stored at the beginning of encrypted files.
///
/// The format of that header is following:
/// +--------+------+--------------------------------------------------------------------------+
/// | offset | size | description                                                              |
/// +--------+------+--------------------------------------------------------------------------+
/// | 0      | 3    | 'E', 'N', 'C' (file's signature)                                         |
/// | 3      | 2    | version of this header (1..2)                                            |
/// | 5      | 2    | encryption algorithm (0..2, 0=AES_128_CTR, 1=AES_192_CTR, 2=AES_256_CTR) |
/// | 7      | 16   | fingerprint of encryption key (SipHash)                                  |
/// | 23     | 16   | initialization vector (randomly generated)                               |
/// | 39     | 25   | reserved for future use                                                  |
/// +--------+------+--------------------------------------------------------------------------+
///
|
||||
struct Header
{
    /// Versions:
    /// 1 - Initial version
    /// 2 - The header of an encrypted file contains the fingerprint of a used encryption key instead of a pair {key_id, very_small_hash(key)}.
    /// The header is always stored in little endian.
    static constexpr const UInt16 kCurrentVersion = 2;

    UInt16 version = kCurrentVersion;

    /// Encryption algorithm.
    Algorithm algorithm = Algorithm::AES_128_CTR;

    /// Identifier of the key to encrypt or decrypt this file.
    UInt64 key_id = 0;

    /// Hash of the key to encrypt or decrypt this file.
    UInt8 key_hash = 0;
    /// Fingerprint of a key.
    UInt128 key_fingerprint = 0;

    InitVector init_vector;

@ -122,9 +142,11 @@ struct Header
    void write(WriteBuffer & out) const;
};
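The offsets in the table above follow from the field sizes; a quick cross-check of the version-2 layout (a sketch, assuming the total header size kSize is 64 bytes as the table implies):

    static_assert(3 + 2 + 2 == 7);   /// 'ENC' + version + algorithm      -> offset of the key fingerprint
    static_assert(7 + 16 == 23);     /// ... + fingerprint                -> offset of the init vector
    static_assert(23 + 16 == 39);    /// ... + init vector                -> offset of the reserved area
    static_assert(39 + 25 == 64);    /// reserved area ends at the total header size (kSize)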
|
||||
|
||||
/// Calculates the hash of a passed key.
|
||||
/// 1 byte is enough because this hash is used only for the first check.
|
||||
UInt8 calculateKeyHash(const String & key);
|
||||
/// Calculates the fingerprint of a passed encryption key.
|
||||
UInt128 calculateKeyFingerprint(const String & key);
|
||||
|
||||
/// Calculates kind of the fingerprint of a passed encryption key & key ID as it was implemented in version 1.
|
||||
UInt128 calculateV1KeyFingerprint(const String & key, UInt64 key_id);
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -30,15 +30,6 @@ void WriteBufferFromFileDecorator::finalizeImpl()
|
||||
|
||||
WriteBufferFromFileDecorator::~WriteBufferFromFileDecorator()
|
||||
{
|
||||
try
|
||||
{
|
||||
finalize();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
|
||||
/// It is not a mistake that swap is called here
|
||||
/// Swap has been called at constructor, it should be called at destructor
|
||||
    /// In order to provide a valid buffer for impl's d-tor call
|
||||
|
@ -106,7 +106,14 @@ WriteBufferFromPocoSocket::WriteBufferFromPocoSocket(Poco::Net::Socket & socket_
|
||||
|
||||
WriteBufferFromPocoSocket::~WriteBufferFromPocoSocket()
|
||||
{
|
||||
finalize();
|
||||
try
|
||||
{
|
||||
finalize();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -109,8 +109,8 @@ void WriteBufferFromS3::nextImpl()
|
||||
|
||||
if (is_prefinalized)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Cannot write to prefinalized buffer for S3, the file could have been created with PutObjectRequest");
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Cannot write to prefinalized buffer for S3, the file could have been created with PutObjectRequest");
|
||||
|
||||
/// Make sense to call waitIfAny before adding new async task to check if there is an exception
|
||||
/// The faster the exception is propagated the lesser time is spent for cancellation
|
||||
@ -242,7 +242,13 @@ WriteBufferFromS3::~WriteBufferFromS3()
|
||||
// That destructor could be call with finalized=false in case of exceptions
|
||||
if (!finalized)
|
||||
{
|
||||
LOG_ERROR(log, "WriteBufferFromS3 is not finalized in destructor. It could be if an exception occurs. File is not written to S3. {}.", getLogDetails());
|
||||
LOG_INFO(log,
|
||||
"WriteBufferFromS3 is not finalized in destructor. "
|
||||
"It could be if an exception occurs. File is not written to S3. "
|
||||
"{}. "
|
||||
"Stack trace: {}",
|
||||
getLogDetails(),
|
||||
StackTrace().toString());
|
||||
}
|
||||
|
||||
task_tracker->safeWaitAll();
|
||||
|
@ -121,17 +121,17 @@ void WriteBufferFromS3::TaskTracker::add(Callback && func)

/// preallocation for the second issue
FinishedList pre_allocated_finished {future_placeholder};

Callback func_with_notification = [&, func=std::move(func), pre_allocated_finished=std::move(pre_allocated_finished)] () mutable
Callback func_with_notification = [&, my_func = std::move(func), my_pre_allocated_finished = std::move(pre_allocated_finished)]() mutable
{
SCOPE_EXIT({
DENY_ALLOCATIONS_IN_SCOPE;

std::lock_guard lock(mutex);
finished_futures.splice(finished_futures.end(), pre_allocated_finished);
finished_futures.splice(finished_futures.end(), my_pre_allocated_finished);
has_finished.notify_one();
});

func();
my_func();
};

/// this move is nothrow
@ -162,15 +162,8 @@ void WriteBufferFromS3::TaskTracker::waitTilInflightShrink()

for (auto & it : finished_futures)
{
SCOPE_EXIT({
/// According to basic exception safety, TaskTracker has to be destroyed after an exception.
/// If that were true, this SCOPE_EXIT would be superfluous.
/// However, WriteBufferWithFinalizeCallback and WriteBufferFromFileDecorator do call finalize in the d-tor.
/// TaskTracker has to cope with this until the issue with finalizing in the d-tor is addressed in #50274.
futures.erase(it);
});

it->get();
futures.erase(it);
}

finished_futures.clear();
@ -49,6 +49,8 @@ private:

/// waitTilInflightShrink waits until the number of in-flight tasks drops below the limit `max_tasks_inflight`.
void waitTilInflightShrink() TSA_NO_THREAD_SAFETY_ANALYSIS;

void collectFinishedFutures(bool propagate_exceptions) TSA_REQUIRES(mutex);

const bool is_async;
ThreadPoolCallbackRunner<void> scheduler;
const size_t max_tasks_inflight;
@ -226,8 +226,7 @@ TEST(FileEncryptionPositionUpdateTest, Decryption)

String key = "1234567812345678";
FileEncryption::Header header;
header.algorithm = Algorithm::AES_128_CTR;
header.key_id = 1;
header.key_hash = calculateKeyHash(key);
header.key_fingerprint = calculateKeyFingerprint(key);
header.init_vector = InitVector::random();

auto lwb = std::make_unique<WriteBufferFromFile>(tmp_path);
@ -609,9 +609,16 @@ protected:

test_with_pool = GetParam();
client = MockS3::Client::CreateClient(bucket);
if (test_with_pool)
{
/// Do not block the main thread awaiting the other tasks.
/// This test uses only one thread.
getSettings().s3_max_inflight_parts_for_one_file = 0;
async_policy = std::make_unique<MockS3::SimpleAsyncTasks>();
}
else
{
async_policy = std::make_unique<MockS3::BaseSyncPolicy>();
}
}
};
@ -592,7 +592,6 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size)

std::unordered_map<Key, EvictionCandidates> to_delete;
size_t freeable_space = 0, freeable_count = 0;

size_t removed_size = 0;
auto iterate_func = [&](LockedKey & locked_key, FileSegmentMetadataPtr segment_metadata)
{
chassert(segment_metadata->file_segment->assertCorrectness());

@ -659,8 +658,8 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size)

&& freeable_count == 0 && main_priority->getElementsCount(cache_lock) == main_priority->getElementsLimit());

LOG_TEST(
log, "Overflow: {}, size: {}, ready to remove: {}, current cache size: {}/{}, elements: {}/{}, while reserving for {}:{}",
is_overflow, size, removed_size,
log, "Overflow: {}, size: {}, ready to remove: {} ({} in number), current cache size: {}/{}, elements: {}/{}, while reserving for {}:{}",
is_overflow, size, freeable_space, freeable_count,
main_priority->getSize(cache_lock), main_priority->getSizeLimit(),
main_priority->getElementsCount(cache_lock), main_priority->getElementsLimit(),
file_segment.key(), file_segment.offset());
@ -468,6 +468,12 @@ InterpreterSelectQuery::InterpreterSelectQuery(

}
}

/// Set skip_unavailable_shards to true only if it wasn't disabled explicitly
if (settings.allow_experimental_parallel_reading_from_replicas > 0 && !settings.skip_unavailable_shards && !settings.isChanged("skip_unavailable_shards"))
{
context->setSetting("skip_unavailable_shards", true);
}

/// Check support for JOIN for parallel replicas with custom key
if (joined_tables.tablesCount() > 1 && !settings.parallel_replicas_custom_key.value.empty())
{
@ -10,14 +10,37 @@

namespace DB
{

ASTPtr generateOptimizedDateFilterAST(const String & comparator, const String & converter, const String & column, UInt64 year)
ASTPtr generateOptimizedDateFilterAST(const String & comparator, const String & converter, const String & column, UInt64 compare_to)
{
const DateLUTImpl & date_lut = DateLUT::instance();

if (converter != "toYear") return {};
String start_date;
String end_date;

String start_date = date_lut.dateToString(date_lut.makeDayNum(year, 1, 1));
String end_date = date_lut.dateToString(date_lut.makeDayNum(year, 12, 31));
if (converter == "toYear")
{
UInt64 year = compare_to;
start_date = date_lut.dateToString(date_lut.makeDayNum(year, 1, 1));
end_date = date_lut.dateToString(date_lut.makeDayNum(year, 12, 31));
}
else if (converter == "toYYYYMM")
{
UInt64 year = compare_to / 100;
UInt64 month = compare_to % 100;

if (month == 0 || month > 12) return {};

static constexpr UInt8 days_of_month[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};

bool leap_year = (year & 3) == 0 && (year % 100 || (year % 400 == 0 && year));

start_date = date_lut.dateToString(date_lut.makeDayNum(year, month, 1));
end_date = date_lut.dateToString(date_lut.makeDayNum(year, month, days_of_month[month - 1] + (leap_year && month == 2)));
}
else
{
return {};
}

if (comparator == "equals")
{

@ -82,7 +105,7 @@ bool rewritePredicateInPlace(ASTFunction & function, ASTPtr & ast)

{
if (const auto * func = function.arguments->children[i]->as<ASTFunction>(); func)
{
if (func->name == "toYear")
if (func->name == "toYear" || func->name == "toYYYYMM")
{
func_id = i;
}
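In effect, the new toYYYYMM branch lets a predicate such as toYYYYMM(date_column) = 202302 be rewritten into a closed date range over date_column. A minimal Python sketch of the same range computation — the helper name is illustrative, not part of the ClickHouse API:

def yyyymm_to_range(compare_to: int):
    # Split YYYYMM into year and month, mirroring compare_to / 100 and compare_to % 100.
    year, month = divmod(compare_to, 100)
    if month == 0 or month > 12:
        return None  # not a valid YYYYMM value, no rewrite
    days_of_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    leap_year = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
    last_day = days_of_month[month - 1] + (1 if leap_year and month == 2 else 0)
    return (f"{year:04d}-{month:02d}-01", f"{year:04d}-{month:02d}-{last_day:02d}")

# toYYYYMM(d) = 202302 can be checked as d between '2023-02-01' and '2023-02-28'
assert yyyymm_to_range(202302) == ("2023-02-01", "2023-02-28")
assert yyyymm_to_range(202402) == ("2024-02-01", "2024-02-29")  # leap year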
@ -2332,47 +2332,47 @@ const std::vector<std::pair<std::string_view, Operator>> ParserExpressionImpl::o
|
||||
{":", Operator("if", 3, 3, OperatorType::FinishIf)},
|
||||
{"OR", Operator("or", 3, 2, OperatorType::Mergeable)},
|
||||
{"AND", Operator("and", 4, 2, OperatorType::Mergeable)},
|
||||
{"BETWEEN", Operator("", 6, 0, OperatorType::StartBetween)},
|
||||
{"NOT BETWEEN", Operator("", 6, 0, OperatorType::StartNotBetween)},
|
||||
{"==", Operator("equals", 8, 2, OperatorType::Comparison)},
|
||||
{"!=", Operator("notEquals", 8, 2, OperatorType::Comparison)},
|
||||
{"<>", Operator("notEquals", 8, 2, OperatorType::Comparison)},
|
||||
{"<=", Operator("lessOrEquals", 8, 2, OperatorType::Comparison)},
|
||||
{">=", Operator("greaterOrEquals", 8, 2, OperatorType::Comparison)},
|
||||
{"<", Operator("less", 8, 2, OperatorType::Comparison)},
|
||||
{">", Operator("greater", 8, 2, OperatorType::Comparison)},
|
||||
{"=", Operator("equals", 8, 2, OperatorType::Comparison)},
|
||||
{"LIKE", Operator("like", 8, 2)},
|
||||
{"ILIKE", Operator("ilike", 8, 2)},
|
||||
{"NOT LIKE", Operator("notLike", 8, 2)},
|
||||
{"NOT ILIKE", Operator("notILike", 8, 2)},
|
||||
{"REGEXP", Operator("match", 8, 2)},
|
||||
{"IN", Operator("in", 8, 2)},
|
||||
{"NOT IN", Operator("notIn", 8, 2)},
|
||||
{"GLOBAL IN", Operator("globalIn", 8, 2)},
|
||||
{"GLOBAL NOT IN", Operator("globalNotIn", 8, 2)},
|
||||
{"||", Operator("concat", 9, 2, OperatorType::Mergeable)},
|
||||
{"+", Operator("plus", 10, 2)},
|
||||
{"-", Operator("minus", 10, 2)},
|
||||
{"*", Operator("multiply", 11, 2)},
|
||||
{"/", Operator("divide", 11, 2)},
|
||||
{"%", Operator("modulo", 11, 2)},
|
||||
{"MOD", Operator("modulo", 11, 2)},
|
||||
{"DIV", Operator("intDiv", 11, 2)},
|
||||
{".", Operator("tupleElement", 13, 2, OperatorType::TupleElement)},
|
||||
{"[", Operator("arrayElement", 13, 2, OperatorType::ArrayElement)},
|
||||
{"::", Operator("CAST", 13, 2, OperatorType::Cast)},
|
||||
{"IS NULL", Operator("isNull", 13, 1, OperatorType::IsNull)},
|
||||
{"IS NOT NULL", Operator("isNotNull", 13, 1, OperatorType::IsNull)},
|
||||
{"IS NULL", Operator("isNull", 6, 1, OperatorType::IsNull)},
|
||||
{"IS NOT NULL", Operator("isNotNull", 6, 1, OperatorType::IsNull)},
|
||||
{"BETWEEN", Operator("", 7, 0, OperatorType::StartBetween)},
|
||||
{"NOT BETWEEN", Operator("", 7, 0, OperatorType::StartNotBetween)},
|
||||
{"==", Operator("equals", 9, 2, OperatorType::Comparison)},
|
||||
{"!=", Operator("notEquals", 9, 2, OperatorType::Comparison)},
|
||||
{"<>", Operator("notEquals", 9, 2, OperatorType::Comparison)},
|
||||
{"<=", Operator("lessOrEquals", 9, 2, OperatorType::Comparison)},
|
||||
{">=", Operator("greaterOrEquals", 9, 2, OperatorType::Comparison)},
|
||||
{"<", Operator("less", 9, 2, OperatorType::Comparison)},
|
||||
{">", Operator("greater", 9, 2, OperatorType::Comparison)},
|
||||
{"=", Operator("equals", 9, 2, OperatorType::Comparison)},
|
||||
{"LIKE", Operator("like", 9, 2)},
|
||||
{"ILIKE", Operator("ilike", 9, 2)},
|
||||
{"NOT LIKE", Operator("notLike", 9, 2)},
|
||||
{"NOT ILIKE", Operator("notILike", 9, 2)},
|
||||
{"REGEXP", Operator("match", 9, 2)},
|
||||
{"IN", Operator("in", 9, 2)},
|
||||
{"NOT IN", Operator("notIn", 9, 2)},
|
||||
{"GLOBAL IN", Operator("globalIn", 9, 2)},
|
||||
{"GLOBAL NOT IN", Operator("globalNotIn", 9, 2)},
|
||||
{"||", Operator("concat", 10, 2, OperatorType::Mergeable)},
|
||||
{"+", Operator("plus", 11, 2)},
|
||||
{"-", Operator("minus", 11, 2)},
|
||||
{"*", Operator("multiply", 12, 2)},
|
||||
{"/", Operator("divide", 12, 2)},
|
||||
{"%", Operator("modulo", 12, 2)},
|
||||
{"MOD", Operator("modulo", 12, 2)},
|
||||
{"DIV", Operator("intDiv", 12, 2)},
|
||||
{".", Operator("tupleElement", 14, 2, OperatorType::TupleElement)},
|
||||
{"[", Operator("arrayElement", 14, 2, OperatorType::ArrayElement)},
|
||||
{"::", Operator("CAST", 14, 2, OperatorType::Cast)},
|
||||
};
|
||||
|
||||
const std::vector<std::pair<std::string_view, Operator>> ParserExpressionImpl::unary_operators_table
|
||||
{
|
||||
{"NOT", Operator("not", 5, 1)},
|
||||
{"-", Operator("negate", 12, 1)}
|
||||
{"-", Operator("negate", 13, 1)}
|
||||
};
|
||||
|
||||
const Operator ParserExpressionImpl::finish_between_operator("", 7, 0, OperatorType::FinishBetween);
|
||||
const Operator ParserExpressionImpl::finish_between_operator("", 8, 0, OperatorType::FinishBetween);
|
||||
|
||||
const std::array<std::string_view, 1> ParserExpressionImpl::overlapping_operators_to_skip
|
||||
{
|
||||
@ -2392,6 +2392,7 @@ bool ParserExpressionImpl::parse(std::unique_ptr<Layer> start, IParser::Pos & po
|
||||
{
|
||||
if (!layers.back()->parse(pos, expected, next))
|
||||
break;
|
||||
|
||||
if (layers.back()->isFinished())
|
||||
{
|
||||
if (layers.size() == 1)
|
||||
@ -2735,11 +2736,19 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po
|
||||
}
|
||||
}
|
||||
|
||||
layers.back()->pushOperator(op);
|
||||
|
||||
/// isNull & isNotNull are postfix unary operators
|
||||
if (op.type == OperatorType::IsNull)
|
||||
{
|
||||
ASTPtr function = makeASTFunction(op);
|
||||
|
||||
if (!layers.back()->popLastNOperands(function->children[0]->children, 1))
|
||||
return Action::NONE;
|
||||
|
||||
layers.back()->pushOperand(std::move(function));
|
||||
return Action::OPERATOR;
|
||||
}
|
||||
|
||||
layers.back()->pushOperator(op);
|
||||
|
||||
if (op.type == OperatorType::Cast)
|
||||
{
|
||||
|
@ -272,7 +272,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
|
||||
{
|
||||
/// If totals step has HAVING expression, skip it for now.
|
||||
/// TODO:
|
||||
/// We can merge HAVING expression with current filer.
|
||||
/// We can merge HAVING expression with current filter.
|
||||
/// Also, we can push down part of HAVING which depend only on aggregation keys.
|
||||
if (totals_having->getActions())
|
||||
return 0;
|
||||
@ -323,9 +323,9 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
|
||||
{
|
||||
const auto & table_join = join ? join->getJoin()->getTableJoin() : filled_join->getJoin()->getTableJoin();
|
||||
|
||||
/// Only inner and left(/right) join are supported. Other types may generate default values for left table keys.
|
||||
/// Only inner, cross and left(/right) join are supported. Other types may generate default values for left table keys.
|
||||
/// So, if we push down a condition like `key != 0`, not all rows may be filtered.
|
||||
if (table_join.kind() != JoinKind::Inner && table_join.kind() != kind)
|
||||
if (table_join.kind() != JoinKind::Inner && table_join.kind() != JoinKind::Cross && table_join.kind() != kind)
|
||||
return 0;
|
||||
|
||||
bool is_left = kind == JoinKind::Left;
|
||||
|
@ -47,8 +47,7 @@ RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
QueryProcessingStage::Enum stage_, std::optional<Extension> extension_)
|
||||
: header(header_), query(query_), context(context_), scalars(scalars_)
|
||||
, external_tables(external_tables_), stage(stage_)
|
||||
, task_iterator(extension_ ? extension_->task_iterator : nullptr)
|
||||
, parallel_reading_coordinator(extension_ ? extension_->parallel_reading_coordinator : nullptr)
|
||||
, extension(extension_)
|
||||
{}
|
||||
|
||||
RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
@ -90,8 +89,7 @@ RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
QueryProcessingStage::Enum stage_, std::optional<Extension> extension_)
|
||||
: header(header_), query(query_), context(context_)
|
||||
, scalars(scalars_), external_tables(external_tables_), stage(stage_)
|
||||
, task_iterator(extension_ ? extension_->task_iterator : nullptr)
|
||||
, parallel_reading_coordinator(extension_ ? extension_->parallel_reading_coordinator : nullptr)
|
||||
, extension(extension_)
|
||||
{
|
||||
create_connections = [this, connections_, throttler, extension_](AsyncCallback) mutable {
|
||||
auto res = std::make_unique<MultiplexedConnections>(std::move(connections_), context->getSettingsRef(), throttler);
|
||||
@ -108,8 +106,7 @@ RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
QueryProcessingStage::Enum stage_, std::optional<Extension> extension_)
|
||||
: header(header_), query(query_), context(context_)
|
||||
, scalars(scalars_), external_tables(external_tables_), stage(stage_)
|
||||
, task_iterator(extension_ ? extension_->task_iterator : nullptr)
|
||||
, parallel_reading_coordinator(extension_ ? extension_->parallel_reading_coordinator : nullptr)
|
||||
, extension(extension_)
|
||||
{
|
||||
create_connections = [this, pool, throttler, extension_](AsyncCallback async_callback)->std::unique_ptr<IConnections>
|
||||
{
|
||||
@ -247,6 +244,13 @@ void RemoteQueryExecutor::sendQueryUnlocked(ClientInfo::QueryKind query_kind, As
|
||||
finished = true;
|
||||
sent_query = true;
|
||||
|
||||
/// We need to tell the coordinator not to wait for this replica.
|
||||
if (extension && extension->parallel_reading_coordinator)
|
||||
{
|
||||
chassert(extension->replica_info);
|
||||
extension->parallel_reading_coordinator->markReplicaAsUnavailable(extension->replica_info->number_of_current_replica);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@ -360,7 +364,18 @@ RemoteQueryExecutor::ReadResult RemoteQueryExecutor::readAsync()

read_context->resume();

if (needToSkipUnavailableShard())
{
/// We need to tell the coordinator not to wait for this replica.
/// But at this point it may lead to an incomplete result set, because
/// this replica committed to read some part of their data and then died.
if (extension && extension->parallel_reading_coordinator)
{
chassert(extension->parallel_reading_coordinator);
extension->parallel_reading_coordinator->markReplicaAsUnavailable(extension->replica_info->number_of_current_replica);
}

return ReadResult(Block());
}

/// Check if packet is not ready yet.
if (read_context->isInProgress())
@ -527,30 +542,30 @@ bool RemoteQueryExecutor::setPartUUIDs(const std::vector<UUID> & uuids)
|
||||
|
||||
void RemoteQueryExecutor::processReadTaskRequest()
|
||||
{
|
||||
if (!task_iterator)
|
||||
if (!extension || !extension->task_iterator)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Distributed task iterator is not initialized");
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::ReadTaskRequestsReceived);
|
||||
auto response = (*task_iterator)();
|
||||
auto response = (*extension->task_iterator)();
|
||||
connections->sendReadTaskResponse(response);
|
||||
}
|
||||
|
||||
void RemoteQueryExecutor::processMergeTreeReadTaskRequest(ParallelReadRequest request)
|
||||
{
|
||||
if (!parallel_reading_coordinator)
|
||||
if (!extension || !extension->parallel_reading_coordinator)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Coordinator for parallel reading from replicas is not initialized");
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::MergeTreeReadTaskRequestsReceived);
|
||||
auto response = parallel_reading_coordinator->handleRequest(std::move(request));
|
||||
auto response = extension->parallel_reading_coordinator->handleRequest(std::move(request));
|
||||
connections->sendMergeTreeReadTaskResponse(response);
|
||||
}
|
||||
|
||||
void RemoteQueryExecutor::processMergeTreeInitialReadAnnounecement(InitialAllRangesAnnouncement announcement)
|
||||
{
|
||||
if (!parallel_reading_coordinator)
|
||||
if (!extension || !extension->parallel_reading_coordinator)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Coordinator for parallel reading from replicas is not initialized");
|
||||
|
||||
parallel_reading_coordinator->handleInitialAllRangesAnnouncement(announcement);
|
||||
extension->parallel_reading_coordinator->handleInitialAllRangesAnnouncement(announcement);
|
||||
}
|
||||
|
||||
void RemoteQueryExecutor::finish()
|
||||
|
@ -212,11 +212,11 @@ private:
|
||||
/// Temporary tables needed to be sent to remote servers
|
||||
Tables external_tables;
|
||||
QueryProcessingStage::Enum stage;
|
||||
|
||||
std::optional<Extension> extension;
|
||||
/// Initiator identifier for distributed task processing
|
||||
std::shared_ptr<TaskIterator> task_iterator;
|
||||
|
||||
std::shared_ptr<ParallelReplicasReadingCoordinator> parallel_reading_coordinator;
|
||||
|
||||
/// This is needed only for parallel reading from replicas, because
|
||||
/// we create a RemoteQueryExecutor per replica and have to store additional info
|
||||
/// about the number of the current replica or the count of replicas at all.
|
||||
|
@ -93,10 +93,13 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe
|
||||
|
||||
auto write_response = [&](const std::string & message)
|
||||
{
|
||||
if (response.sent())
|
||||
return;
|
||||
|
||||
auto & out = *used_output.out;
|
||||
if (response.sent())
|
||||
{
|
||||
out.finalize();
|
||||
return;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
writeString(message, out);
|
||||
@ -127,7 +130,10 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe
|
||||
catch (Exception & e)
|
||||
{
|
||||
if (e.code() == ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES)
|
||||
{
|
||||
used_output.out->finalize();
|
||||
return;
|
||||
}
|
||||
|
||||
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
|
||||
|
||||
|
@ -490,6 +490,7 @@ private:
|
||||
{
|
||||
/// Stop ParallelFormattingOutputFormat correctly.
|
||||
writer.reset();
|
||||
write_buf->finalize();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
@ -365,7 +365,7 @@ Fetcher::Fetcher(StorageReplicatedMergeTree & data_)
|
||||
, log(&Poco::Logger::get(data.getStorageID().getNameForLogs() + " (Fetcher)"))
|
||||
{}
|
||||
|
||||
MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart(
|
||||
std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> Fetcher::fetchSelectedPart(
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
ContextPtr context,
|
||||
const String & part_name,
|
||||
@ -601,7 +601,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart(
|
||||
return std::make_unique<WriteBufferFromFile>(full_path, std::min<UInt64>(DBMS_DEFAULT_BUFFER_SIZE, file_size));
|
||||
};
|
||||
|
||||
return downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix, disk, true, *in, output_buffer_getter, projections, throttler, sync);
|
||||
return std::make_pair(downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix, disk, true, *in, output_buffer_getter, projections, throttler, sync), std::move(temporary_directory_lock));
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
@ -667,11 +667,11 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart(
|
||||
data.getRelativeDataPath(),
|
||||
part_name);
|
||||
|
||||
return downloadPartToMemory(
|
||||
return std::make_pair(downloadPartToMemory(
|
||||
data_part_storage, part_name,
|
||||
MergeTreePartInfo::fromPartName(part_name, data.format_version),
|
||||
part_uuid, metadata_snapshot, context, *in,
|
||||
projections, false, throttler);
|
||||
projections, false, throttler), std::move(temporary_directory_lock));
|
||||
}
|
||||
|
||||
auto output_buffer_getter = [](IDataPartStorage & part_storage, const String & file_name, size_t file_size)
|
||||
@ -679,10 +679,10 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart(
|
||||
return part_storage.writeFile(file_name, std::min<UInt64>(file_size, DBMS_DEFAULT_BUFFER_SIZE), {});
|
||||
};
|
||||
|
||||
return downloadPartToDisk(
|
||||
return std::make_pair(downloadPartToDisk(
|
||||
part_name, replica_path, to_detached, tmp_prefix,
|
||||
disk, false, *in, output_buffer_getter,
|
||||
projections, throttler, sync);
|
||||
projections, throttler, sync),std::move(temporary_directory_lock));
|
||||
}
|
||||
|
||||
MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory(
|
||||
|
@ -66,7 +66,7 @@ public:
|
||||
explicit Fetcher(StorageReplicatedMergeTree & data_);
|
||||
|
||||
/// Downloads a part to tmp_directory. If to_detached - downloads to the `detached` directory.
|
||||
MergeTreeData::MutableDataPartPtr fetchSelectedPart(
|
||||
std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> fetchSelectedPart(
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
ContextPtr context,
|
||||
const String & part_name,
|
||||
|
@ -166,6 +166,7 @@ UInt32 GinIndexStore::getNextSegmentIDRange(const String & file_name, size_t n)
|
||||
/// Write segment ID 1
|
||||
writeVarUInt(1, *ostr);
|
||||
ostr->sync();
|
||||
ostr->finalize();
|
||||
}
|
||||
|
||||
/// Read id in file
|
||||
@ -188,6 +189,7 @@ UInt32 GinIndexStore::getNextSegmentIDRange(const String & file_name, size_t n)
|
||||
|
||||
writeVarUInt(result + n, *ostr);
|
||||
ostr->sync();
|
||||
ostr->finalize();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -317,8 +319,13 @@ void GinIndexStore::writeSegment()
|
||||
current_segment.segment_id = getNextSegmentID();
|
||||
|
||||
metadata_file_stream->sync();
|
||||
metadata_file_stream->finalize();
|
||||
|
||||
dict_file_stream->sync();
|
||||
dict_file_stream->finalize();
|
||||
|
||||
postings_file_stream->sync();
|
||||
postings_file_stream->finalize();
|
||||
}
|
||||
|
||||
GinIndexStoreDeserializer::GinIndexStoreDeserializer(const GinIndexStorePtr & store_)
|
||||
|
@ -119,22 +119,12 @@ void MergedBlockOutputStream::Finalizer::Impl::finish()
|
||||
part->getDataPartStorage().removeFile(file_name);
|
||||
}
|
||||
|
||||
MergedBlockOutputStream::Finalizer::~Finalizer()
|
||||
{
|
||||
try
|
||||
{
|
||||
finish();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException("MergedBlockOutputStream");
|
||||
}
|
||||
}
|
||||
|
||||
MergedBlockOutputStream::Finalizer::Finalizer(Finalizer &&) noexcept = default;
|
||||
MergedBlockOutputStream::Finalizer & MergedBlockOutputStream::Finalizer::operator=(Finalizer &&) noexcept = default;
|
||||
MergedBlockOutputStream::Finalizer::Finalizer(std::unique_ptr<Impl> impl_) : impl(std::move(impl_)) {}
|
||||
|
||||
MergedBlockOutputStream::Finalizer::~Finalizer() = default;
|
||||
|
||||
void MergedBlockOutputStream::finalizePart(
|
||||
const MergeTreeMutableDataPartPtr & new_part,
|
||||
bool sync,
|
||||
|
@ -44,9 +44,10 @@ public:
|
||||
std::unique_ptr<Impl> impl;
|
||||
|
||||
explicit Finalizer(std::unique_ptr<Impl> impl_);
|
||||
~Finalizer();
|
||||
Finalizer(Finalizer &&) noexcept;
|
||||
Finalizer & operator=(Finalizer &&) noexcept;
|
||||
~Finalizer();
|
||||
|
||||
|
||||
void finish();
|
||||
};
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "Storages/MergeTree/RequestResponse.h"
|
||||
#include <Storages/MergeTree/MarkRange.h>
|
||||
#include <Storages/MergeTree/IntersectionsIndexes.h>
|
||||
#include <fmt/core.h>
|
||||
#include <fmt/format.h>
|
||||
|
||||
namespace DB
|
||||
@ -61,18 +62,22 @@ public:
|
||||
{
|
||||
size_t number_of_requests{0};
|
||||
size_t sum_marks{0};
|
||||
bool is_unavailable{false};
|
||||
};
|
||||
using Stats = std::vector<Stat>;
|
||||
static String toString(Stats stats)
|
||||
{
|
||||
String result = "Statistics: ";
|
||||
std::vector<String> stats_by_replica;
|
||||
for (size_t i = 0; i < stats.size(); ++i)
|
||||
result += fmt::format("-- replica {}, requests: {} marks: {} ", i, stats[i].number_of_requests, stats[i].sum_marks);
|
||||
stats_by_replica.push_back(fmt::format("replica {}{} - {{requests: {} marks: {}}}", i, stats[i].is_unavailable ? " is unavailable" : "", stats[i].number_of_requests, stats[i].sum_marks));
|
||||
result += fmt::format("{}", fmt::join(stats_by_replica, "; "));
|
||||
return result;
|
||||
}
|
||||
|
||||
Stats stats;
|
||||
size_t replicas_count;
|
||||
size_t replicas_count{0};
|
||||
size_t unavailable_replicas_count{0};
|
||||
|
||||
explicit ImplInterface(size_t replicas_count_)
|
||||
: stats{replicas_count_}
|
||||
@ -82,6 +87,7 @@ public:
|
||||
virtual ~ImplInterface() = default;
|
||||
virtual ParallelReadResponse handleRequest(ParallelReadRequest request) = 0;
|
||||
virtual void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) = 0;
|
||||
virtual void markReplicaAsUnavailable(size_t replica_number) = 0;
|
||||
};
|
||||
|
||||
using Parts = std::set<Part>;
|
||||
@ -128,6 +134,7 @@ public:
|
||||
|
||||
ParallelReadResponse handleRequest(ParallelReadRequest request) override;
|
||||
void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) override;
|
||||
void markReplicaAsUnavailable(size_t replica_number) override;
|
||||
|
||||
void updateReadingState(const InitialAllRangesAnnouncement & announcement);
|
||||
void finalizeReadingState();
|
||||
@ -199,6 +206,17 @@ void DefaultCoordinator::updateReadingState(const InitialAllRangesAnnouncement &
|
||||
}
|
||||
}
|
||||
|
||||
void DefaultCoordinator::markReplicaAsUnavailable(size_t replica_number)
|
||||
{
|
||||
LOG_DEBUG(log, "Replica number {} is unavailable", replica_number);
|
||||
|
||||
++unavailable_replicas_count;
|
||||
stats[replica_number].is_unavailable = true;
|
||||
|
||||
if (sent_initial_requests == replicas_count - unavailable_replicas_count)
|
||||
finalizeReadingState();
|
||||
}
|
||||
|
||||
void DefaultCoordinator::finalizeReadingState()
|
||||
{
|
||||
/// Clear all the delayed queue
|
||||
@ -345,12 +363,23 @@ public:
|
||||
|
||||
ParallelReadResponse handleRequest([[ maybe_unused ]] ParallelReadRequest request) override;
|
||||
void handleInitialAllRangesAnnouncement([[ maybe_unused ]] InitialAllRangesAnnouncement announcement) override;
|
||||
void markReplicaAsUnavailable(size_t replica_number) override;
|
||||
|
||||
Parts all_parts_to_read;
|
||||
|
||||
Poco::Logger * log = &Poco::Logger::get(fmt::format("{}{}", magic_enum::enum_name(mode), "Coordinator"));
|
||||
};
|
||||
|
||||
template <CoordinationMode mode>
|
||||
void InOrderCoordinator<mode>::markReplicaAsUnavailable(size_t replica_number)
|
||||
{
|
||||
LOG_DEBUG(log, "Replica number {} is unavailable", replica_number);
|
||||
|
||||
stats[replica_number].is_unavailable = true;
|
||||
++unavailable_replicas_count;
|
||||
|
||||
/// There is nothing else to do.
|
||||
}
|
||||
|
||||
template <CoordinationMode mode>
|
||||
void InOrderCoordinator<mode>::handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement)
|
||||
@ -388,7 +417,6 @@ void InOrderCoordinator<mode>::handleInitialAllRangesAnnouncement(InitialAllRang
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <CoordinationMode mode>
|
||||
ParallelReadResponse InOrderCoordinator<mode>::handleRequest(ParallelReadRequest request)
|
||||
{
|
||||
@ -486,7 +514,7 @@ void ParallelReplicasReadingCoordinator::handleInitialAllRangesAnnouncement(Init
|
||||
|
||||
if (!pimpl)
|
||||
{
|
||||
setMode(announcement.mode);
|
||||
mode = announcement.mode;
|
||||
initialize();
|
||||
}
|
||||
|
||||
@ -500,16 +528,23 @@ ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelR
|
||||
|
||||
if (!pimpl)
|
||||
{
|
||||
setMode(request.mode);
|
||||
mode = request.mode;
|
||||
initialize();
|
||||
}
|
||||
|
||||
return pimpl->handleRequest(std::move(request));
|
||||
}
|
||||
|
||||
void ParallelReplicasReadingCoordinator::setMode(CoordinationMode mode_)
|
||||
void ParallelReplicasReadingCoordinator::markReplicaAsUnavailable(size_t replica_number)
|
||||
{
|
||||
mode = mode_;
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
if (!pimpl)
|
||||
{
|
||||
initialize();
|
||||
}
|
||||
|
||||
return pimpl->markReplicaAsUnavailable(replica_number);
|
||||
}
|
||||
|
||||
void ParallelReplicasReadingCoordinator::initialize()
|
||||
|
@ -18,10 +18,15 @@ public:
|
||||
explicit ParallelReplicasReadingCoordinator(size_t replicas_count_);
|
||||
~ParallelReplicasReadingCoordinator();
|
||||
|
||||
void setMode(CoordinationMode mode);
|
||||
void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement);
|
||||
ParallelReadResponse handleRequest(ParallelReadRequest request);
|
||||
|
||||
/// Called when some replica is unavailable and we skipped it.
|
||||
/// This is needed to "finalize" reading state e.g. spread all the marks using
|
||||
/// consistent hashing, because otherwise coordinator will continue working in
|
||||
/// "pending" state waiting for the unavailable replica to send the announcement.
|
||||
void markReplicaAsUnavailable(size_t replica_number);
|
||||
|
||||
private:
|
||||
void initialize();
|
||||
|
||||
|
@ -1137,13 +1137,6 @@ SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadata
|
||||
auto cluster = getCluster();
|
||||
const auto & settings = local_context->getSettingsRef();
|
||||
|
||||
/// Ban an attempt to make async insert into the table belonging to DatabaseMemory
|
||||
if (!storage_policy && !owned_cluster && !settings.insert_distributed_sync && !settings.insert_shard_id)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage {} must have own data directory to enable asynchronous inserts",
|
||||
getName());
|
||||
}
|
||||
|
||||
auto shard_num = cluster->getLocalShardCount() + cluster->getRemoteShardCount();
|
||||
|
||||
/// If sharding key is not specified, then you can only write to a shard containing only one shard
|
||||
|
@ -955,6 +955,7 @@ private:
|
||||
{
|
||||
/// Stop ParallelFormattingOutputFormat correctly.
|
||||
writer.reset();
|
||||
write_buf->finalize();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
@ -341,7 +341,10 @@ private:
|
||||
void finalize()
|
||||
{
|
||||
compressed.next();
|
||||
compressed.finalize();
|
||||
|
||||
plain->next();
|
||||
plain->finalize();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -2344,16 +2344,19 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry)
|
||||
|
||||
auto credentials = getContext()->getInterserverCredentials();
|
||||
String interserver_scheme = getContext()->getInterserverScheme();
|
||||
scope_guard part_temp_directory_lock;
|
||||
|
||||
if (interserver_scheme != address.scheme)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Interserver schemas are different '{}' != '{}', can't fetch part from {}",
|
||||
interserver_scheme, address.scheme, address.host);
|
||||
|
||||
part_desc->res_part = fetcher.fetchSelectedPart(
|
||||
auto [fetched_part, lock] = fetcher.fetchSelectedPart(
|
||||
metadata_snapshot, getContext(), part_desc->found_new_part_name, zookeeper_name, source_replica_path,
|
||||
address.host, address.replication_port, timeouts, credentials->getUser(), credentials->getPassword(),
|
||||
interserver_scheme, replicated_fetches_throttler, false, TMP_PREFIX + "fetch_");
|
||||
part_desc->res_part = fetched_part;
|
||||
part_temp_directory_lock = std::move(lock);
|
||||
|
||||
/// TODO: check columns_version of fetched part
|
||||
|
||||
@ -2460,6 +2463,7 @@ void StorageReplicatedMergeTree::executeClonePartFromShard(const LogEntry & entr
|
||||
auto timeouts = getHTTPTimeouts(getContext());
|
||||
auto credentials = getContext()->getInterserverCredentials();
|
||||
String interserver_scheme = getContext()->getInterserverScheme();
|
||||
scope_guard part_temp_directory_lock;
|
||||
|
||||
auto get_part = [&, address, timeouts, credentials, interserver_scheme]()
|
||||
{
|
||||
@ -2467,11 +2471,13 @@ void StorageReplicatedMergeTree::executeClonePartFromShard(const LogEntry & entr
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Interserver schemes are different: '{}' != '{}', can't fetch part from {}",
|
||||
interserver_scheme, address.scheme, address.host);
|
||||
|
||||
return fetcher.fetchSelectedPart(
|
||||
auto [fetched_part, lock] = fetcher.fetchSelectedPart(
|
||||
metadata_snapshot, getContext(), entry.new_part_name, zookeeper_name, source_replica_path,
|
||||
address.host, address.replication_port,
|
||||
timeouts, credentials->getUser(), credentials->getPassword(), interserver_scheme,
|
||||
replicated_fetches_throttler, true);
|
||||
part_temp_directory_lock = std::move(lock);
|
||||
return fetched_part;
|
||||
};
|
||||
|
||||
part = get_part();
|
||||
@ -4170,14 +4176,14 @@ bool StorageReplicatedMergeTree::fetchPart(
|
||||
std::optional<CurrentlySubmergingEmergingTagger> tagger_ptr;
|
||||
std::function<MutableDataPartPtr()> get_part;
|
||||
MergeTreeData::HardlinkedFiles hardlinked_files;
|
||||
scope_guard part_to_clone_lock;
|
||||
scope_guard part_directory_lock;
|
||||
|
||||
if (part_to_clone)
|
||||
{
|
||||
get_part = [&, part_to_clone]()
|
||||
{
|
||||
auto [cloned_part, lock] = cloneAndLoadDataPartOnSameDisk(part_to_clone, "tmp_clone_", part_info, metadata_snapshot, NO_TRANSACTION_PTR, &hardlinked_files, false, {});
|
||||
part_to_clone_lock = std::move(lock);
|
||||
part_directory_lock = std::move(lock);
|
||||
return cloned_part;
|
||||
};
|
||||
}
|
||||
@ -4195,7 +4201,7 @@ bool StorageReplicatedMergeTree::fetchPart(
|
||||
throw Exception(ErrorCodes::INTERSERVER_SCHEME_DOESNT_MATCH, "Interserver schemes are different: "
|
||||
"'{}' != '{}', can't fetch part from {}", interserver_scheme, address.scheme, address.host);
|
||||
|
||||
return fetcher.fetchSelectedPart(
|
||||
auto [fetched_part, lock] = fetcher.fetchSelectedPart(
|
||||
metadata_snapshot,
|
||||
getContext(),
|
||||
part_name,
|
||||
@ -4212,6 +4218,8 @@ bool StorageReplicatedMergeTree::fetchPart(
|
||||
"",
|
||||
&tagger_ptr,
|
||||
try_fetch_shared);
|
||||
part_directory_lock = std::move(lock);
|
||||
return fetched_part;
|
||||
};
|
||||
}
|
||||
|
||||
@ -4355,6 +4363,7 @@ MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart(
|
||||
auto timeouts = getHTTPTimeouts(getContext());
|
||||
auto credentials = getContext()->getInterserverCredentials();
|
||||
String interserver_scheme = getContext()->getInterserverScheme();
|
||||
scope_guard part_temp_directory_lock;
|
||||
|
||||
get_part = [&, address, timeouts, interserver_scheme, credentials]()
|
||||
{
|
||||
@ -4362,12 +4371,14 @@ MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart(
|
||||
throw Exception(ErrorCodes::INTERSERVER_SCHEME_DOESNT_MATCH, "Interserver schemes are different: "
|
||||
"'{}' != '{}', can't fetch part from {}", interserver_scheme, address.scheme, address.host);
|
||||
|
||||
return fetcher.fetchSelectedPart(
|
||||
auto [fetched_part, lock] = fetcher.fetchSelectedPart(
|
||||
metadata_snapshot, getContext(), part_name, zookeeper_name, source_replica_path,
|
||||
address.host, address.replication_port,
|
||||
timeouts, credentials->getUser(), credentials->getPassword(),
|
||||
interserver_scheme, replicated_fetches_throttler, false, "", nullptr, true,
|
||||
replaced_disk);
|
||||
part_temp_directory_lock = std::move(lock);
|
||||
return fetched_part;
|
||||
};
|
||||
|
||||
try
|
||||
|
@ -831,6 +831,7 @@ private:
|
||||
{
|
||||
/// Stop ParallelFormattingOutputFormat correctly.
|
||||
writer.reset();
|
||||
write_buf->finalize();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
@ -31,7 +31,6 @@ namespace DB
|
||||
{
|
||||
|
||||
class PullingPipelineExecutor;
|
||||
class StorageS3SequentialSource;
|
||||
class NamedCollection;
|
||||
|
||||
class StorageS3Source : public ISource, WithContext
|
||||
@ -248,11 +247,6 @@ public:
|
||||
|
||||
String getPath() const { return url.key; }
|
||||
|
||||
void appendToPath(const String & suffix)
|
||||
{
|
||||
url = S3::URI{std::filesystem::path(url.uri.toString()) / suffix};
|
||||
}
|
||||
|
||||
bool update(ContextPtr context);
|
||||
|
||||
void connect(ContextPtr context);
|
||||
|
@ -480,6 +480,7 @@ void StorageURLSink::finalize()
|
||||
{
|
||||
/// Stop ParallelFormattingOutputFormat correctly.
|
||||
writer.reset();
|
||||
write_buf->finalize();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
@ -21,8 +21,6 @@
|
||||
01072_optimize_skip_unused_shards_const_expr_eval
|
||||
01083_expressions_in_engine_arguments
|
||||
01086_odbc_roundtrip
|
||||
01142_join_lc_and_nullable_in_key
|
||||
01142_merge_join_lc_and_nullable_in_key
|
||||
01152_cross_replication
|
||||
01155_rename_move_materialized_view
|
||||
01173_transaction_control_queries
|
||||
@ -39,8 +37,6 @@
|
||||
01319_optimize_skip_unused_shards_nesting
|
||||
01353_low_cardinality_join_types
|
||||
01455_shard_leaf_max_rows_bytes_to_read
|
||||
01476_right_full_join_switch
|
||||
01477_lc_in_merge_join_left_key
|
||||
01487_distributed_in_not_default_db
|
||||
01495_subqueries_in_with_statement
|
||||
01504_rocksdb
|
||||
|
@ -23,6 +23,7 @@ from get_robot_token import get_best_robot_token
|
||||
from pr_info import NeedsDataType, PRInfo
|
||||
from commit_status_helper import (
|
||||
RerunHelper,
|
||||
format_description,
|
||||
get_commit,
|
||||
post_commit_status,
|
||||
update_mergeable_check,
|
||||
@ -269,14 +270,20 @@ def main():
|
||||
if build_result.status == "success":
|
||||
ok_groups += 1
|
||||
|
||||
if ok_groups == 0 or some_builds_are_missing:
|
||||
summary_status = "error"
|
||||
# Check if there are no builds at all, do not override bad status
|
||||
if summary_status == "success":
|
||||
if some_builds_are_missing:
|
||||
summary_status = "pending"
|
||||
elif ok_groups == 0:
|
||||
summary_status = "error"
|
||||
|
||||
addition = ""
|
||||
if some_builds_are_missing:
|
||||
addition = f"({len(build_reports)} of {required_builds} builds are OK)"
|
||||
addition = f" ({len(build_reports)} of {required_builds} builds are OK)"
|
||||
|
||||
description = f"{ok_groups}/{total_groups} artifact groups are OK {addition}"
|
||||
description = format_description(
|
||||
f"{ok_groups}/{total_groups} artifact groups are OK{addition}"
|
||||
)
|
||||
|
||||
post_commit_status(
|
||||
commit, summary_status, url, description, build_check_name, pr_info
|
||||
|
@ -70,9 +70,12 @@ This pull-request will be merged automatically as it reaches the mergeable state
|
||||
|
||||
### If the PR was closed and then reopened
|
||||
|
||||
If it is stuck, check {pr_url} for `{label_backports_created}` and delete it if \
If it is stuck, check {pr_url} for `{backport_created_label}` and delete it if \
|
||||
necessary. Manually merging will do nothing, since `{label_backports_created}` \
|
||||
prevents the original PR {pr_url} from being processed.
|
||||
|
||||
If you want to recreate the PR: delete the `{label_cherrypick}` label and delete this branch.
|
||||
You may also need to delete the `{label_backports_created}` label from the original PR.
|
||||
"""
|
||||
BACKPORT_DESCRIPTION = """This pull-request is a last step of an automated \
|
||||
backporting.
|
||||
@ -82,7 +85,13 @@ close it.
|
||||
"""
|
||||
REMOTE = ""
|
||||
|
||||
def __init__(self, name: str, pr: PullRequest, repo: Repository):
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
pr: PullRequest,
|
||||
repo: Repository,
|
||||
backport_created_label: str = Labels.BACKPORTS_CREATED,
|
||||
):
|
||||
self.name = name
|
||||
self.pr = pr
|
||||
self.repo = repo
|
||||
@ -93,6 +102,8 @@ close it.
|
||||
self.backport_pr = None # type: Optional[PullRequest]
|
||||
self._backported = False
|
||||
|
||||
self.backport_created_label = backport_created_label
|
||||
|
||||
self.git_prefix = ( # All commits to cherrypick are done as robot-clickhouse
|
||||
"git -c user.email=robot-clickhouse@users.noreply.github.com "
|
||||
"-c user.name=robot-clickhouse -c commit.gpgsign=false"
|
||||
@ -226,7 +237,8 @@ close it.
|
||||
body=self.CHERRYPICK_DESCRIPTION.format(
|
||||
pr_number=self.pr.number,
|
||||
pr_url=self.pr.html_url,
|
||||
label_backports_created=Labels.BACKPORTS_CREATED,
|
||||
backport_created_label=self.backport_created_label,
|
||||
label_cherrypick=Labels.CHERRYPICK,
|
||||
),
|
||||
base=self.backport_branch,
|
||||
head=self.cherrypick_branch,
|
||||
@ -459,11 +471,12 @@ class Backport:
|
||||
pr_labels = [label.name for label in pr.labels]
|
||||
if self.must_create_backport_label in pr_labels:
|
||||
branches = [
|
||||
ReleaseBranch(br, pr, self.repo) for br in self.release_branches
|
||||
ReleaseBranch(br, pr, self.repo, self.backport_created_label)
|
||||
for br in self.release_branches
|
||||
] # type: List[ReleaseBranch]
|
||||
else:
|
||||
branches = [
|
||||
ReleaseBranch(br, pr, self.repo)
|
||||
ReleaseBranch(br, pr, self.repo, self.backport_created_label)
|
||||
for br in [
|
||||
label.split("-", 1)[0][1:] # v21.8-must-backport
|
||||
for label in pr_labels
|
||||
@ -492,6 +505,7 @@ class Backport:
|
||||
)
|
||||
bp_cp_prs = self.gh.get_pulls_from_search(
|
||||
query=f"type:pr repo:{self._repo_name} {query_suffix}",
|
||||
label=f"{Labels.BACKPORT},{Labels.CHERRYPICK}",
|
||||
)
|
||||
for br in branches:
|
||||
br.pop_prs(bp_cp_prs)
|
||||
|
@ -1,9 +1,15 @@
|
||||
#!/usr/bin/env bash
|
||||
# The script is downloaded the AWS image builder Task Orchestrator and Executor (AWSTOE)
|
||||
# We can't use `user data script` because cloud-init does not check the exit code
|
||||
# The script is downloaded in the component named ci-infrastructure-prepare in us-east-1
|
||||
# The link there must be adjusted to a particular RAW link, e.g.
|
||||
# https://github.com/ClickHouse/ClickHouse/raw/653da5f00219c088af66d97a8f1ea3e35e798268/tests/ci/worker/prepare-ci-ami.sh
|
||||
|
||||
set -xeuo pipefail
|
||||
|
||||
echo "Running prepare script"
|
||||
export DEBIAN_FRONTEND=noninteractive
|
||||
export RUNNER_VERSION=2.298.2
|
||||
export RUNNER_VERSION=2.304.0
|
||||
export RUNNER_HOME=/home/ubuntu/actions-runner
|
||||
|
||||
deb_arch() {
|
||||
@ -56,7 +62,7 @@ echo "deb [arch=$(deb_arch) signed-by=/usr/share/keyrings/docker-archive-keyring
|
||||
|
||||
apt-get update
|
||||
|
||||
apt-get install --yes --no-install-recommends docker-ce docker-ce-cli containerd.io
|
||||
apt-get install --yes --no-install-recommends docker-ce docker-buildx-plugin docker-ce-cli containerd.io
|
||||
|
||||
usermod -aG docker ubuntu
|
||||
|
||||
@ -75,6 +81,9 @@ cat <<EOT > /etc/docker/daemon.json
|
||||
}
|
||||
EOT
|
||||
|
||||
# Increase the limit on the number of virtual memory mappings to avoid 'Cannot mmap' errors
|
||||
echo "vm.max_map_count = 2097152" > /etc/sysctl.d/01-increase-map-counts.conf
|
||||
|
||||
systemctl restart docker
|
||||
|
||||
# buildx builder is user-specific
|
||||
@ -97,7 +106,7 @@ chown -R ubuntu:ubuntu $RUNNER_HOME
|
||||
|
||||
cd /home/ubuntu
|
||||
curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip"
|
||||
unzip awscliv2.zip
|
||||
unzip -q awscliv2.zip
|
||||
./aws/install
|
||||
|
||||
rm -rf /home/ubuntu/awscliv2.zip /home/ubuntu/aws
|
||||
@ -118,3 +127,6 @@ gpg --verify /tmp/amazon-cloudwatch-agent.deb.sig
|
||||
dpkg -i /tmp/amazon-cloudwatch-agent.deb
|
||||
aws ssm get-parameter --region us-east-1 --name AmazonCloudWatch-github-runners --query 'Parameter.Value' --output text > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json
|
||||
systemctl enable amazon-cloudwatch-agent.service
|
||||
|
||||
# The following line is used in aws TOE check.
|
||||
touch /var/tmp/clickhouse-ci-ami.success
|
@ -126,6 +126,7 @@ TRUSTED_CONTRIBUTORS = {
|
||||
"aalexfvk",
|
||||
"MikhailBurdukov",
|
||||
"tsolodov", # ClickHouse Employee
|
||||
"kitaisreal",
|
||||
]
|
||||
}
|
||||
|
||||
|
@ -1963,9 +1963,9 @@ class ClickHouseCluster:
|
||||
return output
|
||||
|
||||
def copy_file_to_container(self, container_id, local_path, dest_path):
|
||||
with open(local_path, "r") as fdata:
|
||||
with open(local_path, "rb") as fdata:
|
||||
data = fdata.read()
|
||||
encodedBytes = base64.b64encode(data.encode("utf-8"))
|
||||
encodedBytes = base64.b64encode(data)
|
||||
encodedStr = str(encodedBytes, "utf-8")
|
||||
self.exec_in_container(
|
||||
container_id,
|
||||
@ -1974,7 +1974,6 @@ class ClickHouseCluster:
|
||||
"-c",
|
||||
"echo {} | base64 --decode > {}".format(encodedStr, dest_path),
|
||||
],
|
||||
user="root",
|
||||
)
|
||||
|
||||
def wait_for_url(
|
||||
|
@ -1,6 +1,7 @@
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import importlib
|
||||
|
||||
|
||||
# Starts simple HTTP servers written in Python.
|
||||
@ -65,3 +66,28 @@ def start_mock_servers(cluster, script_dir, mocks, timeout=100):
|
||||
attempt += 1
|
||||
|
||||
logging.info(f"Mock {server_names_with_desc} started")
|
||||
|
||||
|
||||
# The same as start_mock_servers, but
# import servers from central directory tests/integration/helpers
# and return the control instance
def start_s3_mock(cluster, mock_name, port, timeout=100):
    script_dir = os.path.join(os.path.dirname(__file__), "s3_mocks")
    registered_servers = [
        mock
        for mock in os.listdir(script_dir)
        if os.path.isfile(os.path.join(script_dir, mock))
    ]

    file_name = mock_name + ".py"
    if file_name not in registered_servers:
        raise KeyError(
            f"Can't run s3 mock `{mock_name}`. No file `{file_name}` in directory `{script_dir}`"
        )

    start_mock_servers(cluster, script_dir, [(file_name, "resolver", port)], timeout)

    fmt = importlib.import_module("." + mock_name, "helpers.s3_mocks")
    control = getattr(fmt, "MockControl")(cluster, "resolver", port)

    return control
|
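As the changes to test_checking_s3_blobs_paranoid further down show, a test obtains the returned MockControl through a pair of fixtures and uses it to inject failures before running a query. A condensed sketch of that flow (the fixtures mirror the ones added below; test_example and its body are placeholders):

import pytest
from helpers.mock_servers import start_s3_mock

@pytest.fixture(scope="module")
def init_broken_s3(cluster):
    yield start_s3_mock(cluster, "broken_s3", "8083")

@pytest.fixture(scope="function")
def broken_s3(init_broken_s3):
    init_broken_s3.reset()  # start every test from a clean mock state
    yield init_broken_s3

def test_example(cluster, broken_s3):
    broken_s3.setup_fake_upload(1)  # make the mock acknowledge uploads without storing data
    # ... run an INSERT against a table on the broken_s3 storage policy
    # and assert on the resulting error, as the test below does.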
0
tests/integration/helpers/s3_mocks/__init__.py
Normal file
@ -12,7 +12,75 @@ UPSTREAM_HOST = "minio1"
|
||||
UPSTREAM_PORT = 9001
|
||||
|
||||
|
||||
class ServerRuntime:
|
||||
class MockControl:
|
||||
def __init__(self, cluster, container, port):
|
||||
self._cluster = cluster
|
||||
self._container = container
|
||||
self._port = port
|
||||
|
||||
def reset(self):
|
||||
response = self._cluster.exec_in_container(
|
||||
self._cluster.get_container_id(self._container),
|
||||
[
|
||||
"curl",
|
||||
"-s",
|
||||
f"http://localhost:{self._port}/mock_settings/reset",
|
||||
],
|
||||
nothrow=True,
|
||||
)
|
||||
assert response == "OK"
|
||||
|
||||
def setup_fail_upload(self, part_length):
|
||||
response = self._cluster.exec_in_container(
|
||||
self._cluster.get_container_id(self._container),
|
||||
[
|
||||
"curl",
|
||||
"-s",
|
||||
f"http://localhost:{self._port}/mock_settings/error_at_put?when_length_bigger={part_length}",
|
||||
],
|
||||
nothrow=True,
|
||||
)
|
||||
assert response == "OK"
|
||||
|
||||
def setup_fake_upload(self, part_length):
|
||||
response = self._cluster.exec_in_container(
|
||||
self._cluster.get_container_id(self._container),
|
||||
[
|
||||
"curl",
|
||||
"-s",
|
||||
f"http://localhost:{self._port}/mock_settings/fake_put?when_length_bigger={part_length}",
|
||||
],
|
||||
nothrow=True,
|
||||
)
|
||||
assert response == "OK"
|
||||
|
||||
def setup_slow_answers(
|
||||
self, minimal_length=0, timeout=None, probability=None, count=None
|
||||
):
|
||||
url = (
|
||||
f"http://localhost:{self._port}/"
|
||||
f"mock_settings/slow_put"
|
||||
f"?minimal_length={minimal_length}"
|
||||
)
|
||||
|
||||
if timeout is not None:
|
||||
url += f"&timeout={timeout}"
|
||||
|
||||
if probability is not None:
|
||||
url += f"&probability={probability}"
|
||||
|
||||
if count is not None:
|
||||
url += f"&count={count}"
|
||||
|
||||
response = self._cluster.exec_in_container(
|
||||
self._cluster.get_container_id(self._container),
|
||||
["curl", "-s", url],
|
||||
nothrow=True,
|
||||
)
|
||||
assert response == "OK"
|
||||
|
||||
|
||||
class _ServerRuntime:
|
||||
class SlowPut:
|
||||
def __init__(
|
||||
self, probability_=None, timeout_=None, minimal_length_=None, count_=None
|
||||
@ -34,11 +102,11 @@ class ServerRuntime:
|
||||
if content_length > self.minimal_length:
|
||||
if self.count > 0:
|
||||
if (
|
||||
runtime.slow_put.probability == 1
|
||||
or random.random() <= runtime.slow_put.probability
|
||||
_runtime.slow_put.probability == 1
|
||||
or random.random() <= _runtime.slow_put.probability
|
||||
):
|
||||
self.count -= 1
|
||||
return runtime.slow_put.timeout
|
||||
return _runtime.slow_put.timeout
|
||||
return None
|
||||
|
||||
def __init__(self):
|
||||
@ -65,10 +133,10 @@ class ServerRuntime:
|
||||
self.slow_put = None
|
||||
|
||||
|
||||
runtime = ServerRuntime()
|
||||
_runtime = _ServerRuntime()
|
||||
|
||||
|
||||
def and_then(value, func):
|
||||
def _and_then(value, func):
|
||||
assert callable(func)
|
||||
return None if value is None else func(value)
|
||||
|
||||
@ -153,28 +221,28 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
|
||||
|
||||
if path[1] == "error_at_put":
|
||||
params = urllib.parse.parse_qs(parts.query, keep_blank_values=False)
|
||||
runtime.error_at_put_when_length_bigger = int(
|
||||
_runtime.error_at_put_when_length_bigger = int(
|
||||
params.get("when_length_bigger", [1024 * 1024])[0]
|
||||
)
|
||||
return self._ok()
|
||||
if path[1] == "fake_put":
|
||||
params = urllib.parse.parse_qs(parts.query, keep_blank_values=False)
|
||||
runtime.fake_put_when_length_bigger = int(
|
||||
_runtime.fake_put_when_length_bigger = int(
|
||||
params.get("when_length_bigger", [1024 * 1024])[0]
|
||||
)
|
||||
return self._ok()
|
||||
if path[1] == "slow_put":
|
||||
params = urllib.parse.parse_qs(parts.query, keep_blank_values=False)
|
||||
runtime.slow_put = ServerRuntime.SlowPut(
|
||||
minimal_length_=and_then(params.get("minimal_length", [None])[0], int),
|
||||
probability_=and_then(params.get("probability", [None])[0], float),
|
||||
timeout_=and_then(params.get("timeout", [None])[0], float),
|
||||
count_=and_then(params.get("count", [None])[0], int),
|
||||
_runtime.slow_put = _ServerRuntime.SlowPut(
|
||||
minimal_length_=_and_then(params.get("minimal_length", [None])[0], int),
|
||||
probability_=_and_then(params.get("probability", [None])[0], float),
|
||||
timeout_=_and_then(params.get("timeout", [None])[0], float),
|
||||
count_=_and_then(params.get("count", [None])[0], int),
|
||||
)
|
||||
self.log_message("set slow put %s", runtime.slow_put)
|
||||
self.log_message("set slow put %s", _runtime.slow_put)
|
||||
return self._ok()
|
||||
if path[1] == "reset":
|
||||
runtime.reset()
|
||||
_runtime.reset()
|
||||
return self._ok()
|
||||
|
||||
return self._error("_mock_settings: wrong command")
|
||||
@ -191,14 +259,14 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
|
||||
def do_PUT(self):
|
||||
content_length = int(self.headers.get("Content-Length", 0))
|
||||
|
||||
if runtime.slow_put is not None:
|
||||
timeout = runtime.slow_put.get_timeout(content_length)
|
||||
if _runtime.slow_put is not None:
|
||||
timeout = _runtime.slow_put.get_timeout(content_length)
|
||||
if timeout is not None:
|
||||
self.log_message("slow put %s", timeout)
|
||||
time.sleep(timeout)
|
||||
|
||||
if runtime.error_at_put_when_length_bigger is not None:
|
||||
if content_length > runtime.error_at_put_when_length_bigger:
|
||||
if _runtime.error_at_put_when_length_bigger is not None:
|
||||
if content_length > _runtime.error_at_put_when_length_bigger:
|
||||
return self._error(
|
||||
'<?xml version="1.0" encoding="UTF-8"?>'
|
||||
"<Error>"
|
||||
@ -211,9 +279,10 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
|
||||
parts = urllib.parse.urlsplit(self.path)
|
||||
params = urllib.parse.parse_qs(parts.query, keep_blank_values=False)
|
||||
upload_id = params.get("uploadId", [None])[0]
|
||||
if runtime.fake_put_when_length_bigger is not None and upload_id is not None:
|
||||
if content_length > runtime.fake_put_when_length_bigger:
|
||||
runtime.register_fake_upload(upload_id, parts.path)
|
||||
if _runtime.fake_put_when_length_bigger is not None:
|
||||
if content_length > _runtime.fake_put_when_length_bigger:
|
||||
if upload_id is not None:
|
||||
_runtime.register_fake_upload(upload_id, parts.path)
|
||||
return self._fake_put_ok()
|
||||
|
||||
return self._redirect()
|
||||
@ -223,7 +292,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
|
||||
params = urllib.parse.parse_qs(parts.query, keep_blank_values=False)
|
||||
upload_id = params.get("uploadId", [None])[0]
|
||||
|
||||
if runtime.is_fake_upload(upload_id, parts.path):
|
||||
if _runtime.is_fake_upload(upload_id, parts.path):
|
||||
return self._fake_post_ok(parts.path)
|
||||
|
||||
return self._redirect()
|
||||
@ -235,9 +304,10 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
|
||||
self._redirect()
|
||||
|
||||
|
||||
class ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
|
||||
class _ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
|
||||
"""Handle requests in a separate thread."""
|
||||
|
||||
|
||||
httpd = ThreadedHTTPServer(("0.0.0.0", int(sys.argv[1])), RequestHandler)
|
||||
httpd.serve_forever()
|
||||
if __name__ == "__main__":
|
||||
httpd = _ThreadedHTTPServer(("0.0.0.0", int(sys.argv[1])), RequestHandler)
|
||||
httpd.serve_forever()
|
@ -7,25 +7,25 @@
|
||||
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<s3>
|
||||
<broken_s3>
|
||||
<type>s3</type>
|
||||
<endpoint>http://minio1:9001/root/data/</endpoint>
|
||||
<endpoint>http://resolver:8083/root/data/</endpoint>
|
||||
<access_key_id>minio</access_key_id>
|
||||
<secret_access_key>minio123</secret_access_key>
|
||||
</s3>
|
||||
</broken_s3>
|
||||
</disks>
|
||||
|
||||
<policies>
|
||||
<s3>
|
||||
<broken_s3>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
<disk>broken_s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3>
|
||||
</broken_s3>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
<merge_tree>
|
||||
<storage_policy>s3</storage_policy>
|
||||
<storage_policy>broken_s3</storage_policy>
|
||||
</merge_tree>
|
||||
</clickhouse>
|
||||
|
@ -1,12 +1,10 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
import pytest
|
||||
from helpers.mock_servers import start_s3_mock
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
@ -32,12 +30,23 @@ def cluster():
|
||||
cluster.shutdown()
|
||||
|
||||
|
||||
def test_paranoid_check_in_logs(cluster):
|
||||
@pytest.fixture(scope="module")
|
||||
def init_broken_s3(cluster):
|
||||
yield start_s3_mock(cluster, "broken_s3", "8083")
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def broken_s3(init_broken_s3):
|
||||
init_broken_s3.reset()
|
||||
yield init_broken_s3
|
||||
|
||||
|
||||
def test_upload_after_check_works(cluster, broken_s3):
|
||||
node = cluster.instances["node"]
|
||||
|
||||
node.query(
|
||||
"""
|
||||
CREATE TABLE s3_failover_test (
|
||||
CREATE TABLE s3_upload_after_check_works (
|
||||
id Int64,
|
||||
data String
|
||||
) ENGINE=MergeTree()
|
||||
@ -45,8 +54,12 @@ def test_paranoid_check_in_logs(cluster):
|
||||
"""
|
||||
)
|
||||
|
||||
node.query("INSERT INTO s3_failover_test VALUES (1, 'Hello')")
|
||||
broken_s3.setup_fake_upload(1)
|
||||
|
||||
assert node.contains_in_log("exists after upload")
|
||||
error = node.query_and_get_error(
|
||||
"INSERT INTO s3_upload_after_check_works VALUES (1, 'Hello')"
|
||||
)
|
||||
|
||||
assert node.query("SELECT * FROM s3_failover_test ORDER BY id") == "1\tHello\n"
|
||||
assert "Code: 499" in error, error
|
||||
assert "Immediately after upload" in error, error
|
||||
assert "suddenly disappeared" in error, error
|
||||
|
Some files were not shown because too many files have changed in this diff.