Merge branch 'master' into vdimir/direct-dict-async-read

This commit is contained in:
Vladimir C 2023-03-29 11:58:08 +02:00 committed by GitHub
commit 570426141e
62 changed files with 2069 additions and 255 deletions

View File

@ -466,9 +466,8 @@ JSON::Pos JSON::searchField(const char * data, size_t size) const
{
if (!it->hasEscapes())
{
if (static_cast<int>(size) + 2 > it->dataEnd() - it->data())
continue;
if (!strncmp(data, it->data() + 1, size))
const auto current_name = it->getRawName();
if (current_name.size() == size && 0 == memcmp(current_name.data(), data, size))
break;
}
else

@ -1 +1 @@
Subproject commit 400ad7152a0c7ee07756d96ab4f6a8f6d1080916
Subproject commit 20598079891d27ef1a3ad3f66bbfa3f983c25268

View File

@ -109,8 +109,7 @@ mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/c
# Install and start new server
install_packages package_folder
# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables).
export ZOOKEEPER_FAULT_INJECTION=0
export ZOOKEEPER_FAULT_INJECTION=1
configure
start 500
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \

docs/.gitignore vendored
View File

@ -1 +1,2 @@
build
clickhouse-docs

View File

@ -40,6 +40,8 @@ The documentation contains information about all the aspects of the ClickHouse l
At the moment, [documentation](https://clickhouse.com/docs) exists in English, Russian, and Chinese. We store the reference documentation alongside the ClickHouse source code in the [GitHub repository](https://github.com/ClickHouse/ClickHouse/tree/master/docs), and user guides in a separate repository, [ClickHouse/clickhouse-docs](https://github.com/ClickHouse/clickhouse-docs).
To get the latter, run the `get-clickhouse-docs.sh` script.
Each language lies in the corresponding folder. Files that are not translated from English are symbolic links to the English ones.
<a name="how-to-contribute"/>

View File

@ -456,33 +456,35 @@ Conditions in the `WHERE` clause contains calls of the functions that operate wi
Indexes of type `set` can be utilized by all functions. The other index types are supported as follows:
| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter |
|------------------------------------------------------------------------------------------------------------|-------------|--------|------------|------------|--------------|
| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, &lt;&gt;)](/docs/en/sql-reference/functions/comparison-functions.md/#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ |
| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ |
| [in](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notIn](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [has](/docs/en/sql-reference/functions/array-functions#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ |
| [hasAny](/docs/en/sql-reference/functions/array-functions#function-hasAny) | ✗ | ✗ | ✗ | ✗ | ✔ |
| [hasAll](/docs/en/sql-reference/functions/array-functions#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ |
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |
| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |
| hasTokenCaseInsensitive | ✗ | ✗ | ✗ | ✔ | ✗ |
| hasTokenCaseInsensitiveOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |
| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | inverted |
|------------------------------------------------------------------------------------------------------------|-------------|--------|------------|------------|--------------|----------|
| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, &lt;&gt;)](/docs/en/sql-reference/functions/comparison-functions.md/#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | ✔ |
| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | ✔ |
| [in](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notIn](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ |
| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [has](/docs/en/sql-reference/functions/array-functions#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ | ✔ |
| [hasAny](/docs/en/sql-reference/functions/array-functions#function-hasAny) | ✗ | ✗ | ✗ | ✗ | ✔ | ✗ |
| [hasAll](/docs/en/sql-reference/functions/array-functions#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ | ✗ |
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | ✔ |
| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | ✔ |
| hasTokenCaseInsensitive (*) | ✗ | ✗ | ✗ | ✔ | ✗ | ✗ |
| hasTokenCaseInsensitiveOrNull (*) | ✗ | ✗ | ✗ | ✔ | ✗ | ✗ |
Functions with a constant argument that is less than the ngram size can't be used by `ngrambf_v1` for query optimization.
(*) For `hasTokenCaseInsensitive` and `hasTokenCaseInsensitiveOrNull` to be effective, the `tokenbf_v1` index must be created on lowercased data, for example `INDEX idx (lower(str_col)) TYPE tokenbf_v1(512, 3, 0)`.
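As a minimal sketch (the table and column names here are hypothetical), a `tokenbf_v1` index built on lowercased data and a query that can use it could look like this:
```sql
-- Hypothetical table: the skip index is created on lower(str_col),
-- so case-insensitive token searches can be served by the bloom filter.
CREATE TABLE example_texts
(
    id UInt64,
    str_col String,
    INDEX idx (lower(str_col)) TYPE tokenbf_v1(512, 3, 0) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id;

-- Filter on the same lowercased expression so the index analysis can match it.
SELECT count()
FROM example_texts
WHERE hasTokenCaseInsensitive(lower(str_col), 'clickhouse');
```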
:::note
Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes cannot be used for optimizing queries where the result of a function is expected to be false.

View File

@ -0,0 +1,472 @@
---
slug: /en/getting-started/example-datasets/amazon-reviews
sidebar_label: Amazon customer reviews
---
# Amazon customer reviews dataset
[**Amazon Customer Reviews**](https://s3.amazonaws.com/amazon-reviews-pds/readme.html) (a.k.a. Product Reviews) is one of Amazon's iconic products. In a period of over two decades since the first review in 1995, millions of Amazon customers have contributed over a hundred million reviews to express opinions and describe their experiences regarding products on the Amazon.com website. This makes Amazon Customer Reviews a rich source of information for academic researchers in the fields of Natural Language Processing (NLP), Information Retrieval (IR), and Machine Learning (ML), amongst others. By accessing the dataset, you agree to the [license terms](https://s3.amazonaws.com/amazon-reviews-pds/license.txt).
The data is in a tab-separated format in gzipped files up in AWS S3. Let's walk through the steps to insert it into ClickHouse.
:::note
The queries below were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud).
:::
1. Without inserting the data into ClickHouse, we can query it in place. Let's grab some rows so we can see what they look like:
```sql
SELECT *
FROM s3('https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_Wireless_v1_00.tsv.gz',
'TabSeparatedWithNames',
'marketplace String,
customer_id Int64,
review_id String,
product_id String,
product_parent Int64,
product_title String,
product_category String,
star_rating Int64,
helpful_votes Int64,
total_votes Int64,
vine Bool,
verified_purchase Bool,
review_headline String,
review_body String,
review_date Date'
)
LIMIT 10;
```
The rows look like:
```response
┌─marketplace─┬─customer_id─┬─review_id──────┬─product_id─┬─product_parent─┬─product_title──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─product_category─┬─star_rating─┬─helpful_votes─┬─total_votes─┬─vine──┬─verified_purchase─┬─review_headline───────────┬─review_body────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─review_date─┐
│ US │ 16414143 │ R3W4P9UBGNGH1U │ B00YL0EKWE │ 852431543 │ LG G4 Case Hard Transparent Slim Clear Cover for LG G4 │ Wireless │ 2 │ 1 │ 3 │ false │ true │ Looks good, functions meh │ 2 issues - Once I turned on the circle apps and installed this case, my battery drained twice as fast as usual. I ended up turning off the circle apps, which kind of makes the case just a case... with a hole in it. Second, the wireless charging doesn't work. I have a Motorola 360 watch and a Qi charging pad. The watch charges fine but this case doesn't. But hey, it looks nice. │ 2015-08-31 │
│ US │ 50800750 │ R15V54KBMTQWAY │ B00XK95RPQ │ 516894650 │ Selfie Stick Fiblastiq&trade; Extendable Wireless Bluetooth Selfie Stick with built-in Bluetooth Adjustable Phone Holder │ Wireless │ 4 │ 0 │ 0 │ false │ false │ A fun little gadget │ Im embarrassed to admit that until recently, I have had a very negative opinion about “selfie sticks” aka “monopods” aka “narcissticks.” But having reviewed a number of them recently, theyre growing on me. This one is pretty nice and simple to set up and with easy instructions illustrated on the back of the box (not sure why some reviewers have stated that there are no instructions when they are clearly printed on the box unless they received different packaging than I did). Once assembled, the pairing via bluetooth and use of the stick are easy and intuitive. Nothing to it.<br /><br />The stick comes with a USB charging cable but arrived with a charge so you can use it immediately, though its probably a good idea to charge it right away so that you have no interruption of use out of the box. Make sure the stick is switched to on (it will light up) and extend your stick to the length you desire up to about a yards length and snap away.<br /><br />The phone clamp held the phone sturdily so I wasnt worried about it slipping out. But the longer you extend the stick, the harder it is to maneuver. But that will happen with any stick and is not specific to this one in particular.<br /><br />Two things that could improve this: 1) add the option to clamp this in portrait orientation instead of having to try and hold the stick at the portrait angle, which makes it feel unstable; 2) add the opening for a tripod so that this can be used to sit upright on a table for skyping and facetime eliminating the need to hold the phone up with your hand, causing fatigue.<br /><br />But other than that, this is a nice quality monopod for a variety of picture taking opportunities.<br /><br />I received a sample in exchange for my honest opinion. │ 2015-08-31 │
│ US │ 15184378 │ RY8I449HNXSVF │ B00SXRXUKO │ 984297154 │ Tribe AB40 Water Resistant Sports Armband with Key Holder for 4.7-Inch iPhone 6S/6/5/5S/5C, Galaxy S4 + Screen Protector - Dark Pink │ Wireless │ 5 │ 0 │ 0 │ false │ true │ Five Stars │ Fits iPhone 6 well │ 2015-08-31 │
│ US │ 10203548 │ R18TLJYCKJFLSR │ B009V5X1CE │ 279912704 │ RAVPower® Element 10400mAh External Battery USB Portable Charger (Dual USB Outputs, Ultra Compact Design), Travel Charger for iPhone 6,iPhone 6 plus,iPhone 5, 5S, 5C, 4S, 4, iPad Air, 4, 3, 2, Mini 2 (Apple adapters not included); Samsung Galaxy S5, S4, S3, S2, Note 3, Note 2; HTC One, EVO, Thunderbolt, Incredible, Droid DNA, Motorola ATRIX, Droid, Moto X, Google Glass, Nexus 4, Nexus 5, Nexus 7, │ Wireless │ 5 │ 0 │ 0 │ false │ true │ Great charger │ Great charger. I easily get 3+ charges on a Samsung Galaxy 3. Works perfectly for camping trips or long days on the boat. │ 2015-08-31 │
│ US │ 488280 │ R1NK26SWS53B8Q │ B00D93OVF0 │ 662791300 │ Fosmon Micro USB Value Pack Bundle for Samsung Galaxy Exhilarate - Includes Home / Travel Charger, Car / Vehicle Charger and USB Cable │ Wireless │ 5 │ 0 │ 0 │ false │ true │ Five Stars │ Great for the price :-) │ 2015-08-31 │
│ US │ 13334021 │ R11LOHEDYJALTN │ B00XVGJMDQ │ 421688488 │ iPhone 6 Case, Vofolen Impact Resistant Protective Shell iPhone 6S Wallet Cover Shockproof Rubber Bumper Case Anti-scratches Hard Cover Skin Card Slot Holder for iPhone 6 6S │ Wireless │ 5 │ 0 │ 0 │ false │ true │ Five Stars │ Great Case, better customer service! │ 2015-08-31 │
│ US │ 27520697 │ R3ALQVQB2P9LA7 │ B00KQW1X1C │ 554285554 │ Nokia Lumia 630 RM-978 White Factory Unlocked - International Version No Warranty │ Wireless │ 4 │ 0 │ 0 │ false │ true │ Four Stars │ Easy to set up and use. Great functions for the price │ 2015-08-31 │
│ US │ 48086021 │ R3MWLXLNO21PDQ │ B00IP1MQNK │ 488006702 │ Lumsing 10400mah external battery │ Wireless │ 5 │ 0 │ 0 │ false │ true │ Five Stars │ Works great │ 2015-08-31 │
│ US │ 12738196 │ R2L15IS24CX0LI │ B00HVORET8 │ 389677711 │ iPhone 5S Battery Case - iPhone 5 Battery Case , Maxboost Atomic S [MFI Certified] External Protective Battery Charging Case Power Bank Charger All Versions of Apple iPhone 5/5S [Juice Battery Pack] │ Wireless │ 5 │ 0 │ 0 │ false │ true │ So far so good │ So far so good. It is essentially identical to the one it replaced from another company. That one stopped working after 7 months so I am a bit apprehensive about this one. │ 2015-08-31 │
│ US │ 15867807 │ R1DJ8976WPWVZU │ B00HX3G6J6 │ 299654876 │ HTC One M8 Screen Protector, Skinomi TechSkin Full Coverage Screen Protector for HTC One M8 Clear HD Anti-Bubble Film │ Wireless │ 3 │ 0 │ 0 │ false │ true │ seems durable but these are always harder to get on ... │ seems durable but these are always harder to get on right than people make them out to be. also send to curl up at the edges after a while. with today's smartphones, you hardly need screen protectors anyway. │ 2015-08-31 │
└─────────────┴─────────────┴────────────────┴────────────┴────────────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴─────────────┴───────────────┴─────────────┴───────┴───────────────────┴─────────────────────────────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────┘
```
:::note
Normally you would not need to pass the schema into the `s3` table function - ClickHouse can infer the names and data types of the columns. However, this particular dataset uses a non-standard tab-separated format, and the `s3` function handles it fine if you include the schema.
:::
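As an aside (a sketch, using the same file as above), you can check what ClickHouse would infer on its own with `DESCRIBE`; the inferred names and types may differ from the explicit schema we pass in:
```sql
-- Sketch: inspect the schema ClickHouse infers for this file.
DESCRIBE TABLE s3('https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_Wireless_v1_00.tsv.gz',
    'TabSeparatedWithNames');
```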
2. Let's define a new table named `amazon_reviews`. We'll optimize some of the column data types - and choose a primary key (the `ORDER BY` clause):
```sql
CREATE TABLE amazon_reviews
(
review_date Date,
marketplace LowCardinality(String),
customer_id UInt64,
review_id String,
product_id String,
product_parent UInt64,
product_title String,
product_category LowCardinality(String),
star_rating UInt8,
helpful_votes UInt32,
total_votes UInt32,
vine FixedString(1),
verified_purchase FixedString(1),
review_headline String,
review_body String
)
ENGINE = MergeTree
ORDER BY (marketplace, review_date, product_category);
```
3. We are now ready to insert the data into ClickHouse. Before we do, check out the [list of files in the dataset](https://s3.amazonaws.com/amazon-reviews-pds/tsv/index.txt) and decide which ones you want to include.
4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of multiple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`:
```sql
INSERT INTO amazon_reviews
SELECT
* REPLACE(vine = 'Y' AS vine, verified_purchase = 'Y' AS verified_purchase)
FROM s3Cluster(
'default',
'https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_*.tsv.gz',
'TSVWithNames',
'review_date Date,
marketplace LowCardinality(String),
customer_id UInt64,
review_id String,
product_id String,
product_parent UInt64,
product_title String,
product_category LowCardinality(String),
star_rating UInt8,
helpful_votes UInt32,
total_votes UInt32,
vine FixedString(1),
verified_purchase FixedString(1),
review_headline String,
review_body String'
);
```
:::tip
In ClickHouse Cloud, there is a cluster named `default`. Change `default` to the name of your cluster...or use the `s3` table function (instead of `s3Cluster`) if you do not have a cluster.
:::
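For reference, here is a sketch of the same `INSERT` using the `s3` table function instead of `s3Cluster` (no cluster name required):
```sql
INSERT INTO amazon_reviews
SELECT
    * REPLACE(vine = 'Y' AS vine, verified_purchase = 'Y' AS verified_purchase)
FROM s3(
    'https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_*.tsv.gz',
    'TSVWithNames',
    'review_date Date, marketplace LowCardinality(String), customer_id UInt64,
     review_id String, product_id String, product_parent UInt64,
     product_title String, product_category LowCardinality(String),
     star_rating UInt8, helpful_votes UInt32, total_votes UInt32,
     vine FixedString(1), verified_purchase FixedString(1),
     review_headline String, review_body String'
);
```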
5. That query doesn't take long - within 5 minutes or so you should see all the rows inserted:
```sql
SELECT formatReadableQuantity(count())
FROM amazon_reviews
```
```response
┌─formatReadableQuantity(count())─┐
│ 150.96 million │
└─────────────────────────────────┘
```
6. Let's see how much space our data is using:
```sql
SELECT
disk_name,
formatReadableSize(sum(data_compressed_bytes) AS size) AS compressed,
formatReadableSize(sum(data_uncompressed_bytes) AS usize) AS uncompressed,
round(usize / size, 2) AS compr_rate,
sum(rows) AS rows,
count() AS part_count
FROM system.parts
WHERE (active = 1) AND (table = 'amazon_reviews')
GROUP BY disk_name
ORDER BY size DESC;
```
The original data was about 70G, but compressed in ClickHouse it takes up about 30G:
```response
┌─disk_name─┬─compressed─┬─uncompressed─┬─compr_rate─┬──────rows─┬─part_count─┐
│ s3disk │ 30.00 GiB │ 70.61 GiB │ 2.35 │ 150957260 │ 9 │
└───────────┴────────────┴──────────────┴────────────┴───────────┴────────────┘
```
7. Let's run some queries...here are the top 10 most-helpful reviews on Amazon:
```sql
SELECT
product_title,
review_headline
FROM amazon_reviews
ORDER BY helpful_votes DESC
LIMIT 10;
```
Notice the query has to process all 151M rows, and it takes about 17 seconds:
```response
┌─product_title────────────────────────────────────────────────────────────────────────────┬─review_headline───────────────────────────────────────────────────────┐
│ Kindle: Amazon's Original Wireless Reading Device (1st generation) │ Why and how the Kindle changes everything │
│ BIC Cristal For Her Ball Pen, 1.0mm, Black, 16ct (MSLP16-Blk) │ FINALLY! │
│ The Mountain Kids 100% Cotton Three Wolf Moon T-Shirt │ Dual Function Design │
│ Kindle Keyboard 3G, Free 3G + Wi-Fi, 6" E Ink Display │ Kindle vs. Nook (updated) │
│ Kindle Fire HD 7", Dolby Audio, Dual-Band Wi-Fi │ You Get What You Pay For │
│ Kindle Fire (Previous Generation - 1st) │ A great device WHEN you consider price and function, with a few flaws │
│ Fifty Shades of Grey: Book One of the Fifty Shades Trilogy (Fifty Shades of Grey Series) │ Did a teenager write this??? │
│ Wheelmate Laptop Steering Wheel Desk │ Perfect for an Starfleet Helmsman │
│ Kindle Wireless Reading Device (6" Display, U.S. Wireless) │ BEWARE of the SIGNIFICANT DIFFERENCES between Kindle 1 and Kindle 2! │
│ Tuscan Dairy Whole Vitamin D Milk, Gallon, 128 oz │ Make this your only stock and store │
└──────────────────────────────────────────────────────────────────────────────────────────┴───────────────────────────────────────────────────────────────────────┘
10 rows in set. Elapsed: 17.595 sec. Processed 150.96 million rows, 15.36 GB (8.58 million rows/s., 872.89 MB/s.)
```
8. Here are the top 10 products in Amazon with the most reviews:
```sql
SELECT
any(product_title),
count()
FROM amazon_reviews
GROUP BY product_id
ORDER BY 2 DESC
LIMIT 10;
```
```response
┌─any(product_title)────────────────────────────┬─count()─┐
│ Candy Crush Saga │ 50051 │
│ The Secret Society® - Hidden Mystery │ 41255 │
│ Google Chromecast HDMI Streaming Media Player │ 35977 │
│ Minecraft │ 35129 │
│ Bosch Season 1 │ 33610 │
│ Gone Girl: A Novel │ 33240 │
│ Subway Surfers │ 32328 │
│ The Fault in Our Stars │ 30149 │
│ Amazon.com eGift Cards │ 28879 │
│ Crossy Road │ 28111 │
└───────────────────────────────────────────────┴─────────┘
10 rows in set. Elapsed: 16.684 sec. Processed 195.05 million rows, 20.86 GB (11.69 million rows/s., 1.25 GB/s.)
```
9. Here are the average review ratings per month for each product (an actual [Amazon job interview question](https://datalemur.com/questions/sql-avg-review-ratings)!):
```sql
SELECT
toYYYYMM(review_date) AS month,
any(product_title),
avg(star_rating) AS avg_stars
FROM amazon_reviews
GROUP BY
month,
product_id
ORDER BY
month DESC,
product_id ASC
LIMIT 20;
```
It calculates all the monthly averages for each product, but we only return 20 rows:
```response
┌──month─┬─any(product_title)──────────────────────────────────────────────────────────────────────┬─avg_stars─┐
│ 201508 │ Mystiqueshapes Girls Ballet Tutu Neon Lime Green │ 4 │
│ 201508 │ Adult Ballet Tutu Yellow │ 5 │
│ 201508 │ The Way Things Work: An Illustrated Encyclopedia of Technology │ 5 │
│ 201508 │ Hilda Boswell's Treasury of Poetry │ 5 │
│ 201508 │ Treasury of Poetry │ 5 │
│ 201508 │ Uncle Remus Stories │ 5 │
│ 201508 │ The Book of Daniel │ 5 │
│ 201508 │ Berenstains' B Book │ 5 │
│ 201508 │ The High Hills (Brambly Hedge) │ 4.5 │
│ 201508 │ Fuzzypeg Goes to School (The Little Grey Rabbit library) │ 5 │
│ 201508 │ Dictionary in French: The Cat in the Hat (Beginner Series) │ 5 │
│ 201508 │ Windfallen │ 5 │
│ 201508 │ The Monk Who Sold His Ferrari: A Remarkable Story About Living Your Dreams │ 5 │
│ 201508 │ Illustrissimi: The Letters of Pope John Paul I │ 5 │
│ 201508 │ Social Contract: A Personal Inquiry into the Evolutionary Sources of Order and Disorder │ 5 │
│ 201508 │ Mexico The Beautiful Cookbook: Authentic Recipes from the Regions of Mexico │ 4.5 │
│ 201508 │ Alanbrooke │ 5 │
│ 201508 │ Back to Cape Horn │ 4 │
│ 201508 │ Ovett: An Autobiography (Willow books) │ 5 │
│ 201508 │ The Birds of West Africa (Collins Field Guides) │ 4 │
└────────┴─────────────────────────────────────────────────────────────────────────────────────────┴───────────┘
20 rows in set. Elapsed: 55.529 sec. Processed 252.02 million rows, 35.58 GB (4.54 million rows/s., 640.79 MB/s.)
```
10. Here is the total number of votes per product category. This query is fast because `product_category` is in the primary key:
```sql
SELECT
sum(total_votes),
product_category
FROM amazon_reviews
GROUP BY product_category
ORDER BY 1 DESC;
```
```response
┌─sum(total_votes)─┬─product_category─────────┐
│ 103877874 │ Books │
│ 25330411 │ Digital_Ebook_Purchase │
│ 23065953 │ Video DVD │
│ 18048069 │ Music │
│ 17292294 │ Mobile_Apps │
│ 15977124 │ Health & Personal Care │
│ 13554090 │ PC │
│ 13065746 │ Kitchen │
│ 12537926 │ Home │
│ 11067538 │ Beauty │
│ 10418643 │ Wireless │
│ 9089085 │ Toys │
│ 9071484 │ Sports │
│ 7335647 │ Electronics │
│ 6885504 │ Apparel │
│ 6710085 │ Video Games │
│ 6556319 │ Camera │
│ 6305478 │ Lawn and Garden │
│ 5954422 │ Office Products │
│ 5339437 │ Home Improvement │
│ 5284343 │ Outdoors │
│ 5125199 │ Pet Products │
│ 4733251 │ Grocery │
│ 4697750 │ Shoes │
│ 4666487 │ Automotive │
│ 4361518 │ Digital_Video_Download │
│ 4033550 │ Tools │
│ 3559010 │ Baby │
│ 3317662 │ Home Entertainment │
│ 2559501 │ Video │
│ 2204328 │ Furniture │
│ 2157587 │ Musical Instruments │
│ 1881662 │ Software │
│ 1676081 │ Jewelry │
│ 1499945 │ Watches │
│ 1224071 │ Digital_Music_Purchase │
│ 847918 │ Luggage │
│ 503939 │ Major Appliances │
│ 392001 │ Digital_Video_Games │
│ 348990 │ Personal_Care_Appliances │
│ 321372 │ Digital_Software │
│ 169585 │ Mobile_Electronics │
│ 72970 │ Gift Card │
└──────────────────┴──────────────────────────┘
43 rows in set. Elapsed: 0.423 sec. Processed 150.96 million rows, 756.20 MB (356.70 million rows/s., 1.79 GB/s.)
```
11. Let's find the products with the word **"awful"** occurring most frequently in their reviews. This is a big task - over 151M strings have to be parsed looking for a single word:
```sql
SELECT
product_id,
any(product_title),
avg(star_rating),
count() AS count
FROM amazon_reviews
WHERE position(review_body, 'awful') > 0
GROUP BY product_id
ORDER BY count DESC
LIMIT 50;
```
The query takes a couple of minutes, but the results are a fun read:
```response
┌─product_id─┬─any(product_title)───────────────────────────────────────────────────────────────────────┬───avg(star_rating)─┬─count─┐
│ 0345803485 │ Fifty Shades of Grey: Book One of the Fifty Shades Trilogy (Fifty Shades of Grey Series) │ 1.3870967741935485 │ 248 │
│ B007J4T2G8 │ Fifty Shades of Grey (Fifty Shades, Book 1) │ 1.4439834024896265 │ 241 │
│ B006LSZECO │ Gone Girl: A Novel │ 2.2986425339366514 │ 221 │
│ B00008OWZG │ St. Anger │ 1.6565656565656566 │ 198 │
│ B00BD99JMW │ Allegiant (Divergent Trilogy, Book 3) │ 1.8342541436464088 │ 181 │
│ B0000YUXI0 │ Mavala Switzerland Mavala Stop Nail Biting │ 4.473684210526316 │ 171 │
│ B004S8F7QM │ Cards Against Humanity │ 4.753012048192771 │ 166 │
│ 031606792X │ Breaking Dawn (The Twilight Saga, Book 4) │ 1.796875 │ 128 │
│ 006202406X │ Allegiant (Divergent Series) │ 1.4242424242424243 │ 99 │
│ B0051VVOB2 │ Kindle Fire (Previous Generation - 1st) │ 2.7448979591836733 │ 98 │
│ B00I3MP3SG │ Pilot │ 1.8762886597938144 │ 97 │
│ 030758836X │ Gone Girl │ 2.15625 │ 96 │
│ B0009X29WK │ Precious Cat Ultra Premium Clumping Cat Litter │ 3.0759493670886076 │ 79 │
│ B00JB3MVCW │ Noah │ 1.2027027027027026 │ 74 │
│ B00BAXFECK │ The Goldfinch: A Novel (Pulitzer Prize for Fiction) │ 2.643835616438356 │ 73 │
│ B00N28818A │ Amazon Prime Video │ 1.4305555555555556 │ 72 │
│ B007FTE2VW │ SimCity - Limited Edition │ 1.2794117647058822 │ 68 │
│ 0439023513 │ Mockingjay (The Hunger Games) │ 2.6417910447761193 │ 67 │
│ B00178630A │ Diablo III - PC/Mac │ 1.671875 │ 64 │
│ B000OCEWGW │ Liquid Ass │ 4.8125 │ 64 │
│ B005ZOBNOI │ The Fault in Our Stars │ 4.316666666666666 │ 60 │
│ B00L9B7IKE │ The Girl on the Train: A Novel │ 2.0677966101694913 │ 59 │
│ B007S6Y6VS │ Garden of Life Raw Organic Meal │ 2.8793103448275863 │ 58 │
│ B0064X7B4A │ Words With Friends │ 2.2413793103448274 │ 58 │
│ B003WUYPPG │ Unbroken: A World War II Story of Survival, Resilience, and Redemption │ 4.620689655172414 │ 58 │
│ B00006HBUJ │ Star Wars: Episode II - Attack of the Clones (Widescreen Edition) │ 2.2982456140350878 │ 57 │
│ B000XUBFE2 │ The Book Thief │ 4.526315789473684 │ 57 │
│ B0006399FS │ How to Dismantle an Atomic Bomb │ 1.9821428571428572 │ 56 │
│ B003ZSJ212 │ Star Wars: The Complete Saga (Episodes I-VI) (Packaging May Vary) [Blu-ray] │ 2.309090909090909 │ 55 │
│ 193700788X │ Dead Ever After (Sookie Stackhouse/True Blood) │ 1.5185185185185186 │ 54 │
│ B004FYEZMQ │ Mass Effect 3 │ 2.056603773584906 │ 53 │
│ B000CFYAMC │ The Room │ 3.9615384615384617 │ 52 │
│ B0031JK95S │ Garden of Life Raw Organic Meal │ 3.3137254901960786 │ 51 │
│ B0012JY4G4 │ Color Oops Hair Color Remover Extra Strength 1 Each │ 3.9019607843137254 │ 51 │
│ B007VTVRFA │ SimCity - Limited Edition │ 1.2040816326530612 │ 49 │
│ B00CE18P0K │ Pilot │ 1.7142857142857142 │ 49 │
│ 0316015849 │ Twilight (The Twilight Saga, Book 1) │ 1.8979591836734695 │ 49 │
│ B00DR0PDNE │ Google Chromecast HDMI Streaming Media Player │ 2.5416666666666665 │ 48 │
│ B000056OWC │ The First Years: 4-Stage Bath System │ 1.2127659574468086 │ 47 │
│ B007IXWKUK │ Fifty Shades Darker (Fifty Shades, Book 2) │ 1.6304347826086956 │ 46 │
│ 1892112000 │ To Train Up a Child │ 1.4130434782608696 │ 46 │
│ 043935806X │ Harry Potter and the Order of the Phoenix (Book 5) │ 3.977272727272727 │ 44 │
│ B00BGO0Q9O │ Fitbit Flex Wireless Wristband with Sleep Function, Black │ 1.9318181818181819 │ 44 │
│ B003XF1XOQ │ Mockingjay (Hunger Games Trilogy, Book 3) │ 2.772727272727273 │ 44 │
│ B00DD2B52Y │ Spring Breakers │ 1.2093023255813953 │ 43 │
│ B0064X7FVE │ The Weather Channel: Forecast, Radar & Alerts │ 1.5116279069767442 │ 43 │
│ B0083PWAPW │ Kindle Fire HD 7", Dolby Audio, Dual-Band Wi-Fi │ 2.627906976744186 │ 43 │
│ B00192KCQ0 │ Death Magnetic │ 3.5714285714285716 │ 42 │
│ B007S6Y74O │ Garden of Life Raw Organic Meal │ 3.292682926829268 │ 41 │
│ B0052QYLUM │ Infant Optics DXR-5 Portable Video Baby Monitor │ 2.1463414634146343 │ 41 │
└────────────┴──────────────────────────────────────────────────────────────────────────────────────────┴────────────────────┴───────┘
50 rows in set. Elapsed: 60.052 sec. Processed 150.96 million rows, 68.93 GB (2.51 million rows/s., 1.15 GB/s.)
```
12. We can run the same query again, except this time we search for **awesome** in the reviews:
```sql
SELECT
product_id,
any(product_title),
avg(star_rating),
count() AS count
FROM amazon_reviews
WHERE position(review_body, 'awesome') > 0
GROUP BY product_id
ORDER BY count DESC
LIMIT 50;
```
It runs quite a bit faster - which means the cache is helping us out here:
```response
┌─product_id─┬─any(product_title)────────────────────────────────────────────────────┬───avg(star_rating)─┬─count─┐
│ B00992CF6W │ Minecraft │ 4.848130353039482 │ 4787 │
│ B009UX2YAC │ Subway Surfers │ 4.866720955483171 │ 3684 │
│ B00QW8TYWO │ Crossy Road │ 4.935217903415784 │ 2547 │
│ B00DJFIMW6 │ Minion Rush: Despicable Me Official Game │ 4.850450450450451 │ 2220 │
│ B00AREIAI8 │ My Horse │ 4.865313653136531 │ 2168 │
│ B00I8Q77Y0 │ Flappy Wings (not Flappy Bird) │ 4.8246561886051085 │ 2036 │
│ B0054JZC6E │ 101-in-1 Games │ 4.792542016806722 │ 1904 │
│ B00G5LQ5MU │ Escape The Titanic │ 4.724673710379117 │ 1609 │
│ B0086700CM │ Temple Run │ 4.87636130685458 │ 1561 │
│ B009HKL4B8 │ The Sims Freeplay │ 4.763942931258106 │ 1542 │
│ B00I6IKSZ0 │ Pixel Gun 3D (Pocket Edition) - multiplayer shooter with skin creator │ 4.849894291754757 │ 1419 │
│ B006OC2ANS │ BLOOD & GLORY │ 4.8561538461538465 │ 1300 │
│ B00FATEJYE │ Injustice: Gods Among Us (Kindle Tablet Edition) │ 4.789265982636149 │ 1267 │
│ B00B2V66VS │ Temple Run 2 │ 4.764705882352941 │ 1173 │
│ B00JOT3HQ2 │ Geometry Dash Lite │ 4.909747292418772 │ 1108 │
│ B00DUGCLY4 │ Guess The Emoji │ 4.813606710158434 │ 1073 │
│ B00DR0PDNE │ Google Chromecast HDMI Streaming Media Player │ 4.607276119402985 │ 1072 │
│ B00FAPF5U0 │ Candy Crush Saga │ 4.825757575757576 │ 1056 │
│ B0051VVOB2 │ Kindle Fire (Previous Generation - 1st) │ 4.600407747196738 │ 981 │
│ B007JPG04E │ FRONTLINE COMMANDO │ 4.8125 │ 912 │
│ B00PTB7B34 │ Call of Duty®: Heroes │ 4.876404494382022 │ 890 │
│ B00846GKTW │ Style Me Girl - Free 3D Fashion Dressup │ 4.785714285714286 │ 882 │
│ B004S8F7QM │ Cards Against Humanity │ 4.931034482758621 │ 754 │
│ B00FAX6XQC │ DEER HUNTER CLASSIC │ 4.700272479564033 │ 734 │
│ B00PSGW79I │ Buddyman: Kick │ 4.888736263736264 │ 728 │
│ B00CTQ6SIG │ The Simpsons: Tapped Out │ 4.793948126801153 │ 694 │
│ B008JK6W5K │ Logo Quiz │ 4.782106782106782 │ 693 │
│ B00EDTSKLU │ Geometry Dash │ 4.942028985507246 │ 690 │
│ B00CSR2J9I │ Hill Climb Racing │ 4.880059970014993 │ 667 │
│ B005ZXWMUS │ Netflix │ 4.722306525037936 │ 659 │
│ B00CRFAAYC │ Fab Tattoo Artist FREE │ 4.907435508345979 │ 659 │
│ B00DHQHQCE │ Battle Beach │ 4.863287250384024 │ 651 │
│ B00BGA9WK2 │ PlayStation 4 500GB Console [Old Model] │ 4.688751926040061 │ 649 │
│ B008Y7SMQU │ Logo Quiz - Fun Plus Free │ 4.7888 │ 625 │
│ B0083PWAPW │ Kindle Fire HD 7", Dolby Audio, Dual-Band Wi-Fi │ 4.593900481540931 │ 623 │
│ B008XG1X18 │ Pinterest │ 4.8148760330578515 │ 605 │
│ B007SYWFRM │ Ice Age Village │ 4.8566666666666665 │ 600 │
│ B00K7WGUKA │ Don't Tap The White Tile (Piano Tiles) │ 4.922689075630252 │ 595 │
│ B00BWYQ9YE │ Kindle Fire HDX 7", HDX Display (Previous Generation - 3rd) │ 4.649913344887349 │ 577 │
│ B00IZLM8MY │ High School Story │ 4.840425531914893 │ 564 │
│ B004MC8CA2 │ Bible │ 4.884476534296029 │ 554 │
│ B00KNWYDU8 │ Dragon City │ 4.861111111111111 │ 540 │
│ B009ZKSPDK │ Survivalcraft │ 4.738317757009346 │ 535 │
│ B00A4O6NMG │ My Singing Monsters │ 4.845559845559846 │ 518 │
│ B002MQYOFW │ The Hunger Games (Hunger Games Trilogy, Book 1) │ 4.846899224806202 │ 516 │
│ B005ZFOOE8 │ iHeartRadio Free Music & Internet Radio │ 4.837301587301587 │ 504 │
│ B00AIUUXHC │ Hungry Shark Evolution │ 4.846311475409836 │ 488 │
│ B00E8KLWB4 │ The Secret Society® - Hidden Mystery │ 4.669438669438669 │ 481 │
│ B006D1ONE4 │ Where's My Water? │ 4.916317991631799 │ 478 │
│ B00G6ZTM3Y │ Terraria │ 4.728421052631579 │ 475 │
└────────────┴───────────────────────────────────────────────────────────────────────┴────────────────────┴───────┘
50 rows in set. Elapsed: 33.954 sec. Processed 150.96 million rows, 68.95 GB (4.45 million rows/s., 2.03 GB/s.)
```

View File

@ -0,0 +1,172 @@
---
slug: /en/getting-started/example-datasets/environmental-sensors
sidebar_label: Environmental Sensors Data
---
# Environmental Sensors Data
[Sensor.Community](https://sensor.community/en/) is a contributor-driven global sensor network that creates Open Environmental Data. The data is collected from sensors all over the globe. Anyone can purchase a sensor and place it wherever they like. The APIs to download the data are on [GitHub](https://github.com/opendata-stuttgart/meta/wiki/APIs), and the data is freely available under the [Database Contents License (DbCL)](https://opendatacommons.org/licenses/dbcl/1-0/).
:::important
The dataset has over 20 billion records, so be careful when copying and pasting the commands below - make sure your resources can handle that volume of data. The commands below were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud).
:::
1. The data is in S3, so we can use the `s3` table function to create a table from the files. We can also query the data in place. Let's look at a few rows before attempting to insert it into ClickHouse:
```sql
SELECT *
FROM s3(
'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/sensors/monthly/2019-06_bmp180.csv.zst',
'CSVWithNames'
)
LIMIT 10
SETTINGS format_csv_delimiter = ';';
```
The data is in CSV files but uses a semi-colon for the delimiter. The rows look like:
```response
┌─sensor_id─┬─sensor_type─┬─location─┬────lat─┬────lon─┬─timestamp───────────┬──pressure─┬─altitude─┬─pressure_sealevel─┬─temperature─┐
│ 9119 │ BMP180 │ 4594 │ 50.994 │ 7.126 │ 2019-06-01T00:00:00 │ 101471 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 19.9 │
│ 21210 │ BMP180 │ 10762 │ 42.206 │ 25.326 │ 2019-06-01T00:00:00 │ 99525 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 19.3 │
│ 19660 │ BMP180 │ 9978 │ 52.434 │ 17.056 │ 2019-06-01T00:00:04 │ 101570 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 15.3 │
│ 12126 │ BMP180 │ 6126 │ 57.908 │ 16.49 │ 2019-06-01T00:00:05 │ 101802.56 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 8.07 │
│ 15845 │ BMP180 │ 8022 │ 52.498 │ 13.466 │ 2019-06-01T00:00:05 │ 101878 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 23 │
│ 16415 │ BMP180 │ 8316 │ 49.312 │ 6.744 │ 2019-06-01T00:00:06 │ 100176 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 14.7 │
│ 7389 │ BMP180 │ 3735 │ 50.136 │ 11.062 │ 2019-06-01T00:00:06 │ 98905 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 12.1 │
│ 13199 │ BMP180 │ 6664 │ 52.514 │ 13.44 │ 2019-06-01T00:00:07 │ 101855.54 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 19.74 │
│ 12753 │ BMP180 │ 6440 │ 44.616 │ 2.032 │ 2019-06-01T00:00:07 │ 99475 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 17 │
│ 16956 │ BMP180 │ 8594 │ 52.052 │ 8.354 │ 2019-06-01T00:00:08 │ 101322 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 17.2 │
└───────────┴─────────────┴──────────┴────────┴───────┴─────────────────────┴──────────┴──────────┴───────────────────┴─────────────┘
```
2. We will use the following `MergeTree` table to store the data in ClickHouse:
```sql
CREATE TABLE sensors
(
sensor_id UInt16,
sensor_type Enum('BME280', 'BMP180', 'BMP280', 'DHT22', 'DS18B20', 'HPM', 'HTU21D', 'PMS1003', 'PMS3003', 'PMS5003', 'PMS6003', 'PMS7003', 'PPD42NS', 'SDS011'),
location UInt32,
lat Float32,
lon Float32,
timestamp DateTime,
P1 Float32,
P2 Float32,
P0 Float32,
durP1 Float32,
ratioP1 Float32,
durP2 Float32,
ratioP2 Float32,
pressure Float32,
altitude Float32,
pressure_sealevel Float32,
temperature Float32,
humidity Float32,
date Date MATERIALIZED toDate(timestamp)
)
ENGINE = MergeTree
ORDER BY (timestamp, sensor_id);
```
3. ClickHouse Cloud services have a cluster named `default`. We will use the `s3Cluster` table function, which reads S3 files in parallel from the nodes in your cluster. (If you do not have a cluster, just use the `s3` function and remove the cluster name.)
This query will take a while - it's about 1.67T of data uncompressed:
```sql
INSERT INTO sensors
SELECT *
FROM s3Cluster(
'default',
'https://clickhouse-public-datasets.s3.amazonaws.com/sensors/monthly/*.csv.zst',
'CSVWithNames',
$$ sensor_id UInt16,
sensor_type String,
location UInt32,
lat Float32,
lon Float32,
timestamp DateTime,
P1 Float32,
P2 Float32,
P0 Float32,
durP1 Float32,
ratioP1 Float32,
durP2 Float32,
ratioP2 Float32,
pressure Float32,
altitude Float32,
pressure_sealevel Float32,
temperature Float32,
humidity Float32 $$
)
SETTINGS
format_csv_delimiter = ';',
input_format_allow_errors_ratio = '0.5',
input_format_allow_errors_num = 10000,
input_format_parallel_parsing = 0,
date_time_input_format = 'best_effort',
max_insert_threads = 32,
parallel_distributed_insert_select = 1;
```
Here is the response, showing the number of rows and the processing speed. The data is inserted at a rate of over 6M rows per second!
```response
0 rows in set. Elapsed: 3419.330 sec. Processed 20.69 billion rows, 1.67 TB (6.05 million rows/s., 488.52 MB/s.)
```
4. Let's see how much disk storage is needed for the `sensors` table:
```sql
SELECT
disk_name,
formatReadableSize(sum(data_compressed_bytes) AS size) AS compressed,
formatReadableSize(sum(data_uncompressed_bytes) AS usize) AS uncompressed,
round(usize / size, 2) AS compr_rate,
sum(rows) AS rows,
count() AS part_count
FROM system.parts
WHERE (active = 1) AND (table = 'sensors')
GROUP BY
disk_name
ORDER BY size DESC;
```
The 1.67 TB of raw data is compressed down to about 310 GiB in ClickHouse, and there are 20.69 billion rows:
```response
┌─disk_name─┬─compressed─┬─uncompressed─┬─compr_rate─┬────────rows─┬─part_count─┐
│ s3disk │ 310.21 GiB │ 1.30 TiB │ 4.29 │ 20693971809 │ 472 │
└───────────┴────────────┴──────────────┴────────────┴─────────────┴────────────┘
```
5. Let's analyze the data now that it's in ClickHouse. Notice the quantity of data increases over time as more sensors are deployed:
```sql
SELECT
date,
count()
FROM sensors
GROUP BY date
ORDER BY date ASC;
```
We can create a chart in the SQL Console to visualize the results:
![Number of events per day](./images/sensors_01.png)
6. This query counts the number of overly hot and humid days:
```sql
WITH
toYYYYMMDD(timestamp) AS day
SELECT day, count() FROM sensors
WHERE temperature >= 40 AND temperature <= 50 AND humidity >= 90
GROUP BY day
ORDER BY day asc;
```
Here's a visualization of the result:
![Hot and humid days](./images/sensors_02.png)

Two binary image files added (referenced above as `./images/sensors_01.png` and `./images/sensors_02.png`): 418 KiB and 204 KiB; file contents not shown.

View File

@ -383,3 +383,19 @@ Data can be restored from backup using the `ALTER TABLE ... ATTACH PARTITION ...
For more information about queries related to partition manipulations, see the [ALTER documentation](../sql-reference/statements/alter/partition.md#alter_manipulations-with-partitions).
A third-party tool is available to automate this approach: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup).
## Settings to disallow concurrent backup/restore
To disallow concurrent backups and restores, use the following settings, respectively.
```xml
<clickhouse>
<backups>
<allow_concurrent_backups>false</allow_concurrent_backups>
<allow_concurrent_restores>false</allow_concurrent_restores>
</backups>
</clickhouse>
```
The default value for both settings is true, so concurrent backups and restores are allowed by default.
When these settings are false on a cluster, only one backup or restore is allowed to run on the cluster at a time.
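As a hedged illustration (the table names are hypothetical and a disk named `backups` is assumed to be configured), with `allow_concurrent_backups` set to false a second backup started while another is still running is expected to be rejected:
```sql
-- Hypothetical names; assumes a disk called 'backups' is configured.
BACKUP TABLE my_db.my_table TO Disk('backups', 'my_table_1.zip');

-- While the backup above is still running, a second one such as this
-- is expected to fail because concurrent backups are disallowed:
BACKUP TABLE my_db.other_table TO Disk('backups', 'other_table_1.zip');
```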

View File

@ -14,6 +14,10 @@ Columns:
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](../system-tables/query_log.md) system table.
- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored.
:::tip
Check out the Knowledge Base for some handy queries, including [how to see what threads are currently running](https://clickhouse.com/docs/knowledgebase/find-expensive-queries) and [useful queries for troubleshooting](https://clickhouse.com/docs/knowledgebase/useful-queries-for-troubleshooting).
:::
**Example**
Enabling introspection functions:

View File

@ -737,6 +737,44 @@ Result:
└────────────┴───────┘
```
## toDecimalString
Converts a numeric value to a String with the number of fractional digits in the output specified by the user.
**Syntax**
``` sql
toDecimalString(number, scale)
```
**Parameters**
- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md),
- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
* Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal),
* Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60.
**Returned value**
- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale).
The number is rounded up or down according to common arithmetic rules if the requested scale is smaller than the scale of the original number.
**Example**
Query:
``` sql
SELECT toDecimalString(CAST('64.32', 'Float64'), 5);
```
Result:
```response
┌toDecimalString(CAST('64.32', 'Float64'), 5)─┐
│ 64.32000 │
└─────────────────────────────────────────────┘
```
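To illustrate the rounding behaviour mentioned above, here is a small sketch (the expected output is an assumption based on standard rounding):
``` sql
-- The requested scale (2) is smaller than the value's own scale,
-- so the result is rounded: expected to return '64.33'.
SELECT toDecimalString(CAST('64.327', 'Float64'), 2);
```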
## reinterpretAsUInt(8\|16\|32\|64)
## reinterpretAsInt(8\|16\|32\|64)

View File

@ -0,0 +1,31 @@
#!/usr/bin/env bash
set -e
# The script to clone or update the user-guides documentation repo
# https://github.com/ClickHouse/clickhouse-docs
WORKDIR=$(dirname "$0")
WORKDIR=$(readlink -f "${WORKDIR}")
cd "$WORKDIR"
if [ -d "clickhouse-docs" ]; then
git -C clickhouse-docs pull
else
if [ -n "$1" ]; then
url_type="$1"
else
read -rp "Enter the URL type (ssh | https): " url_type
fi
case "$url_type" in
ssh)
git_url=git@github.com:ClickHouse/clickhouse-docs.git
;;
https)
git_url=https://github.com/ClickHouse/clickhouse-docs.git
;;
*)
echo "Url type must be 'ssh' or 'https'"
exit 1
;;
esac
git clone "$git_url" "clickhouse-docs"
fi

View File

@ -553,6 +553,44 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut;
└────────────┴───────┘
```
## toDecimalString
Takes any numeric type as the first argument and returns a string with the decimal representation of the number, with the precision given by the second argument.
**Syntax**
``` sql
toDecimalString(number, scale)
```
**Parameters**
- `number` — A value of any numeric type: [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md), [Float](/docs/ru/sql-reference/data-types/float.md), [Decimal](/docs/ru/sql-reference/data-types/decimal.md),
- `scale` — The required number of fractional digits, [UInt8](/docs/ru/sql-reference/data-types/int-uint.md).
* The `scale` value for [Decimal](/docs/ru/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md) types must not exceed 77 (this is the maximum number of significant digits for these types),
* The `scale` value for [Float](/docs/ru/sql-reference/data-types/float.md) must not exceed 60.
**Returned value**
- A string ([String](/docs/en/sql-reference/data-types/string.md)) representing the decimal form of the input number with the requested number of fractional digits (scale).
If necessary, the number is rounded according to standard arithmetic rules.
**Usage example**
Query:
``` sql
SELECT toDecimalString(CAST('64.32', 'Float64'), 5);
```
Result:
```response
┌─toDecimalString(CAST('64.32', 'Float64'), 5)┐
│ 64.32000 │
└─────────────────────────────────────────────┘
```
## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264}
## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264}

View File

@ -30,7 +30,6 @@
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Executors/PushingPipelineExecutor.h>
#include <Core/Block.h>
#include <base/StringRef.h>
#include <Common/DateLUT.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFileDescriptor.h>

View File

@ -0,0 +1,354 @@
## clickhouse-obfuscator — a tool for dataset anonymization
### Installation And Usage
```
curl https://clickhouse.com/ | sh
./clickhouse obfuscator --help
```
### Example
```
./clickhouse obfuscator --seed 123 --input-format TSV --output-format TSV \
--structure 'CounterID UInt32, URLDomain String, URL String, SearchPhrase String, Title String' \
< source.tsv > result.tsv
```
### A long, long time ago...
ClickHouse users already know that its biggest advantage is its high-speed processing of analytical queries. But claims like this need to be confirmed with reliable performance testing. That's what we want to talk about today.
![benchmarks.png](https://clickhouse.com/uploads/benchmarks_24f1904cc9.png)
We started running tests in 2013, long before ClickHouse was available as open source. Back then, our main concern was data processing speed for a web analytics product. We started storing this data, which we would later store in ClickHouse, in January 2009. Part of the data had been written to a database starting in 2012, and part was converted from OLAPServer and Metrage (data structures previously used by the solution). For testing, we took the first subset at random from data for 1 billion pageviews. Our web analytics platform didn't have any queries at that point, so we came up with queries that interested us, using all the possible ways to filter, aggregate, and sort the data.
ClickHouse performance was compared with similar systems like Vertica and MonetDB. To avoid bias, testing was performed by an employee who hadn't participated in ClickHouse development, and special cases in the code were not optimized until all the results were obtained. We used the same approach to get a data set for functional testing.
After ClickHouse was released as open source in 2016, people began questioning these tests.
## Shortcomings of tests on private data
Our performance tests:
- Couldn't be reproduced independently because they used private data that can't be published. Some of the functional tests are not available to external users for the same reason.
- Needed further development. The set of tests needed to be substantially expanded in order to isolate performance changes in individual parts of the system.
- Didn't run on a per-commit basis or for individual pull requests. External developers couldn't check their code for performance regressions.
We could solve these problems by throwing out the old tests and writing new ones based on open data, like [flight data for the USA](https://clickhouse.com/docs/en/getting-started/example-datasets/ontime/) and [taxi rides in New York](https://clickhouse.com/docs/en/getting-started/example-datasets/nyc-taxi). Or we could use benchmarks like TPC-H, TPC-DS, and [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema). The disadvantage is that this data was very different from web analytics data, and we would rather keep the test queries.
### Why it's important to use real data
Performance should only be tested on real data from a production environment. Let's look at some examples.
### Example 1
Let's say you fill a database with evenly distributed pseudorandom numbers. Data compression isn't going to work in this case, although data compression is essential to analytical databases. There is no silver bullet solution to the challenge of choosing the right compression algorithm and the right way to integrate it into the system since data compression requires a compromise between the speed of compression and decompression and the potential compression efficiency. But systems that can't compress data are guaranteed losers. If your tests use evenly distributed pseudorandom numbers, this factor is ignored, and the results will be distorted.
Bottom line: Test data must have a realistic compression ratio.
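As a hedged sketch of how one might check this in ClickHouse (the table name is hypothetical), the per-column compression ratio can be read from `system.columns`:
```sql
-- Per-column compression ratio for a hypothetical 'hits' table.
SELECT
    name,
    formatReadableSize(data_compressed_bytes) AS compressed,
    formatReadableSize(data_uncompressed_bytes) AS uncompressed,
    round(data_uncompressed_bytes / data_compressed_bytes, 2) AS ratio
FROM system.columns
WHERE database = currentDatabase() AND table = 'hits'
ORDER BY data_uncompressed_bytes DESC;
```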
### Example 2
Let's say we are interested in the execution speed of this SQL query:
```sql
SELECT RegionID, uniq(UserID) AS visitors
FROM test.hits
GROUP BY RegionID
ORDER BY visitors DESC
LIMIT 10
```
This was a typical query for a web analytics product. What affects the processing speed?
- How `GROUP BY` is executed.
- Which data structure is used for calculating the `uniq` aggregate function.
- How many different RegionIDs there are and how much RAM each state of the `uniq` function requires.
But another important factor is that the amount of data is distributed unevenly between regions. (It probably follows a power law; I haven't plotted the distribution on a log-log graph, so I can't say for sure.) If this is the case, the states of the `uniq` aggregate function with fewer values must use very little memory. When there are a lot of different aggregation keys, every single byte counts. How can we get generated data that has all these properties? The obvious solution is to use real data.
Many DBMSs implement the HyperLogLog data structure for an approximation of COUNT(DISTINCT), but none of them work very well because this data structure uses a fixed amount of memory. ClickHouse has a function that uses [a combination of three different data structures](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/uniqcombined), depending on the size of the data set.
Bottom line: Test data must represent distribution properties of the real data well enough, meaning cardinality (number of distinct values per column) and cross-column cardinality (number of different values counted across several different columns).
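As a sketch (using the `test.hits` table from the query above), the exact and approximate distinct counts can be compared directly:
```sql
-- Compare exact and approximate COUNT(DISTINCT) implementations per region.
SELECT
    RegionID,
    uniqExact(UserID) AS exact_visitors,
    uniq(UserID) AS approx_visitors,
    uniqCombined(UserID) AS combined_visitors
FROM test.hits
GROUP BY RegionID
ORDER BY exact_visitors DESC
LIMIT 10;
```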
### Example 3
Instead of testing the performance of the ClickHouse DBMS, let's take something simpler, like hash tables. For hash tables, it's essential to choose the right hash function. This is not as important for `std::unordered_map`, because it's a hash table based on chaining, and a prime number is used as the array size. The standard library implementation in GCC and Clang uses a trivial hash function as the default hash function for numeric types. However, `std::unordered_map` is not the best choice when we are looking for maximum speed. With an open-addressing hash table, we can't just use a standard hash function. Choosing the right hash function becomes the deciding factor.
It's easy to find hash table performance tests using random data that don't take the hash functions used into account. Many hash function tests also focus on the calculation speed and certain quality criteria, even though they ignore the data structures used. But the fact is that hash tables and HyperLogLog require different hash function quality criteria.
![alexey_chat.png](https://clickhouse.com/uploads/alexey_chat_3f8db88301.png)
## Challenge
Our goal was to obtain data for testing performance that had the same structure as our web analytics data with all the properties that are important for benchmarks, but in such a way that there remain no traces of real website users in this data. In other words, the data must be anonymized and still preserve its:
* Compression ratio.
* Cardinality (the number of distinct values).
* Mutual cardinality between several different columns.
* Properties of probability distributions that can be used for data modeling (for example, if we believe that regions are distributed according to a power law, then the exponent — the distribution parameter — should be approximately the same for artificial data and for real data).
How can we get a similar compression ratio for the data? If LZ4 is used, substrings in binary data must be repeated at approximately the same distance, and the repetitions must be approximately the same length. For ZSTD, entropy per byte must also coincide.
The ultimate goal was to create a publicly available tool that anyone can use to anonymize their data sets for publication. This would allow us to debug and test performance on other people's data similar to our production data. We would also like the generated data to be interesting.
However, these are very loosely-defined requirements, and we aren't planning to write up a formal problem statement or specification for this task.
## Possible solutions
I don't want to make it sound like this problem was particularly important. It was never actually included in planning, and no one had intentions to work on it. I hoped that an idea would come up someday, and suddenly I would be in a good mood and be able to put everything else off until later.
### Explicit probabilistic models
- We want to preserve the continuity of time series data. This means that for some types of data, we need to model the difference between neighboring values rather than the value itself.
- To model "joint cardinality" of columns, we would also have to explicitly reflect dependencies between columns. For instance, there are usually very few IP addresses per user ID, so to generate an IP address, we would have to use a hash value of the user ID as a seed and add a small amount of other pseudorandom data.
- We weren't sure how to express the dependency that the same user frequently visits URLs with matching domains at approximately the same time.
All this can be written in a C++ "script" with the distributions and dependencies hard coded. However, Markov models are obtained from a combination of statistics with smoothing and adding noise. I started writing a script like this, but after writing explicit models for ten columns, it became unbearably boring — and the "hits" table in the web analytics product had more than 100 columns way back in 2012.
```c++
EventTime.day(std::discrete_distribution<>({
0, 0, 13, 30, 0, 14, 42, 5, 6, 31, 17, 0, 0, 0, 0, 23, 10, ...})(random));
EventTime.hour(std::discrete_distribution<>({
13, 7, 4, 3, 2, 3, 4, 6, 10, 16, 20, 23, 24, 23, 18, 19, 19, ...})(random));
EventTime.minute(std::uniform_int_distribution<UInt8>(0, 59)(random));
EventTime.second(std::uniform_int_distribution<UInt8>(0, 59)(random));
UInt64 UserID = hash(4, powerLaw(5000, 1.1));
UserID = UserID / 10000000000ULL * 10000000000ULL
+ static_cast<time_t>(EventTime) + UserID % 1000000;
random_with_seed.seed(powerLaw(5000, 1.1));
auto get_random_with_seed = [&]{ return random_with_seed(); };
```
Advantages:
- Conceptual simplicity.
Disadvantages:
- A large amount of work is required.
- The solution only applies to one type of data.
And I preferred a more general solution that can be used for obfuscating any dataset.
In any case, this solution could be improved. Instead of manually selecting models, we could implement a catalog of models and choose the best among them (best fit plus some form of regularization). Or maybe we could use Markov models for all types of fields, not just for text. Dependencies between data could also be extracted automatically. This would require calculating the [relative entropy](https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence) (the relative amount of information) between columns. A simpler alternative is to calculate relative cardinalities for each pair of columns (something like "how many different values of A are there on average for a fixed value B"). For instance, this will make it clear that `URLDomain` fully depends on the `URL`, and not vice versa.
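As a sketch of that simpler alternative (column names follow the `URL`/`URLDomain` example above; the table name is hypothetical), the relative cardinality of a pair of columns can be estimated with a nested aggregation:
```sql
-- Average number of distinct URLs per fixed URLDomain:
-- a rough "how many different values of A per value of B" measure.
SELECT avg(urls_per_domain) AS avg_relative_cardinality
FROM
(
    SELECT
        URLDomain,
        uniqExact(URL) AS urls_per_domain
    FROM hits
    GROUP BY URLDomain
);
```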
But I also rejected this idea because there are too many factors to consider, and it would take too long to write.
### Neural networks
As I've already mentioned, this task wasn't high on the priority list — no one was even thinking about trying to solve it. But as luck would have it, our colleague Ivan Puzirevsky was teaching at the Higher School of Economics. He asked me if I had any interesting problems that would work as suitable thesis topics for his students. When I offered him this one, he assured me it had potential. So I handed this challenge off to a nice guy "off the street", Sharif (he did have to sign an NDA to access the data, though).
I shared all my ideas with him but emphasized that there were no restrictions on how the problem could be solved, and a good option would be to try approaches that I know nothing about, like using LSTM to generate a text dump of data. This seemed promising after coming across the article [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/).
The first challenge is that we need to generate structured data, not just text. But it wasn't clear whether a recurrent neural network could generate data with the desired structure. There are two ways to solve this. The first solution is to use separate models for generating the structure and the "filler", and only use the neural network for generating values. But this approach was postponed and then never completed. The second solution is to simply generate a TSV dump as text. Experience has shown that some of the rows in the text won't match the structure, but these rows can be thrown out when loading the data.
The second challenge is that the recurrent neural network generates a sequence of data, and thus dependencies in data must follow in the order of the sequence. But in our data, the order of columns can potentially be in reverse to dependencies between them. We didn't do anything to resolve this problem.
As summer approached, we had the first working Python script that generated data. The data quality seemed decent at first glance:
![python_script.jpg](https://clickhouse.com/uploads/python_script_810d491dfb.jpg)
However, we did run into some difficulties:
1. The size of the model was about a gigabyte. We tried to create a model for data that was several gigabytes in size (for a start). The fact that the resulting model is so large raised concerns. Would it be possible to extract the real data that it was trained on? Unlikely. But I don't know much about machine learning and neural networks, and I haven't read this developer's Python code, so how can I be sure? There were several articles published at the time about how to compress neural networks without loss of quality, but it wasn't implemented. On the one hand, this doesn't seem to be a serious problem since we can opt out of publishing the model and just publish the generated data. On the other hand, if overfitting occurs, the generated data may contain some part of the source data.
2. On a machine with a single CPU, the data generation speed is approximately 100 rows per second. Our goal was to generate at least a billion rows. Calculations showed that this wouldn't be completed before the date of the thesis defense. It didn't make sense to use additional hardware because the goal was to make a data generation tool that anyone could use.
Sharif tried to analyze the quality of data by comparing statistics. Among other things, he calculated the frequency of different characters occurring in the source data and in the generated data. The result was stunning: the most frequent characters were Ð and Ñ.
Don't worry about Sharif, though. He successfully defended his thesis, and we happily forgot about the whole thing.
### Mutation of compressed data
Let's assume that the problem statement has been reduced to a single point: we need to generate data that has the same compression ratio as the source data, and the data must decompress at the same speed. How can we achieve this? We need to edit compressed data bytes directly! This allows us to change the data without changing the size of the compressed data, plus everything will work fast. I wanted to try out this idea right away, despite the fact that the problem it solves is different from what we started with. But that's how it always is.
So how do we edit a compressed file? Let's say we are only interested in LZ4. LZ4 compressed data is composed of sequences, which in turn are strings of not-compressed bytes (literals), followed by a match copy:
1. Literals (copy the following N bytes as is).
2. Matches with a minimum repeat length of 4 (repeat N bytes in the file at a distance of M).
Source data:
`Hello world Hello.`
Compressed data (arbitrary example):
`literals 12 "Hello world " match 5 12.`
In the compressed file, we leave "match" as-is and change the byte values in "literals". As a result, after decompressing, we get a file in which all repeating sequences at least 4 bytes long are also repeated at the same distance, but they consist of a different set of bytes (basically, the modified file doesn't contain a single byte that was taken from the source file).
But how do we change the bytes? The answer isn't obvious because, in addition to the column types, the data also has its own internal, implicit structure that we would like to preserve. For example, text data is often stored in UTF-8 encoding, and we want the generated data to also be valid UTF-8. I developed a simple heuristic that involves meeting several criteria (a rough code sketch follows the list):
- Null bytes and ASCII control characters are kept as-is.
- Some punctuation characters remain as-is.
- ASCII is converted to ASCII, and for everything else, the most significant bit is preserved (or an explicit set of "if" statements is written for different UTF-8 lengths). In one byte class, a new value is picked uniformly at random.
- Fragments like `https://` are preserved; otherwise, it looks a bit silly.
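A minimal sketch of such a heuristic follows. This is my own approximation, not the exact logic of clickhouse-obfuscator; in particular, the handling of multi-byte UTF-8 sequences is reduced to preserving the top bits of each byte, and whitelisting of fragments like `https://` is left out.
```c++
#include <random>
#include <string>

/// Replace a byte with a random byte from the same class, keeping control
/// characters and some punctuation untouched.
char mutateByte(char c, std::minstd_rand & rng)
{
    auto u = static_cast<unsigned char>(c);
    if (u < 0x20 || u == 0x7F)   /// Null bytes and ASCII control characters stay as-is.
        return c;
    if (std::string(".,:/?&=#-_").find(c) != std::string::npos)   /// Keep some punctuation.
        return c;
    if (u < 0x80)   /// ASCII maps to a random printable ASCII byte.
        return static_cast<char>(0x20 + rng() % (0x7F - 0x20));
    /// Non-ASCII: keep the most significant bits that mark the byte's role in a
    /// UTF-8 sequence and randomize the rest. A real implementation would branch
    /// on the exact UTF-8 sequence length instead of this crude approximation.
    return static_cast<char>((u & 0xC0) | (rng() % 0x40));
}
```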
The only caveat to this approach is that the data model is the source data itself, which means it cannot be published. The model is only fit for generating amounts of data no larger than the source. In contrast, the previous approaches produce models that allow generating data of arbitrary size.
```
http://ljc.she/kdoqdqwpgafe/klwlpm&qw=962788775I0E7bs7OXeAyAx
http://ljc.she/kdoqdqwdffhant.am/wcpoyodjit/cbytjgeoocvdtclac
http://ljc.she/kdoqdqwpgafe/klwlpm&qw=962788775I0E7bs7OXe
http://ljc.she/kdoqdqwdffhant.am/wcpoyodjit/cbytjgeoocvdtclac
http://ljc.she/kdoqdqwdbknvj.s/hmqhpsavon.yf#aortxqdvjja
http://ljc.she/kdoqdqw-bknvj.s/hmqhpsavon.yf#aortxqdvjja
http://ljc.she/kdoqdqwpdtu-Unu-Rjanjna-bbcohu_qxht
http://ljc.she/kdoqdqw-bknvj.s/hmqhpsavon.yf#aortxqdvjja
http://ljc.she/kdoqdqwpdtu-Unu-Rjanjna-bbcohu_qxht
http://ljc.she/kdoqdqw-bknvj.s/hmqhpsavon.yf#aortxqdvjja
http://ljc.she/kdoqdqwpdtu-Unu-Rjanjna-bbcohu-702130
```
The results were positive, and the data was interesting, but something wasn't quite right. The URLs kept the same structure, but in some of them, it was too easy to recognize the original terms, such as "avito" (a popular marketplace in Russia), so I created a heuristic that swapped some of the bytes around.
There were other concerns as well. For example, sensitive information could possibly reside in a FixedString column in binary representation and potentially consist of ASCII control characters and punctuation, which I decided to preserve. However, I didn't take data types into consideration.
Another problem is that if a column stores data in the "length, value" format (this is how String columns are stored), how do I ensure that the length remains correct after the mutation? When I tried to fix this, I immediately lost interest.
### Random permutations
Unfortunately, the problem wasn't solved. We performed a few experiments, and it just got worse. The only thing left was to sit around doing nothing and surf the web randomly since the magic was gone. Luckily, I came across a page that [explained the algorithm](http://fabiensanglard.net/fizzlefade/index.php) for rendering the death of the main character in the game Wolfenstein 3D.
<img src="https://clickhouse.com/uploads/wolfenstein_bb259bd741.gif" alt="wolfenstein.gif" style="width: 764px;">
<br/>
The animation is really well done — the screen fills up with blood. The article explains that this is actually a pseudorandom permutation. A random permutation of a set of elements is a randomly picked bijective (one-to-one) transformation of the set. In other words, a mapping where each and every derived element corresponds to exactly one original element (and vice versa). Put simply, it is a way to randomly iterate through all the elements of a data set. And that is exactly the process shown in the picture: each pixel is filled in random order, without any repetition. If we were to just choose a random pixel at each step, it would take a long time to get to the last one.
The game uses a very simple algorithm for pseudorandom permutation called a linear feedback shift register ([LFSR](https://en.wikipedia.org/wiki/Linear-feedback_shift_register)). Similar to pseudorandom number generators, random permutations, or rather their families, can be cryptographically strong when parametrized by a key. This is exactly what we needed for our data transformation. However, the details were trickier. For example, cryptographically strong encryption of N bytes to N bytes with a pre-determined key and initialization vector seems like it would work for a pseudorandom permutation of a set of N-byte strings. Indeed, this is a one-to-one transformation, and it appears to be random. But if we use the same transformation for all of our data, the result may be susceptible to cryptanalysis because the same initialization vector and key value are used multiple times. This is similar to the [Electronic Codebook](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#ECB) mode of operation for a block cipher.
For example, two multiplications and three xorshift operations are used for the [murmurhash](https://github.com/ClickHouse/ClickHouse/blob/master/dbms/src/Common/HashTable/Hash.h#L18) finalizer. This operation is a pseudorandom permutation. However, I should point out that hash functions don't have to be one-to-one (even hashes of N bits to N bits).
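For reference, here is that finalizer (the MurmurHash3 fmix64 step, which is what the linked `intHash64` implements, if I read it correctly). Each individual step is invertible — a xorshift can be undone, and multiplication by an odd constant is invertible modulo 2^64 — so the whole composition is a permutation of 64-bit values:
```c++
#include <cstdint>

/// MurmurHash3 fmix64 finalizer: a pseudorandom permutation of 64-bit values.
uint64_t fmix64(uint64_t x)
{
    x ^= x >> 33;
    x *= 0xff51afd7ed558ccdULL;
    x ^= x >> 33;
    x *= 0xc4ceb9fe1a85ec53ULL;
    x ^= x >> 33;
    return x;
}
```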
Or here's another interesting [example from elementary number theory](https://preshing.com/20121224/how-to-generate-a-sequence-of-unique-random-integers/) from Jeff Preshing's website.
How can we use pseudorandom permutations to solve our problem? We can use them to transform all numeric fields so we can preserve the cardinalities and mutual cardinalities of all combinations of fields. In other words, COUNT(DISTINCT) will return the same value as before the transformation and, furthermore, with any GROUP BY.
It is worth noting that preserving all cardinalities somewhat contradicts our goal of data anonymization. Let's say someone knows that the source data for site sessions contains a user who visited sites from 10 different countries, and they want to find that user in the transformed data. The transformed data also shows that the user visited sites from 10 different countries, which makes it easy to narrow down the search. However, even if they find out what the user was transformed into, it won't be very useful; all of the other data has also been transformed, so they won't be able to figure out what sites the user visited or anything else. But these rules can be applied in a chain. For example, suppose someone knows that the most frequently occurring website in our data is Google, with Yahoo in second place. In that case, they can use the ranking to determine which transformed site identifiers actually mean Yahoo and Google. There's nothing surprising about this since we are working with an informal problem statement, and we are trying to find a balance between the anonymization of data (hiding information) and preserving data properties (disclosure of information). For information about how to approach the data anonymization issue more reliably, read this [article](https://medium.com/georgian-impact-blog/a-brief-introduction-to-differential-privacy-eacf8722283b).
In addition to keeping the original cardinality of values, I also wanted to keep the order of magnitude of the values. What I mean is that if the source data contained numbers under 10, then I want the transformed numbers to also be small. How can we achieve this?
For example, we can divide a set of possible values into size classes and perform permutations within each class separately (maintaining the size classes). The easiest way to do this is to take the nearest power of two or the position of the most significant bit in the number as the size class (these are the same thing). The numbers 0 and 1 will always remain as is. The numbers 2 and 3 will sometimes remain as is (with a probability of 1/2) and will sometimes be swapped (with a probability of 1/2). The set of numbers 1024..2047 will be mapped to one of 1024! (factorial) variants, and so on. For signed numbers, we will keep the sign.
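A sketch of what this could look like for unsigned integers (assumed names, not the actual implementation; sign handling is left out, and the keyed bijection here is a trivial affine map rather than a proper permutation drawn from all possibilities within the class):
```c++
#include <bit>
#include <cstdint>

/// Transform a value while keeping its size class: the position of the most
/// significant bit is preserved, and only the lower bits are permuted.
uint64_t transformKeepingSizeClass(uint64_t x, uint64_t key)
{
    if (x < 2)
        return x;   /// 0 and 1 always stay as-is.

    int msb = 63 - std::countl_zero(x);        /// Position of the most significant bit.
    uint64_t low_mask = (1ULL << msb) - 1;

    uint64_t low = x & low_mask;
    /// An affine map (multiply by an odd constant, add a key-derived value) is a
    /// bijection modulo 2^msb, so the value stays within its size class.
    low = (low * 0x9E3779B97F4A7C15ULL + key) & low_mask;

    return (1ULL << msb) | low;
}
```
With this scheme the numbers 2 and 3 either stay put or swap depending on the key, and the range 1024..2047 maps back onto itself, exactly as described above.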
It's also doubtful whether we need a one-to-one function. We can probably just use a cryptographically strong hash function. The transformation won't be one-to-one, but the cardinality will be close to the same.
However, we need a cryptographically strong random permutation so that when we define a key and derive a permutation with that key, restoring the original data from the rearranged data without knowing the key would be difficult.
There is one problem: in addition to knowing nothing about neural networks and machine learning, I am also quite ignorant when it comes to cryptography. That leaves just my courage. I was still reading random web pages and found a link on [Hacker News](https://news.ycombinator.com/item?id=15122540) to a discussion on Fabien Sanglard's page. It had a link to a [blog post](http://antirez.com/news/113) by Redis developer Salvatore Sanfilippo that talked about using a wonderful generic way of getting random permutations, known as a [Feistel network](https://en.wikipedia.org/wiki/Feistel_cipher).
The Feistel network is iterative, consisting of rounds. Each round is a remarkable transformation that allows you to get a one-to-one function from any function. Let's look at how it works.
1. The argument's bits are divided into two halves:
```
arg: xxxxyyyy
arg_l: xxxx
arg_r: yyyy
```
2. The right half replaces the left. In its place, we put the result of XOR on the initial value of the left half and the result of the function applied to the initial value of the right half, like this:
```
res: yyyyzzzz
res_l = yyyy = arg_r
res_r = zzzz = arg_l ^ F(arg_r)
```
There is also a claim that if we use a cryptographically strong pseudorandom function for F and apply a Feistel round at least four times, we'll get a cryptographically strong pseudorandom permutation.
This is like a miracle: we take a function that produces random garbage based on data, insert it into the Feistel network, and we now have a function that produces random garbage based on data, but yet is invertible!
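Putting the two steps above into code, here is a toy 4-round Feistel network over 32-bit values with a deliberately weak round function F (assumed names; nothing here matches the real implementation). The point is only that decryption reuses F as-is and simply walks the rounds in reverse:
```c++
#include <cstdint>

/// Round function: any function works; this one is NOT cryptographically strong.
static uint16_t F(uint16_t half, uint32_t round_key)
{
    uint32_t x = half ^ round_key;
    x *= 0x45D9F3B;
    x ^= x >> 16;
    return static_cast<uint16_t>(x);
}

uint32_t feistelEncrypt(uint32_t value, const uint32_t keys[4])
{
    uint16_t l = value >> 16;
    uint16_t r = value & 0xFFFF;
    for (int round = 0; round < 4; ++round)
    {
        uint16_t new_l = r;                         /// res_l = arg_r
        uint16_t new_r = l ^ F(r, keys[round]);     /// res_r = arg_l ^ F(arg_r)
        l = new_l;
        r = new_r;
    }
    return (static_cast<uint32_t>(l) << 16) | r;
}

uint32_t feistelDecrypt(uint32_t value, const uint32_t keys[4])
{
    uint16_t l = value >> 16;
    uint16_t r = value & 0xFFFF;
    for (int round = 3; round >= 0; --round)        /// Same rounds, reverse order.
    {
        uint16_t new_r = l;
        uint16_t new_l = r ^ F(l, keys[round]);
        l = new_l;
        r = new_r;
    }
    return (static_cast<uint32_t>(l) << 16) | r;
}
```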
The Feistel network is at the heart of several data encryption algorithms. What we're going to do is something like encryption, only it's really bad. There are two reasons for this:
1. We are encrypting individual values independently and in the same way, similar to the Electronic Codebook mode of operation.
2. We are storing information about the order of magnitude (the nearest power of two) and the sign of the value, which means that some values do not change at all.
This way, we can obfuscate numeric fields while preserving the properties we need. For example, after using LZ4, the compression ratio should remain approximately the same because the duplicate values in the source data will be repeated in the converted data and at the same distances from each other.
### Markov models
Text models are used for data compression, predictive input, speech recognition, and random string generation. A text model is a probability distribution of all possible strings. Let's say we have an imaginary probability distribution of the texts of all the books that humanity could ever write. To generate a string, we just take a random value with this distribution and return the resulting string (a random book that humanity could write). But how do we find out the probability distribution of all possible strings?
First, this would require too much information. There are 256^10 possible strings that are 10 bytes in length, and it would take quite a lot of memory to explicitly write a table with the probability of each string. Second, we don't have enough statistics to accurately assess the distribution.
This is why we use a probability distribution obtained from rough statistics as the text model. For example, we could calculate the probability of each letter occurring in the text and then generate strings by selecting each next letter with the same probability. This primitive model works, but the strings are still very unnatural.
To improve the model slightly, we could also make use of the conditional probability of the letter's occurrence if it is preceded by N-specific letters. N is a pre-set constant. Let's say N = 5, and we are calculating the probability of the letter "e" occurring after the letters "compr". This text model is called an Order-N Markov model.
```
P(cata | cat) = 0.8
P(catb | cat) = 0.05
P(catc | cat) = 0.1
...
```
Let's look at how Markov models work on the website [of Hay Kranen](https://projects.haykranen.nl/markov/demo/). Unlike LSTM neural networks, the models only have enough memory for a small context of fixed-length N, so they generate funny nonsensical texts. Markov models are also used in primitive methods for generating spam, and the generated texts can be easily distinguished from real ones by counting statistics that don't fit the model. There is one advantage: Markov models work much faster than neural networks, which is exactly what we need.
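As a sketch of how little machinery an order-N model needs (my own toy code: smoothing and the fallback to lower-order models are omitted, and the starting context is assumed to already be N characters long):
```c++
#include <map>
#include <random>
#include <string>

/// A minimal order-N Markov text model: count how often each character follows
/// each N-character context, then sample the next character proportionally.
class MarkovModel
{
public:
    explicit MarkovModel(size_t order_) : order(order_) {}

    void train(const std::string & text)
    {
        for (size_t i = order; i < text.size(); ++i)
            counts[text.substr(i - order, order)][text[i]] += 1;
    }

    std::string generate(std::string context, size_t length, std::mt19937 & rng) const
    {
        std::string result = context;
        for (size_t i = 0; i < length; ++i)
        {
            auto it = counts.find(context);
            if (it == counts.end())
                break;   /// No statistics for this context.

            /// Pick the next character with probability proportional to its count.
            size_t total = 0;
            for (const auto & kv : it->second)
                total += kv.second;
            size_t target = rng() % total;
            char next = 0;
            for (const auto & kv : it->second)
            {
                if (target < kv.second) { next = kv.first; break; }
                target -= kv.second;
            }

            result += next;
            context = result.substr(result.size() - order, order);
        }
        return result;
    }

private:
    size_t order;
    std::map<std::string, std::map<char, size_t>> counts;
};
```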
Example for Title (our examples are in Turkish because of the data used):
<blockquote style="font-size: 15px;">
<p>Hyunday Butter'dan anket shluha — Politika head manşetleri | STALKER BOXER Çiftede book — Yanudistkarışmanlı Mı Kanal | League el Digitalika Haberler Haberleri — Haberlerisi — Hotels with Centry'ler Neden babah.com</p>
</blockquote>
We can calculate statistics from the source data, create a Markov model, and generate new data. Note that the model needs smoothing to avoid disclosing information about rare combinations in the source data, but this is not a problem. We use a combination of models from 0 to N. If statistics are insufficient for order N, the N−1 model is used instead.
But we still want to preserve the cardinality of data. In other words, if the source data had 123456 unique URL values, the result should have approximately the same number of unique values. We can use a deterministically initialized random number generator to achieve this. The easiest way is to use a hash function and apply it to the original value. In other words, we get a pseudorandom result that is explicitly determined by the original value.
There is another requirement: the source data may have many different URLs that start with the same prefix but aren't identical. For example: `https://www.clickhouse.com/images/cats/?id=xxxxxx`. We want the result to also have URLs that all start with the same prefix, but a different one, for example: `http://ftp.google.kz/cgi-bin/index.phtml?item=xxxxxx`. As the random number generator for picking the next character with the Markov model, we take a hash function of a moving window of 8 bytes at the current position instead of deriving it from the entire string (a code sketch follows the illustration below).
<pre class='code-with-play'>
<div class='code'>
https://www.clickhouse.com/images/cats/?id=12345
^^^^^^^^
distribution: [aaaa][b][cc][dddd][e][ff][ggggg][h]...
hash("images/c") % total_count: ^
</div>
</pre>
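A sketch of that trick (assumed names; a real implementation would use a keyed hash so the output depends on the secret key): the pseudorandom value for the next character is derived only from the last 8 generated bytes, so identical windows always lead to identical choices, which is what preserves shared prefixes and keeps the number of distinct values close to the original. The value is then taken modulo the total count to index the cumulative distribution, as in the illustration above.
```c++
#include <cstdint>
#include <functional>
#include <string>

/// The "random number" used to pick the next character is a hash of a sliding
/// 8-byte window over the already generated text (plus a seed), not the output
/// of a free-running generator. Equal windows give equal results.
uint64_t nextRandom(const std::string & generated, uint64_t seed)
{
    const size_t window = 8;
    size_t start = generated.size() > window ? generated.size() - window : 0;
    /// std::hash is enough for a sketch; a keyed hash would be used in practice.
    return std::hash<std::string>{}(generated.substr(start)) ^ seed;
}

/// Usage: index into the cumulative distribution of the Markov model.
/// size_t pos = nextRandom(result, seed) % total_count;
```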
It turns out to be exactly what we need. Here's the example of page titles:
<pre class='code-with-play'>
<div class='code'>
PhotoFunia - Haber7 - Have mükemment.net Oynamak içinde şaşıracak haber, Oyunu Oynanılmaz • apród.hu kínálatában - RT Arabic
PhotoFunia - Kinobar.Net - apród: Ingyenes | Posti
PhotoFunia - Peg Perfeo - Castika, Sıradışı Deniz Lokoning Your Code, sire Eminema.tv/
PhotoFunia - TUT.BY - Your Ayakkanın ve Son Dakika Spor,
PhotoFunia - big film izle, Del Meireles offilim, Samsung DealeXtreme Değerler NEWSru.com.tv, Smotri.com Mobile yapmak Okey
PhotoFunia 5 | Galaxy, gt, după ce anal bilgi yarak Ceza RE050A V-Stranç
PhotoFunia :: Miami olacaksını yerel Haberler Oyun Young video
PhotoFunia Monstelli'nin En İyi kisa.com.tr Star Thunder Ekranı
PhotoFunia Seks - Politika,Ekonomi,Spor GTA SANAYİ VE
PhotoFunia Taker-Rating Star TV Resmi Söylenen Yatağa każdy dzież wierzchnie
PhotoFunia TourIndex.Marketime oyunu Oyna Geldolları Mynet Spor,Magazin,Haberler yerel Haberleri ve Solvia, korkusuz Ev SahneTv
PhotoFunia todo in the Gratis Perky Parti'nin yapıyı by fotogram
PhotoFunian Dünyasın takımız halles en kulları - TEZ
</div>
</pre>
## Results
After trying four methods, I got so tired of this problem that it was time just to choose something, make it into a usable tool, and announce the solution. I chose the solution that uses random permutations and Markov models parametrized by a key. It is implemented as the `clickhouse-obfuscator` program, which is very easy to use. The input is a table dump in any supported format (such as CSV or JSONEachRow), and the command-line parameters specify the table structure (column names and types) and the secret key (any string, which you can forget immediately after use). The output is the same number of rows of obfuscated data.
The program is installed with `clickhouse-client`, has no dependencies, and works on almost any flavor of Linux. You can apply it to any database dump, not just ClickHouse. For instance, you can generate test data from MySQL or PostgreSQL databases or create development databases that are similar to your production databases.
```bash
clickhouse-obfuscator \
--seed "$(head -c16 /dev/urandom | base64)" \
--input-format TSV --output-format TSV \
--structure 'CounterID UInt32, URLDomain String, URL String, SearchPhrase String, Title String' \
< table.tsv > result.tsv
```
```bash
clickhouse-obfuscator --help
```
Of course, everything isn't so cut and dry because data transformed by this program is almost completely reversible. The question is whether it is possible to perform the reverse transformation without knowing the key. If the transformation used a cryptographic algorithm, this operation would be as difficult as a brute-force search. Although the transformation uses some cryptographic primitives, they are not used in the correct way, and the data is susceptible to certain methods of analysis. To avoid problems, these issues are covered in the documentation for the program (access it using --help).
In the end, we transformed the data set we needed [for functional and performance testing](https://clickhouse.com/docs/en/getting-started/example-datasets/metrica/), and received approval from our data security team to publish.
Our developers and members of our community use this data for real performance testing when optimizing algorithms inside ClickHouse. Third-party users can provide us with their obfuscated data so that we can make ClickHouse even faster for them. We also released an independent open benchmark for hardware and cloud providers on top of this data: [https://benchmark.clickhouse.com/](https://benchmark.clickhouse.com/)

View File

@ -1163,6 +1163,12 @@ void ZooKeeper::setZooKeeperLog(std::shared_ptr<DB::ZooKeeperLog> zk_log_)
zk->setZooKeeperLog(zk_log);
}
void ZooKeeper::setServerCompletelyStarted()
{
if (auto * zk = dynamic_cast<Coordination::ZooKeeper *>(impl.get()))
zk->setServerCompletelyStarted();
}
size_t getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses)
{

View File

@ -520,6 +520,8 @@ public:
UInt32 getSessionUptime() const { return static_cast<UInt32>(session_uptime.elapsedSeconds()); }
void setServerCompletelyStarted();
private:
friend class EphemeralNodeHolder;

View File

@ -42,6 +42,10 @@ ZooKeeperArgs::ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, c
{
connection_timeout_ms = config.getInt(config_name + "." + key);
}
else if (key == "enable_fault_injections_during_startup")
{
enable_fault_injections_during_startup = config.getBool(config_name + "." + key);
}
else if (key == "send_fault_probability")
{
send_fault_probability = config.getDouble(config_name + "." + key);
@ -50,6 +54,22 @@ ZooKeeperArgs::ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, c
{
recv_fault_probability = config.getDouble(config_name + "." + key);
}
else if (key == "send_sleep_probability")
{
send_sleep_probability = config.getDouble(config_name + "." + key);
}
else if (key == "recv_sleep_probability")
{
recv_sleep_probability = config.getDouble(config_name + "." + key);
}
else if (key == "send_sleep_ms")
{
send_sleep_ms = config.getUInt64(config_name + "." + key);
}
else if (key == "recv_sleep_ms")
{
recv_sleep_ms = config.getUInt64(config_name + "." + key);
}
else if (key == "identity")
{
identity = config.getString(config_name + "." + key);

View File

@ -28,8 +28,13 @@ struct ZooKeeperArgs
int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS;
int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS;
int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS;
bool enable_fault_injections_during_startup = false;
double send_fault_probability = 0.0;
double recv_fault_probability = 0.0;
double send_sleep_probability = 0.0;
double recv_sleep_probability = 0.0;
UInt64 send_sleep_ms = 0;
UInt64 recv_sleep_ms = 0;
DB::GetPriorityForLoadBalancing get_priority_load_balancing;
};

View File

@ -1,19 +1,21 @@
#include <Common/ZooKeeper/ZooKeeperImpl.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <base/getThreadId.h>
#include <base/sleep.h>
#include <Common/EventNotifier.h>
#include <Common/Exception.h>
#include <Common/ProfileEvents.h>
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/ZooKeeper/ZooKeeperCommon.h>
#include <Common/ZooKeeper/ZooKeeperIO.h>
#include <Common/Exception.h>
#include <Common/EventNotifier.h>
#include <Common/logger_useful.h>
#include <Common/ProfileEvents.h>
#include <Common/setThreadName.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <base/getThreadId.h>
#include "Coordination/KeeperConstants.h"
#include "config.h"
@ -342,16 +344,8 @@ ZooKeeper::ZooKeeper(
default_acls.emplace_back(std::move(acl));
}
/// It makes sense (especially, for async requests) to inject a fault in two places:
/// pushRequest (before request is sent) and receiveEvent (after request was executed).
if (0 < args.send_fault_probability && args.send_fault_probability <= 1)
{
send_inject_fault.emplace(args.send_fault_probability);
}
if (0 < args.recv_fault_probability && args.recv_fault_probability <= 1)
{
recv_inject_fault.emplace(args.recv_fault_probability);
}
if (args.enable_fault_injections_during_startup)
setupFaultDistributions();
connect(nodes, args.connection_timeout_ms * 1000);
@ -571,7 +565,6 @@ void ZooKeeper::sendAuth(const String & scheme, const String & data)
static_cast<int32_t>(err), errorMessage(err));
}
void ZooKeeper::sendThread()
{
setThreadName("ZooKeeperSend");
@ -587,6 +580,8 @@ void ZooKeeper::sendThread()
auto now = clock::now();
auto next_heartbeat_time = prev_heartbeat_time + std::chrono::milliseconds(args.session_timeout_ms / 3);
maybeInjectSendSleep();
if (next_heartbeat_time > now)
{
/// Wait for the next request in queue. No more than operation timeout. No more than until next heartbeat time.
@ -659,6 +654,7 @@ void ZooKeeper::receiveThread()
Int64 waited_us = 0;
while (!requests_queue.isFinished())
{
maybeInjectRecvSleep();
auto prev_bytes_received = in->count();
clock::time_point now = clock::now();
@ -728,8 +724,7 @@ void ZooKeeper::receiveEvent()
ZooKeeperResponsePtr response;
UInt64 elapsed_ms = 0;
if (unlikely(recv_inject_fault) && recv_inject_fault.value()(thread_local_rng))
throw Exception(Error::ZSESSIONEXPIRED, "Session expired (fault injected on recv)");
maybeInjectRecvFault();
if (xid == PING_XID)
{
@ -1078,8 +1073,7 @@ void ZooKeeper::pushRequest(RequestInfo && info)
}
}
if (unlikely(send_inject_fault) && send_inject_fault.value()(thread_local_rng))
throw Exception(Error::ZSESSIONEXPIRED, "Session expired (fault injected on send)");
maybeInjectSendFault();
if (!requests_queue.tryPush(std::move(info), args.operation_timeout_ms))
{
@ -1403,4 +1397,61 @@ void ZooKeeper::logOperationIfNeeded(const ZooKeeperRequestPtr &, const ZooKeepe
{}
#endif
void ZooKeeper::setServerCompletelyStarted()
{
if (!args.enable_fault_injections_during_startup)
setupFaultDistributions();
}
void ZooKeeper::setupFaultDistributions()
{
/// It makes sense (especially, for async requests) to inject a fault in two places:
/// pushRequest (before request is sent) and receiveEvent (after request was executed).
if (0 < args.send_fault_probability && args.send_fault_probability <= 1)
{
LOG_INFO(log, "ZK send fault: {}%", args.send_fault_probability * 100);
send_inject_fault.emplace(args.send_fault_probability);
}
if (0 < args.recv_fault_probability && args.recv_fault_probability <= 1)
{
LOG_INFO(log, "ZK recv fault: {}%", args.recv_fault_probability * 100);
recv_inject_fault.emplace(args.recv_fault_probability);
}
if (0 < args.send_sleep_probability && args.send_sleep_probability <= 1)
{
LOG_INFO(log, "ZK send sleep: {}% -> {}ms", args.send_sleep_probability * 100, args.send_sleep_ms);
send_inject_sleep.emplace(args.send_sleep_probability);
}
if (0 < args.recv_sleep_probability && args.recv_sleep_probability <= 1)
{
LOG_INFO(log, "ZK recv sleep: {}% -> {}ms", args.recv_sleep_probability * 100, args.recv_sleep_ms);
recv_inject_sleep.emplace(args.recv_sleep_probability);
}
inject_setup.test_and_set();
}
void ZooKeeper::maybeInjectSendFault()
{
if (unlikely(inject_setup.test() && send_inject_fault && send_inject_fault.value()(thread_local_rng)))
throw Exception(Error::ZSESSIONEXPIRED, "Session expired (fault injected on recv)");
}
void ZooKeeper::maybeInjectRecvFault()
{
if (unlikely(inject_setup.test() && recv_inject_fault && recv_inject_fault.value()(thread_local_rng)))
throw Exception(Error::ZSESSIONEXPIRED, "Session expired (fault injected on recv)");
}
void ZooKeeper::maybeInjectSendSleep()
{
if (unlikely(inject_setup.test() && send_inject_sleep && send_inject_sleep.value()(thread_local_rng)))
sleepForMilliseconds(args.send_sleep_ms);
}
void ZooKeeper::maybeInjectRecvSleep()
{
if (unlikely(inject_setup.test() && recv_inject_sleep && recv_inject_sleep.value()(thread_local_rng)))
sleepForMilliseconds(args.recv_sleep_ms);
}
}

View File

@ -197,13 +197,24 @@ public:
void setZooKeeperLog(std::shared_ptr<DB::ZooKeeperLog> zk_log_);
void setServerCompletelyStarted();
private:
ACLs default_acls;
zkutil::ZooKeeperArgs args;
/// Fault injection
void maybeInjectSendFault();
void maybeInjectRecvFault();
void maybeInjectSendSleep();
void maybeInjectRecvSleep();
void setupFaultDistributions();
std::atomic_flag inject_setup = ATOMIC_FLAG_INIT;
std::optional<std::bernoulli_distribution> send_inject_fault;
std::optional<std::bernoulli_distribution> recv_inject_fault;
std::optional<std::bernoulli_distribution> send_inject_sleep;
std::optional<std::bernoulli_distribution> recv_inject_sleep;
Poco::Net::StreamSocket socket;
/// To avoid excessive getpeername(2) calls.

View File

@ -0,0 +1,14 @@
#include <gtest/gtest.h>
#include <base/JSON.h>
TEST(JSON, searchField)
{
const JSON json = JSON(std::string_view(R"({"k1":1,"k2":{"k3":2,"k4":3,"k":4},"k":5})"));
ASSERT_EQ(json["k1"].getUInt(), 1);
ASSERT_EQ(json["k2"].toString(), R"({"k3":2,"k4":3,"k":4})");
ASSERT_EQ(json["k2"]["k3"].getUInt(), 2);
ASSERT_EQ(json["k2"]["k4"].getUInt(), 3);
ASSERT_EQ(json["k2"]["k"].getUInt(), 4);
ASSERT_EQ(json["k"].getUInt(), 5);
}

View File

@ -69,7 +69,7 @@ T EnumValues<T>::getValue(StringRef field_name, bool try_treat_as_id) const
}
auto hints = this->getHints(field_name.toString());
auto hints_string = !hints.empty() ? ", maybe you meant: " + toString(hints) : "";
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown element '{}' for enum {}", field_name.toString(), hints_string);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown element '{}' for enum{}", field_name.toString(), hints_string);
}
return it->getMapped();
}

View File

@ -276,6 +276,10 @@ public:
return delegate->getMetadataStorage();
}
DiskPtr getDelegateDiskIfExists() const override
{
return delegate;
}
private:
String wrappedPath(const String & path) const

View File

@ -423,6 +423,8 @@ public:
void markDiskAsCustom() { is_custom_disk = true; }
virtual DiskPtr getDelegateDiskIfExists() const { return nullptr; }
protected:
friend class DiskDecorator;

View File

@ -0,0 +1,22 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionToDecimalString.h>
#include <Functions/IFunction.h>
namespace DB
{
REGISTER_FUNCTION(ToDecimalString)
{
factory.registerFunction<FunctionToDecimalString>(
{
R"(
Returns string representation of a number. First argument is the number of any numeric type,
second argument is the desired number of digits in fractional part. Returns String.
)",
Documentation::Examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)"}},
Documentation::Categories{"String"}
}, FunctionFactory::CaseInsensitive);
}
}

View File

@ -0,0 +1,312 @@
#pragma once
#include <Core/Types.h>
#include <Core/DecimalFunctions.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnDecimal.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteBufferFromVector.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER;
}
class FunctionToDecimalString : public IFunction
{
public:
static constexpr auto name = "toDecimalString";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionToDecimalString>(); }
String getName() const override { return name; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 2; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isNumber(*arguments[0]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal first argument for formatDecimal function: got {}, expected numeric type",
arguments[0]->getName());
if (!isUInt8(*arguments[1]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal second argument for formatDecimal function: got {}, expected UInt8",
arguments[1]->getName());
return std::make_shared<DataTypeString>();
}
bool useDefaultImplementationForConstants() const override { return true; }
private:
/// For operations with Integer/Float
template <typename FromVectorType>
void vectorConstant(const FromVectorType & vec_from, UInt8 precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
{
size_t input_rows_count = vec_from.size();
result_offsets.resize(input_rows_count);
/// Buffer is used here and in functions below because resulting size cannot be precisely anticipated,
/// and buffer resizes on-the-go. Also, .count() provided by buffer is convenient in this case.
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
for (size_t i = 0; i < input_rows_count; ++i)
{
format(vec_from[i], buf_to, precision);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <typename FirstArgVectorType>
void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector<UInt8>::Container & vec_precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
{
size_t input_rows_count = vec_from.size();
result_offsets.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
for (size_t i = 0; i < input_rows_count; ++i)
{
if (vec_precision[i] > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested, shall not be more than {}", max_digits);
format(vec_from[i], buf_to, vec_precision[i]);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <typename FirstArgType>
void constantVector(const FirstArgType & value_from, const ColumnVector<UInt8>::Container & vec_precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
{
size_t input_rows_count = vec_precision.size();
result_offsets.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
for (size_t i = 0; i < input_rows_count; ++i)
{
if (vec_precision[i] > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested, shall not be more than {}", max_digits);
format(value_from, buf_to, vec_precision[i]);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
/// For operations with Decimal
template <typename FirstArgVectorType>
void vectorConstant(const FirstArgVectorType & vec_from, UInt8 precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
{
/// There are no more than 77 meaning digits (as it is the max length of UInt256). So we can limit it with 77.
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
if (precision > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
size_t input_rows_count = vec_from.size();
result_offsets.resize(input_rows_count);
for (size_t i = 0; i < input_rows_count; ++i)
{
writeText(vec_from[i], from_scale, buf_to, true, true, precision);
writeChar(0, buf_to);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <typename FirstArgVectorType>
void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector<UInt8>::Container & vec_precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
{
size_t input_rows_count = vec_from.size();
result_offsets.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
for (size_t i = 0; i < input_rows_count; ++i)
{
if (vec_precision[i] > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
writeText(vec_from[i], from_scale, buf_to, true, true, vec_precision[i]);
writeChar(0, buf_to);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <typename FirstArgType>
void constantVector(const FirstArgType & value_from, const ColumnVector<UInt8>::Container & vec_precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
{
size_t input_rows_count = vec_precision.size();
result_offsets.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
for (size_t i = 0; i < input_rows_count; ++i)
{
if (vec_precision[i] > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
writeText(value_from, from_scale, buf_to, true, true, vec_precision[i]);
writeChar(0, buf_to);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <is_floating_point T>
static void format(T value, DB::WriteBuffer & out, UInt8 precision)
{
/// Maximum of 60 is hard-coded in 'double-conversion/double-conversion.h' for floating point values,
/// Catch this here to give user a more reasonable error.
if (precision > 60)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too high precision requested for Float, must not be more than 60, got {}", Int8(precision));
DB::DoubleConverter<false>::BufferType buffer;
double_conversion::StringBuilder builder{buffer, sizeof(buffer)};
const auto result = DB::DoubleConverter<false>::instance().ToFixed(value, precision, &builder);
if (!result)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, "Error processing number: {}", value);
out.write(buffer, builder.position());
writeChar(0, out);
}
template <is_integer T>
static void format(T value, DB::WriteBuffer & out, UInt8 precision)
{
/// Fractional part for Integer is just trailing zeros. Let's limit it with 77 (like with Decimals).
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
if (precision > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested, shall not be more than {}", max_digits);
writeText(value, out);
if (precision > 0) [[likely]]
{
writeChar('.', out);
for (int i = 0; i < precision; ++i)
writeChar('0', out);
writeChar(0, out);
}
}
public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
switch (arguments[0].type->getTypeId())
{
case TypeIndex::UInt8: return executeType<UInt8>(arguments);
case TypeIndex::UInt16: return executeType<UInt16>(arguments);
case TypeIndex::UInt32: return executeType<UInt32>(arguments);
case TypeIndex::UInt64: return executeType<UInt64>(arguments);
case TypeIndex::UInt128: return executeType<UInt128>(arguments);
case TypeIndex::UInt256: return executeType<UInt256>(arguments);
case TypeIndex::Int8: return executeType<Int8>(arguments);
case TypeIndex::Int16: return executeType<Int16>(arguments);
case TypeIndex::Int32: return executeType<Int32>(arguments);
case TypeIndex::Int64: return executeType<Int64>(arguments);
case TypeIndex::Int128: return executeType<Int128>(arguments);
case TypeIndex::Int256: return executeType<Int256>(arguments);
case TypeIndex::Float32: return executeType<Float32>(arguments);
case TypeIndex::Float64: return executeType<Float64>(arguments);
case TypeIndex::Decimal32: return executeType<Decimal32>(arguments);
case TypeIndex::Decimal64: return executeType<Decimal64>(arguments);
case TypeIndex::Decimal128: return executeType<Decimal128>(arguments);
case TypeIndex::Decimal256: return executeType<Decimal256>(arguments);
default:
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
arguments[0].column->getName(), getName());
}
}
private:
template <typename T>
ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const
{
const auto * from_col_const = typeid_cast<const ColumnConst *>(arguments[0].column.get());
const auto * precision_col = checkAndGetColumn<ColumnVector<UInt8>>(arguments[1].column.get());
const auto * precision_col_const = typeid_cast<const ColumnConst *>(arguments[1].column.get());
auto result_col = ColumnString::create();
auto * result_col_string = assert_cast<ColumnString *>(result_col.get());
ColumnString::Chars & result_chars = result_col_string->getChars();
ColumnString::Offsets & result_offsets = result_col_string->getOffsets();
if constexpr (is_decimal<T>)
{
const auto * from_col = checkAndGetColumn<ColumnDecimal<T>>(arguments[0].column.get());
UInt8 from_scale = from_col->getScale();
if (from_col)
{
if (precision_col_const)
vectorConstant(from_col->getData(), precision_col_const->template getValue<UInt8>(), result_chars, result_offsets, from_scale);
else
vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale);
}
else if (from_col_const)
constantVector(from_col_const->template getValue<T>(), precision_col->getData(), result_chars, result_offsets, from_scale);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName());
}
else
{
const auto * from_col = checkAndGetColumn<ColumnVector<T>>(arguments[0].column.get());
if (from_col)
{
if (precision_col_const)
vectorConstant(from_col->getData(), precision_col_const->template getValue<UInt8>(), result_chars, result_offsets);
else
vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets);
}
else if (from_col_const)
constantVector(from_col_const->template getValue<T>(), precision_col->getData(), result_chars, result_offsets);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName());
}
return result_col;
}
};
}

View File

@ -30,7 +30,7 @@
# include <openssl/sha.h>
#endif
#include <Poco/ByteOrder.h>
#include <bit>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeString.h>
@ -150,6 +150,13 @@ struct IntHash64Impl
template<typename T, typename HashFunction>
T combineHashesFunc(T t1, T t2)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
T tmp;
reverseMemcpy(&tmp, &t1, sizeof(T));
t1 = tmp;
reverseMemcpy(&tmp, &t2, sizeof(T));
t2 = tmp;
#endif
T hashes[] = {t1, t2};
return HashFunction::apply(reinterpret_cast<const char *>(hashes), 2 * sizeof(T));
}
@ -177,12 +184,16 @@ struct HalfMD5Impl
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return buf.uint64_data; /// No need to flip bytes on big endian machines
#else
return Poco::ByteOrder::flipBytes(static_cast<Poco::UInt64>(buf.uint64_data)); /// Compatibility with existing code. Cast need for old poco AND macos where UInt64 != uint64_t
return std::byteswap(buf.uint64_data); /// Compatibility with existing code. Cast need for old poco AND macos where UInt64 != uint64_t
#endif
}
static UInt64 combineHashes(UInt64 h1, UInt64 h2)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
h1 = std::byteswap(h1);
h2 = std::byteswap(h2);
#endif
UInt64 hashes[] = {h1, h2};
return apply(reinterpret_cast<const char *>(hashes), 16);
}
@ -322,6 +333,10 @@ struct SipHash64KeyedImpl
static UInt64 combineHashesKeyed(const Key & key, UInt64 h1, UInt64 h2)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
h1 = std::byteswap(h1);
h2 = std::byteswap(h2);
#endif
UInt64 hashes[] = {h1, h2};
return applyKeyed(key, reinterpret_cast<const char *>(hashes), 2 * sizeof(UInt64));
}
@ -360,6 +375,13 @@ struct SipHash128KeyedImpl
static UInt128 combineHashesKeyed(const Key & key, UInt128 h1, UInt128 h2)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
UInt128 tmp;
reverseMemcpy(&tmp, &h1, sizeof(UInt128));
h1 = tmp;
reverseMemcpy(&tmp, &h2, sizeof(UInt128));
h2 = tmp;
#endif
UInt128 hashes[] = {h1, h2};
return applyKeyed(key, reinterpret_cast<const char *>(hashes), 2 * sizeof(UInt128));
}
@ -395,6 +417,13 @@ struct SipHash128ReferenceKeyedImpl
static UInt128 combineHashesKeyed(const Key & key, UInt128 h1, UInt128 h2)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
UInt128 tmp;
reverseMemcpy(&tmp, &h1, sizeof(UInt128));
h1 = tmp;
reverseMemcpy(&tmp, &h2, sizeof(UInt128));
h2 = tmp;
#endif
UInt128 hashes[] = {h1, h2};
return applyKeyed(key, reinterpret_cast<const char *>(hashes), 2 * sizeof(UInt128));
}

View File

@ -180,9 +180,14 @@ public:
size_t offset = 0;
for (size_t i = 0; i < size; ++i)
{
memcpy(&vec_res[i],
&data_from[offset],
std::min(static_cast<UInt64>(sizeof(ToFieldType)), offsets_from[i] - offset - 1));
if constexpr (std::endian::native == std::endian::little)
memcpy(&vec_res[i],
&data_from[offset],
std::min(static_cast<UInt64>(sizeof(ToFieldType)), offsets_from[i] - offset - 1));
else
reverseMemcpy(&vec_res[i],
&data_from[offset],
std::min(static_cast<UInt64>(sizeof(ToFieldType)), offsets_from[i] - offset - 1));
offset = offsets_from[i];
}

View File

@ -891,26 +891,26 @@ inline void writeText(const IPv4 & x, WriteBuffer & buf) { writeIPv4Text(x, buf)
inline void writeText(const IPv6 & x, WriteBuffer & buf) { writeIPv6Text(x, buf); }
template <typename T>
void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros)
void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros,
bool fixed_fractional_length, UInt32 fractional_length)
{
/// If it's big integer, but the number of digits is small,
/// use the implementation for smaller integers for more efficient arithmetic.
if constexpr (std::is_same_v<T, Int256>)
{
if (x <= std::numeric_limits<UInt32>::max())
{
writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros);
writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
return;
}
else if (x <= std::numeric_limits<UInt64>::max())
{
writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros);
writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
return;
}
else if (x <= std::numeric_limits<UInt128>::max())
{
writeDecimalFractional(static_cast<UInt128>(x), scale, ostr, trailing_zeros);
writeDecimalFractional(static_cast<UInt128>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
return;
}
}
@ -918,24 +918,36 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool
{
if (x <= std::numeric_limits<UInt32>::max())
{
writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros);
writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
return;
}
else if (x <= std::numeric_limits<UInt64>::max())
{
writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros);
writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
return;
}
}
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
assert(scale <= max_digits);
assert(fractional_length <= max_digits);
char buf[max_digits];
memset(buf, '0', scale);
memset(buf, '0', std::max(scale, fractional_length));
T value = x;
Int32 last_nonzero_pos = 0;
for (Int32 pos = scale - 1; pos >= 0; --pos)
if (fixed_fractional_length && fractional_length < scale)
{
T new_value = value / DecimalUtils::scaleMultiplier<Int256>(scale - fractional_length - 1);
auto round_carry = new_value % 10;
value = new_value / 10;
if (round_carry >= 5)
value += 1;
}
for (Int32 pos = fixed_fractional_length ? std::min(scale - 1, fractional_length - 1) : scale - 1; pos >= 0; --pos)
{
auto remainder = value % 10;
value /= 10;
@ -947,11 +959,12 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool
}
writeChar('.', ostr);
ostr.write(buf, trailing_zeros ? scale : last_nonzero_pos + 1);
ostr.write(buf, fixed_fractional_length ? fractional_length : (trailing_zeros ? scale : last_nonzero_pos + 1));
}
template <typename T>
void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros)
void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros,
bool fixed_fractional_length = false, UInt32 fractional_length = 0)
{
T part = DecimalUtils::getWholePart(x, scale);
@ -962,7 +975,7 @@ void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer
writeIntText(part, ostr);
if (scale)
if (scale || (fixed_fractional_length && fractional_length > 0))
{
part = DecimalUtils::getFractionalPart(x, scale);
if (part || trailing_zeros)
@ -970,7 +983,7 @@ void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer
if (part < 0)
part *= T(-1);
writeDecimalFractional(part, scale, ostr, trailing_zeros);
writeDecimalFractional(part, scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
}
}
}

View File

@ -2588,8 +2588,12 @@ void Context::resetZooKeeper() const
shared->zookeeper.reset();
}
static void reloadZooKeeperIfChangedImpl(const ConfigurationPtr & config, const std::string & config_name, zkutil::ZooKeeperPtr & zk,
std::shared_ptr<ZooKeeperLog> zk_log)
static void reloadZooKeeperIfChangedImpl(
const ConfigurationPtr & config,
const std::string & config_name,
zkutil::ZooKeeperPtr & zk,
std::shared_ptr<ZooKeeperLog> zk_log,
bool server_started)
{
if (!zk || zk->configChanged(*config, config_name))
{
@ -2597,18 +2601,22 @@ static void reloadZooKeeperIfChangedImpl(const ConfigurationPtr & config, const
zk->finalize("Config changed");
zk = std::make_shared<zkutil::ZooKeeper>(*config, config_name, std::move(zk_log));
if (server_started)
zk->setServerCompletelyStarted();
}
}
void Context::reloadZooKeeperIfChanged(const ConfigurationPtr & config) const
{
bool server_started = isServerCompletelyStarted();
std::lock_guard lock(shared->zookeeper_mutex);
shared->zookeeper_config = config;
reloadZooKeeperIfChangedImpl(config, "zookeeper", shared->zookeeper, getZooKeeperLog());
reloadZooKeeperIfChangedImpl(config, "zookeeper", shared->zookeeper, getZooKeeperLog(), server_started);
}
void Context::reloadAuxiliaryZooKeepersConfigIfChanged(const ConfigurationPtr & config)
{
bool server_started = isServerCompletelyStarted();
std::lock_guard lock(shared->auxiliary_zookeepers_mutex);
shared->auxiliary_zookeepers_config = config;
@ -2619,7 +2627,7 @@ void Context::reloadAuxiliaryZooKeepersConfigIfChanged(const ConfigurationPtr &
it = shared->auxiliary_zookeepers.erase(it);
else
{
reloadZooKeeperIfChangedImpl(config, "auxiliary_zookeepers." + it->first, it->second, getZooKeeperLog());
reloadZooKeeperIfChangedImpl(config, "auxiliary_zookeepers." + it->first, it->second, getZooKeeperLog(), server_started);
++it;
}
}
@ -3695,6 +3703,15 @@ bool Context::isServerCompletelyStarted() const
void Context::setServerCompletelyStarted()
{
{
std::lock_guard lock(shared->zookeeper_mutex);
if (shared->zookeeper)
shared->zookeeper->setServerCompletelyStarted();
for (auto & zk : shared->auxiliary_zookeepers)
zk.second->setServerCompletelyStarted();
}
auto lock = getLock();
assert(global_context.lock().get() == this);
assert(!shared->is_server_completely_started);

View File

@ -386,7 +386,9 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF
const auto & nested_names = tuple_type.getElementNames();
std::vector<SerializeFn> nested_serializers;
nested_serializers.reserve(nested_types.size());
auto schema = avro::RecordSchema(column_name);
/// We should use unique names for records. Otherwise avro will reuse schema of this record later
/// for all records with the same name.
auto schema = avro::RecordSchema(column_name + "_" + std::to_string(type_name_increment));
for (size_t i = 0; i != nested_types.size(); ++i)
{
auto nested_mapping = createSchemaWithSerializeFn(nested_types[i], type_name_increment, nested_names[i]);

View File

@ -1517,24 +1517,32 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
{
/// Check extra parts at different disks, in order to not allow to miss data parts at undefined disks.
std::unordered_set<String> defined_disk_names;
/// If disk is wrapped into cached disk, it will not be defined in storage policy.
std::unordered_set<String> disk_names_wrapped_in_cache;
for (const auto & disk_ptr : disks)
defined_disk_names.insert(disk_ptr->getName());
for (const auto & [disk_name, disk_ptr] : getContext()->getDisksMap())
{
/// In composable cache with the underlying source disk there might the following structure:
/// DiskObjectStorage(CachedObjectStorage(...(CachedObjectStored(ObjectStorage)...)))
/// In configuration file each of these layers has a different name, but data path
/// (getPath() result) is the same. We need to take it into account here.
if (disk_ptr->supportsCache() && defined_disk_names.contains(disk_ptr->getName()))
defined_disk_names.insert(disk_ptr->getName());
}
/// In case of delegate disks it is not enough to traverse `disks`,
/// because for example cache or encrypted disk which wrap s3 disk and s3 disk itself can be put into different storage policies.
/// But disk->exists returns the same thing for both disks.
for (const auto & [disk_name, disk] : getContext()->getDisksMap())
{
/// As an encrypted disk can use the same path as its nested disk,
/// we need to take it into account here.
const auto & delegate = disk->getDelegateDiskIfExists();
if (delegate && disk->getPath() == delegate->getPath())
defined_disk_names.insert(delegate->getName());
if (disk->supportsCache())
{
auto caches = disk_ptr->getCacheLayersNames();
disk_names_wrapped_in_cache.insert(caches.begin(), caches.end());
LOG_TEST(log, "Cache layers for cache disk `{}`, inner disk `{}`: {}",
disk_name, disk_ptr->getName(), fmt::join(caches, ", "));
/// As the cache is implemented on the object storage layer, not on the disk level, i.e.
/// we have the following structure:
/// DiskObjectStorage(CachedObjectStorage(...(CachedObjectStorage(ObjectStorage)...)))
/// disk->getName() here is the name of the last delegate - ObjectStorage.
/// So now we need to add the cache layers to the defined disk names.
auto caches = disk->getCacheLayersNames();
defined_disk_names.insert(caches.begin(), caches.end());
}
}
@ -1543,9 +1551,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
if (disk->isBroken() || disk->isCustomDisk())
continue;
if (!defined_disk_names.contains(disk_name)
&& disk->exists(relative_data_path)
&& !disk_names_wrapped_in_cache.contains(disk_name))
if (!defined_disk_names.contains(disk_name) && disk->exists(relative_data_path))
{
for (const auto it = disk->iterateDirectory(relative_data_path); it->isValid(); it->next())
{
@ -1553,9 +1559,8 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
{
throw Exception(
ErrorCodes::UNKNOWN_DISK,
"Part {} ({}) was found on disk {} which is not defined in the storage policy (defined disks: {}, wrapped disks: {})",
backQuote(it->name()), backQuote(it->path()), backQuote(disk_name),
fmt::join(defined_disk_names, ", "), fmt::join(disk_names_wrapped_in_cache, ", "));
"Part {} ({}) was found on disk {} which is not defined in the storage policy (defined disks: {})",
backQuote(it->name()), backQuote(it->path()), backQuote(disk_name), fmt::join(defined_disk_names, ", "));
}
}
}

View File

@ -393,7 +393,14 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
if (static_cast<size_t>(best_partition_it->second.min_age) >= data_settings->min_age_to_force_merge_seconds)
return selectAllPartsToMergeWithinPartition(
future_part, can_merge_callback, best_partition_it->first, true, metadata_snapshot, txn, out_disable_reason);
future_part,
can_merge_callback,
best_partition_it->first,
/*final=*/true,
metadata_snapshot,
txn,
out_disable_reason,
/*optimize_skip_merged_partitions=*/true);
}
if (out_disable_reason)
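For reference, the age-based force merge handled above is enabled from SQL through MergeTree table settings, as in the tests later in this commit; a minimal sketch (table name is illustrative):
CREATE TABLE force_merge_demo (i Int64)
ENGINE = MergeTree
ORDER BY i
SETTINGS min_age_to_force_merge_seconds = 1, min_age_to_force_merge_on_partition_only = true;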

View File

@ -322,14 +322,10 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
return rpn_stack[0].can_be_true;
}
bool MergeTreeConditionFullText::getKey(const std::string & key_column_name, size_t & key_column_num)
std::optional<size_t> MergeTreeConditionFullText::getKeyIndex(const std::string & key_column_name)
{
auto it = std::find(index_columns.begin(), index_columns.end(), key_column_name);
if (it == index_columns.end())
return false;
key_column_num = static_cast<size_t>(it - index_columns.begin());
return true;
const auto it = std::ranges::find(index_columns, key_column_name);
return it == index_columns.end() ? std::nullopt : std::make_optional<size_t>(std::ranges::distance(index_columns.cbegin(), it));
}
bool MergeTreeConditionFullText::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out)
@ -389,7 +385,7 @@ bool MergeTreeConditionFullText::extractAtomFromTree(const RPNBuilderTreeNode &
function_name == "mapContains" ||
function_name == "like" ||
function_name == "notLike" ||
function_name == "hasToken" ||
function_name.starts_with("hasToken") ||
function_name == "startsWith" ||
function_name == "endsWith" ||
function_name == "multiSearchAny")
@ -426,10 +422,9 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
Field const_value = value_field;
auto column_name = key_node.getColumnName();
size_t key_column_num = 0;
bool key_exists = getKey(column_name, key_column_num);
bool map_key_exists = getKey(fmt::format("mapKeys({})", column_name), key_column_num);
const auto column_name = key_node.getColumnName();
auto key_index = getKeyIndex(column_name);
const auto map_key_index = getKeyIndex(fmt::format("mapKeys({})", column_name));
if (key_node.isFunction())
{
@ -450,24 +445,14 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
auto first_argument = key_function_node.getArgumentAt(0);
const auto map_column_name = first_argument.getColumnName();
size_t map_keys_key_column_num = 0;
auto map_keys_index_column_name = fmt::format("mapKeys({})", map_column_name);
bool map_keys_exists = getKey(map_keys_index_column_name, map_keys_key_column_num);
size_t map_values_key_column_num = 0;
auto map_values_index_column_name = fmt::format("mapValues({})", map_column_name);
bool map_values_exists = getKey(map_values_index_column_name, map_values_key_column_num);
if (map_keys_exists)
if (const auto map_keys_index = getKeyIndex(fmt::format("mapKeys({})", map_column_name)))
{
auto second_argument = key_function_node.getArgumentAt(1);
DataTypePtr const_type;
if (second_argument.tryGetConstant(const_value, const_type))
{
key_column_num = map_keys_key_column_num;
key_exists = true;
key_index = map_keys_index;
auto const_data_type = WhichDataType(const_type);
if (!const_data_type.isStringOrFixedString() && !const_data_type.isArray())
@ -478,10 +463,9 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
return false;
}
}
else if (map_values_exists)
else if (const auto map_values_exists = getKeyIndex(fmt::format("mapValues({})", map_column_name)))
{
key_column_num = map_values_key_column_num;
key_exists = true;
key_index = map_values_exists;
}
else
{
@ -490,12 +474,29 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
}
}
if (!key_exists && !map_key_exists)
const auto lowercase_key_index = getKeyIndex(fmt::format("lower({})", column_name));
const auto is_has_token_case_insensitive = function_name.starts_with("hasTokenCaseInsensitive");
if (const auto is_case_insensitive_scenario = is_has_token_case_insensitive && lowercase_key_index;
function_name.starts_with("hasToken") && ((!is_has_token_case_insensitive && key_index) || is_case_insensitive_scenario))
{
out.key_column = is_case_insensitive_scenario ? *lowercase_key_index : *key_index;
out.function = RPNElement::FUNCTION_EQUALS;
out.bloom_filter = std::make_unique<BloomFilter>(params);
auto value = const_value.get<String>();
if (is_case_insensitive_scenario)
std::ranges::transform(value, value.begin(), [](const auto & c) { return static_cast<char>(std::tolower(c)); });
token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
return true;
}
if (!key_index && !map_key_index)
return false;
if (map_key_exists && (function_name == "has" || function_name == "mapContains"))
if (map_key_index && (function_name == "has" || function_name == "mapContains"))
{
out.key_column = key_column_num;
out.key_column = *key_index;
out.function = RPNElement::FUNCTION_HAS;
out.bloom_filter = std::make_unique<BloomFilter>(params);
auto & value = const_value.get<String>();
@ -504,7 +505,7 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
}
else if (function_name == "has")
{
out.key_column = key_column_num;
out.key_column = *key_index;
out.function = RPNElement::FUNCTION_HAS;
out.bloom_filter = std::make_unique<BloomFilter>(params);
auto & value = const_value.get<String>();
@ -514,7 +515,7 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
if (function_name == "notEquals")
{
out.key_column = key_column_num;
out.key_column = *key_index;
out.function = RPNElement::FUNCTION_NOT_EQUALS;
out.bloom_filter = std::make_unique<BloomFilter>(params);
const auto & value = const_value.get<String>();
@ -523,7 +524,7 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
}
else if (function_name == "equals")
{
out.key_column = key_column_num;
out.key_column = *key_index;
out.function = RPNElement::FUNCTION_EQUALS;
out.bloom_filter = std::make_unique<BloomFilter>(params);
const auto & value = const_value.get<String>();
@ -532,7 +533,7 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
}
else if (function_name == "like")
{
out.key_column = key_column_num;
out.key_column = *key_index;
out.function = RPNElement::FUNCTION_EQUALS;
out.bloom_filter = std::make_unique<BloomFilter>(params);
const auto & value = const_value.get<String>();
@ -541,25 +542,16 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
}
else if (function_name == "notLike")
{
out.key_column = key_column_num;
out.key_column = *key_index;
out.function = RPNElement::FUNCTION_NOT_EQUALS;
out.bloom_filter = std::make_unique<BloomFilter>(params);
const auto & value = const_value.get<String>();
token_extractor->stringLikeToBloomFilter(value.data(), value.size(), *out.bloom_filter);
return true;
}
else if (function_name == "hasToken")
{
out.key_column = key_column_num;
out.function = RPNElement::FUNCTION_EQUALS;
out.bloom_filter = std::make_unique<BloomFilter>(params);
const auto & value = const_value.get<String>();
token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
return true;
}
else if (function_name == "startsWith")
{
out.key_column = key_column_num;
out.key_column = *key_index;
out.function = RPNElement::FUNCTION_EQUALS;
out.bloom_filter = std::make_unique<BloomFilter>(params);
const auto & value = const_value.get<String>();
@ -568,7 +560,7 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
}
else if (function_name == "endsWith")
{
out.key_column = key_column_num;
out.key_column = *key_index;
out.function = RPNElement::FUNCTION_EQUALS;
out.bloom_filter = std::make_unique<BloomFilter>(params);
const auto & value = const_value.get<String>();
@ -577,7 +569,7 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
}
else if (function_name == "multiSearchAny")
{
out.key_column = key_column_num;
out.key_column = *key_index;
out.function = RPNElement::FUNCTION_MULTI_SEARCH;
/// 2d vector is not needed here but is used because already exists for FUNCTION_IN
@ -616,22 +608,17 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter(
for (size_t i = 0; i < left_argument_function_node_arguments_size; ++i)
{
size_t key = 0;
if (getKey(left_argument_function_node.getArgumentAt(i).getColumnName(), key))
if (const auto key = getKeyIndex(left_argument_function_node.getArgumentAt(i).getColumnName()))
{
key_tuple_mapping.emplace_back(i, key);
data_types.push_back(index_data_types[key]);
key_tuple_mapping.emplace_back(i, *key);
data_types.push_back(index_data_types[*key]);
}
}
}
else
else if (const auto key = getKeyIndex(left_argument.getColumnName()))
{
size_t key = 0;
if (getKey(left_argument.getColumnName(), key))
{
key_tuple_mapping.emplace_back(0, key);
data_types.push_back(index_data_types[key]);
}
key_tuple_mapping.emplace_back(0, *key);
data_types.push_back(index_data_types[*key]);
}
if (key_tuple_mapping.empty())

View File

@ -131,7 +131,7 @@ private:
const Field & value_field,
RPNElement & out);
bool getKey(const std::string & key_column_name, size_t & key_column_num);
std::optional<size_t> getKeyIndex(const std::string & key_column_name);
bool tryPrepareSetBloomFilter(const RPNBuilderTreeNode & left_argument, const RPNBuilderTreeNode & right_argument, RPNElement & out);
static bool createFunctionEqualsCondition(

View File

@ -426,6 +426,7 @@ bool MergeTreeConditionInverted::traverseAtomAST(const RPNBuilderTreeNode & node
function_name == "like" ||
function_name == "notLike" ||
function_name == "hasToken" ||
function_name == "hasTokenOrNull" ||
function_name == "startsWith" ||
function_name == "endsWith" ||
function_name == "multiSearchAny")
@ -568,7 +569,7 @@ bool MergeTreeConditionInverted::traverseASTEquals(
token_extractor->stringLikeToGinFilter(value.data(), value.size(), *out.gin_filter);
return true;
}
else if (function_name == "hasToken")
else if (function_name == "hasToken" || function_name == "hasTokenOrNull")
{
out.key_column = key_column_num;
out.function = RPNElement::FUNCTION_EQUALS;
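A short SQL sketch of the hasTokenOrNull behaviour the index now also recognises (illustrative values; relies on the documented semantics of the *OrNull variants, which return NULL instead of throwing for an ill-formed token):
SELECT hasToken('abc,def,zzz', 'abc');           -- tokens are split on non-alphanumeric characters, so this is expected to return 1
SELECT hasTokenOrNull('abc,def,zzz', 'abc,def'); -- the needle is not a single token, so this is expected to return NULL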

View File

@ -417,7 +417,15 @@ void PostgreSQLReplicationHandler::consumerFunc()
{
assertInitialized();
bool schedule_now = getConsumer()->consume();
bool schedule_now = true;
try
{
schedule_now = getConsumer()->consume();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
if (stop_synchronization)
{

View File

@ -12,7 +12,11 @@
In other words, the session will expire 4 times per 99996 successful requests,
or approximately once every 25000 requests (on average).
-->
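<!--
    A rough sanity check of the figures above (a sketch, not additional configuration):
    per request the fault probability is approximately send + recv = 0.00002 + 0.00002 = 0.00004,
    i.e. about 4 faults per 100000 requests, or roughly one fault per 25000 requests on average.
-->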
<enable_fault_injections_during_startup>0</enable_fault_injections_during_startup>
<send_fault_probability>0.00002</send_fault_probability>
<recv_fault_probability>0.00002</recv_fault_probability>
<send_sleep_probability>0.00001</send_sleep_probability>
<send_sleep_ms>10000</send_sleep_ms>
</zookeeper>
</clickhouse>

View File

@ -14,9 +14,24 @@
<disk_s3_encrypted>
<type>encrypted</type>
<disk>disk_s3</disk>
<path>encrypted/</path>
<key>1234567812345678</key>
<path>encrypted/</path>
</disk_s3_encrypted>
<disk_s3_encrypted_default_path>
<type>encrypted</type>
<disk>disk_s3</disk>
<key>1234567812345678</key>
</disk_s3_encrypted_default_path>
<s3_cache>
<disk>disk_s3</disk>
<path>s3_cache/</path>
<max_size>1Gi</max_size>
</s3_cache>
<encrypted_s3_cache>
<type>encrypted</type>
<disk>s3_cache</disk>
<key>1234567812345678</key>
</encrypted_s3_cache>
<disk_local_encrypted>
<type>encrypted</type>
<disk>disk_local</disk>
@ -74,6 +89,20 @@
</external>
</volumes>
</s3_policy>
<s3_encrypted_default_path>
<volumes>
<main>
<disk>disk_s3_encrypted_default_path</disk>
</main>
</volumes>
</s3_encrypted_default_path>
<s3_encrypted_cache_policy>
<volumes>
<main>
<disk>encrypted_s3_cache</disk>
</main>
</volumes>
</s3_encrypted_cache_policy>
</policies>
</storage_configuration>
</clickhouse>
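A table can be placed on the cached-and-encrypted layering defined above via the storage_policy setting; a minimal sketch (table and column names are illustrative):
CREATE TABLE encrypted_cache_demo (id UInt64, data String)
ENGINE = MergeTree
ORDER BY id
SETTINGS storage_policy = 's3_encrypted_cache_policy';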

View File

@ -12,6 +12,7 @@ node = cluster.add_instance(
main_configs=["configs/storage.xml"],
tmpfs=["/disk:size=100M"],
with_minio=True,
stay_alive=True,
)
@ -269,3 +270,28 @@ def test_read_in_order():
node.query(
"SELECT * FROM encrypted_test ORDER BY a, b SETTINGS optimize_read_in_order=0 FORMAT Null"
)
def test_restart():
for policy in ["disk_s3_encrypted_default_path", "encrypted_s3_cache"]:
node.query(
f"""
DROP TABLE IF EXISTS encrypted_test;
CREATE TABLE encrypted_test (
id Int64,
data String
) ENGINE=MergeTree()
ORDER BY id
SETTINGS disk='{policy}'
"""
)
node.query("INSERT INTO encrypted_test VALUES (0,'data'),(1,'data')")
select_query = "SELECT * FROM encrypted_test ORDER BY id FORMAT Values"
assert node.query(select_query) == "(0,'data'),(1,'data')"
node.restart_clickhouse()
assert node.query(select_query) == "(0,'data'),(1,'data')"
node.query("DROP TABLE encrypted_test NO DELAY;")

View File

@ -1,11 +1,11 @@
12940785793559895259
17926972817233444501
7456555839952096623
CC45107CC4B79F62D831BEF2103C7CBF
DF2EC2F0669B000EDFF6ADEE264E7D68
4CD1C30C38AB935D418B5269EF197B9E
9D78134EE48654D753CCA1B76185CF8E
389D16428D2AADEC9713905572F42864
1
1
1
1
1
955237314186186656
8175794665478042155
9325786087413524176
@ -18,8 +18,8 @@ DF2EC2F0669B000EDFF6ADEE264E7D68
8163029322371165472
8788309436660676487
236561483980029756
8DD5527CC43D76F4760D26BE0F641F7E
F8F7AD9B6CD4CF117A71E277E2EC2931
1
1
12384823029245979431
4507350192761038840
1188926775431157506

View File

@ -4,11 +4,11 @@ SELECT sipHash64(1, 2, 3);
SELECT sipHash64(1, 3, 2);
SELECT sipHash64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2))));
SELECT hex(sipHash128('foo'));
SELECT hex(sipHash128('\x01'));
SELECT hex(sipHash128('foo', 'foo'));
SELECT hex(sipHash128('foo', 'foo', 'foo'));
SELECT hex(sipHash128(1, 2, 3));
SELECT hex(sipHash128('foo')) = hex(reverse(unhex('CC45107CC4B79F62D831BEF2103C7CBF'))) or hex(sipHash128('foo')) = 'CC45107CC4B79F62D831BEF2103C7CBF';
SELECT hex(sipHash128('\x01')) = hex(reverse(unhex('DF2EC2F0669B000EDFF6ADEE264E7D68'))) or hex(sipHash128('\x01')) = 'DF2EC2F0669B000EDFF6ADEE264E7D68';
SELECT hex(sipHash128('foo', 'foo')) = hex(reverse(unhex('4CD1C30C38AB935D418B5269EF197B9E'))) or hex(sipHash128('foo', 'foo')) = '4CD1C30C38AB935D418B5269EF197B9E';
SELECT hex(sipHash128('foo', 'foo', 'foo')) = hex(reverse(unhex('9D78134EE48654D753CCA1B76185CF8E'))) or hex(sipHash128('foo', 'foo', 'foo')) = '9D78134EE48654D753CCA1B76185CF8E';
SELECT hex(sipHash128(1, 2, 3)) = hex(reverse(unhex('389D16428D2AADEC9713905572F42864'))) or hex(sipHash128(1, 2, 3)) = '389D16428D2AADEC9713905572F42864';
SELECT halfMD5(1, 2, 3);
SELECT halfMD5(1, 3, 2);
@ -26,8 +26,8 @@ SELECT murmurHash3_64(1, 2, 3);
SELECT murmurHash3_64(1, 3, 2);
SELECT murmurHash3_64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2))));
SELECT hex(murmurHash3_128('foo', 'foo'));
SELECT hex(murmurHash3_128('foo', 'foo', 'foo'));
SELECT hex(murmurHash3_128('foo', 'foo')) = hex(reverse(unhex('8DD5527CC43D76F4760D26BE0F641F7E'))) or hex(murmurHash3_128('foo', 'foo')) = '8DD5527CC43D76F4760D26BE0F641F7E';
SELECT hex(murmurHash3_128('foo', 'foo', 'foo')) = hex(reverse(unhex('F8F7AD9B6CD4CF117A71E277E2EC2931'))) or hex(murmurHash3_128('foo', 'foo', 'foo')) = 'F8F7AD9B6CD4CF117A71E277E2EC2931';
SELECT gccMurmurHash(1, 2, 3);
SELECT gccMurmurHash(1, 3, 2);
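The comparisons above accept either byte order of the 128-bit hash; the same endianness-agnostic pattern written once with a CTE, reusing the sipHash128('foo') constant from the reference file above (a sketch, not part of the test):
WITH hex(sipHash128('foo')) AS h
SELECT (h = 'CC45107CC4B79F62D831BEF2103C7CBF') OR (h = hex(reverse(unhex('CC45107CC4B79F62D831BEF2103C7CBF'))));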

View File

@ -2,6 +2,12 @@
0
2007
2007
0
2007
2007
2007
2007
2007
2007
0
2007

View File

@ -18,15 +18,35 @@ SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitive(s, 'abc,def,zzz')
SELECT max(id) FROM bloom_filter WHERE hasTokenOrNull(s, 'abc,def,zzz');
SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitiveOrNull(s, 'abc,def,zzz');
select max(id) from bloom_filter where hasTokenCaseInsensitive(s, 'ABC');
select max(id) from bloom_filter where hasTokenCaseInsensitive(s, 'zZz');
-- as table "bloom_filter" but w/o index_granularity_bytes
drop table if exists bloom_filter2;
create table bloom_filter2
(
id UInt64,
s String,
index tok_bf3 (s, lower(s)) type tokenbf_v1(512, 3, 0) GRANULARITY 1
) engine = MergeTree() order by id settings index_granularity = 8;
insert into bloom_filter2 select number, 'yyy,uuu' from numbers(1024);
insert into bloom_filter2 select number+2000, 'ABC,def,zzz' from numbers(8);
insert into bloom_filter2 select number+3000, 'yyy,uuu' from numbers(1024);
insert into bloom_filter2 select number+3000, 'abcdefzzz' from numbers(1024);
set max_rows_to_read = 16;
SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc');
SELECT max(id) FROM bloom_filter WHERE hasTokenOrNull(s, 'abc');
SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'ABC');
select max(id) from bloom_filter where hasTokenCaseInsensitive(s, 'ABC');
select max(id) from bloom_filter where hasTokenCaseInsensitiveOrNull(s, 'ABC');
SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'def');
SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'zzz');
select max(id) from bloom_filter where hasTokenCaseInsensitive(s, 'zZz');
select max(id) from bloom_filter2 where hasToken(s, 'ABC');
select max(id) from bloom_filter2 where hasToken(s, 'abc');
select max(id) from bloom_filter2 where hasTokenCaseInsensitive(s, 'abc');
select max(id) from bloom_filter2 where hasTokenCaseInsensitive(s, 'ABC');
-- invert result
-- this does not work as expected, reading more rows than it should
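To check that the tokenbf_v1 index above is actually consulted for the case-insensitive path, the plan can be inspected; a sketch (EXPLAIN output shape depends on the server version):
EXPLAIN indexes = 1
SELECT max(id) FROM bloom_filter2 WHERE hasTokenCaseInsensitive(s, 'ABC');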

View File

@ -4,7 +4,4 @@ With merge any part range
1
With merge partition only
1
With merge replicated any part range
1
With merge replicated partition only
1

View File

@ -24,7 +24,6 @@ wait_for_number_of_parts() {
$CLICKHOUSE_CLIENT -nmq "
DROP TABLE IF EXISTS test_without_merge;
DROP TABLE IF EXISTS test_with_merge;
DROP TABLE IF EXISTS test_replicated;
SELECT 'Without merge';
@ -62,30 +61,7 @@ INSERT INTO test_with_merge SELECT 3;"
wait_for_number_of_parts 'test_with_merge' 1 100
$CLICKHOUSE_CLIENT -nmq "
DROP TABLE test_with_merge;
SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one
SELECT (now() - modification_time) > 5 FROM system.parts WHERE database = currentDatabase() AND table='test_with_merge' AND active;
SELECT 'With merge replicated any part range';
CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02473', 'node') ORDER BY i
SETTINGS min_age_to_force_merge_seconds=1, merge_selecting_sleep_ms=1000, min_age_to_force_merge_on_partition_only=false;
INSERT INTO test_replicated SELECT 1;
INSERT INTO test_replicated SELECT 2;
INSERT INTO test_replicated SELECT 3;"
wait_for_number_of_parts 'test_replicated' 1 100
$CLICKHOUSE_CLIENT -nmq "
DROP TABLE test_replicated;
SELECT 'With merge replicated partition only';
CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02473_partition_only', 'node') ORDER BY i
SETTINGS min_age_to_force_merge_seconds=1, merge_selecting_sleep_ms=1000, min_age_to_force_merge_on_partition_only=true;
INSERT INTO test_replicated SELECT 1;
INSERT INTO test_replicated SELECT 2;
INSERT INTO test_replicated SELECT 3;"
wait_for_number_of_parts 'test_replicated' 1 100
$CLICKHOUSE_CLIENT -nmq "
DROP TABLE test_replicated;"
DROP TABLE test_with_merge;"

View File

@ -0,0 +1 @@
((1,2)) ((3,4,5))

View File

@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Tags: no-fasttest
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_LOCAL -q "select tuple(tuple(1, 2))::Tuple(x Tuple(a UInt32, b UInt32)) as c1, tuple(tuple(3, 4, 5))::Tuple(x Tuple(c UInt32, d UInt32, e UInt32)) as c2 format Avro" | $CLICKHOUSE_LOCAL --input-format Avro --structure 'c1 Tuple(x Tuple(a UInt32, b UInt32)), c2 Tuple(x Tuple(c UInt32, d UInt32, e UInt32))' -q "select * from table"

View File

@ -0,0 +1,7 @@
Without merge
3
With merge replicated any part range
1
With merge replicated partition only
1
1

View File

@ -0,0 +1,67 @@
#!/usr/bin/env bash
# Tags: long
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# Wait for the number of parts in table $1 to become $2.
# Print the new value, or the last observed value if the count has not reached $2 within $3 seconds.
wait_for_number_of_parts() {
for _ in `seq $3`
do
sleep 1
res=`$CLICKHOUSE_CLIENT -q "SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='$1' AND active"`
if [ "$res" -eq "$2" ]
then
echo "$res"
return
fi
done
echo "$res"
}
$CLICKHOUSE_CLIENT -nmq "
DROP TABLE IF EXISTS test_without_merge;
DROP TABLE IF EXISTS test_replicated;
SELECT 'Without merge';
CREATE TABLE test_without_merge (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02676_without_merge', 'node') ORDER BY i SETTINGS merge_selecting_sleep_ms=1000;
INSERT INTO test_without_merge SELECT 1;
INSERT INTO test_without_merge SELECT 2;
INSERT INTO test_without_merge SELECT 3;"
wait_for_number_of_parts 'test_without_merge' 1 10
$CLICKHOUSE_CLIENT -nmq "
DROP TABLE test_without_merge;
SELECT 'With merge replicated any part range';
CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02676', 'node') ORDER BY i
SETTINGS min_age_to_force_merge_seconds=1, merge_selecting_sleep_ms=1000, min_age_to_force_merge_on_partition_only=false;
INSERT INTO test_replicated SELECT 1;
INSERT INTO test_replicated SELECT 2;
INSERT INTO test_replicated SELECT 3;"
wait_for_number_of_parts 'test_replicated' 1 100
$CLICKHOUSE_CLIENT -nmq "
DROP TABLE test_replicated;
SELECT 'With merge replicated partition only';
CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02676_partition_only', 'node') ORDER BY i
SETTINGS min_age_to_force_merge_seconds=1, merge_selecting_sleep_ms=1000, min_age_to_force_merge_on_partition_only=true;
INSERT INTO test_replicated SELECT 1;
INSERT INTO test_replicated SELECT 2;
INSERT INTO test_replicated SELECT 3;"
wait_for_number_of_parts 'test_replicated' 1 100
$CLICKHOUSE_CLIENT -nmq "
SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one
SELECT (now() - modification_time) > 5 FROM system.parts WHERE database = currentDatabase() AND table='test_replicated' AND active;
DROP TABLE test_replicated;"

View File

@ -0,0 +1,21 @@
2.00000000000000000000000000000000000000000000000000000000000000000000000000000
2.12
-2.00000000000000000000000000000000000000000000000000000000000000000000000000000
-2.12
2.987600000000000033395508580724708735942840576171875000000000
2.15
-2.987600000000000033395508580724708735942840576171875000000000
-2.15
64.1230010986
64.2340000000
-64.1230010986
-64.2340000000
-32.345
32.34500000000000000000000000000000000000000000000000000000000000000000000000000
32.46
-64.5671232345
128.78932312332132985464
-128.78932312332132985464
128.78932312332132985464000000000000000000000000000000000000000000000000000000000
128.7893231233
-128.78932312332132985464123123789323123321329854600000000000000000000000000000000

View File

@ -0,0 +1,35 @@
-- Regular types
SELECT toDecimalString(2, 77); -- more digits required than exist
SELECT toDecimalString(2.123456, 2); -- rounding
SELECT toDecimalString(-2, 77); -- more digits required than exist
SELECT toDecimalString(-2.123456, 2); -- rounding
SELECT toDecimalString(2.9876, 60); -- more digits required than exist (took 60 as it is float by default)
SELECT toDecimalString(2.1456, 2); -- rounding
SELECT toDecimalString(-2.9876, 60); -- more digits required than exist
SELECT toDecimalString(-2.1456, 2); -- rounding
-- Float32 and Float64 tests. No sense to test big float precision -- the result will be a mess anyway.
SELECT toDecimalString(64.123::Float32, 10);
SELECT toDecimalString(64.234::Float64, 10);
SELECT toDecimalString(-64.123::Float32, 10);
SELECT toDecimalString(-64.234::Float64, 10);
-- Decimals
SELECT toDecimalString(-32.345::Decimal32(3), 3);
SELECT toDecimalString(32.345::Decimal32(3), 77); -- more digits required than exist
SELECT toDecimalString(32.456::Decimal32(3), 2); -- rounding
SELECT toDecimalString('-64.5671232345'::Decimal64(10), 10);
SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 20);
SELECT toDecimalString('-128.78932312332132985464123123'::Decimal128(26), 20); -- rounding
SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 77); -- more digits required than exist
SELECT toDecimalString('128.789323123321329854641231237893231233213298546'::Decimal256(45), 10); -- rounding
SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 77); -- more digits required than exist
-- The maximum number of requested decimal fractional digits is 77 for Int/UInt/Decimal and 60 for Float.
-- Exceeding these limits must be rejected with CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER.
SELECT toDecimalString('32.32'::Float32, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('64.64'::Float64, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('88'::UInt8, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('646464'::Int256, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
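For contrast with the error cases above, the boundary values themselves are accepted; a short sketch reusing the limits stated in the comment (77 for integers and decimals, 60 for floats):
SELECT toDecimalString(2, 77);                        -- Int at the 77-digit limit
SELECT toDecimalString(64.123::Float32, 60);          -- Float at the 60-digit limit
SELECT toDecimalString('32.345'::Decimal32(3), 77);   -- Decimal at the 77-digit limit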

View File

@ -0,0 +1 @@
SELECT a, sum(b) FROM (SELECT 1 AS a, 1 AS b, 0 AS c) GROUP BY a HAVING c SETTINGS allow_experimental_analyzer=1 -- { serverError NOT_AN_AGGREGATE }

View File

@ -1 +0,0 @@
SELECT a, sum(b) FROM (SELECT 1 AS a, 1 AS b, 0 AS c) GROUP BY a HAVING c SETTINGS allow_experimental_analyzer=1 -- {{ serverError NOT_AN_AGGREGATE}}

View File

@ -10,7 +10,7 @@ TESTS_PATH=$(dirname ${BASH_SOURCE[0]})
set -ue
# shellcheck disable=SC2010
LAST_TEST_NO=$(ls -1 ${TESTS_PATH} | grep -P -o '^\d+' | sort -nr | head -1)
LAST_TEST_NO=$(ls -1 ${TESTS_PATH} | grep -E -o '^[0-9]+' | sort -nr | head -1)
# remove leading zeros, increment and add padding zeros to 5 digits
NEW_TEST_NO=$(printf "%05d\n" $((10#$LAST_TEST_NO + 1)))

View File

@ -1,41 +0,0 @@
-- Tags: no-tsan
DROP TABLE IF EXISTS mixed_granularity_table;
CREATE TABLE mixed_granularity_table (`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, `GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, `SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, `UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192, enable_mixed_granularity_parts=1; -- same with hits, but enabled mixed granularity
INSERT INTO mixed_granularity_table SELECT * FROM test.hits LIMIT 10;
ALTER TABLE mixed_granularity_table REPLACE PARTITION 201403 FROM test.hits;
SELECT COUNT() FROM mixed_granularity_table;
INSERT INTO mixed_granularity_table SELECT * FROM test.hits LIMIT 1;
SELECT COUNT() FROM mixed_granularity_table;
OPTIMIZE TABLE mixed_granularity_table FINAL;
SELECT COUNT() FROM mixed_granularity_table;
CREATE TABLE non_mixed_granularity_non_adaptive_table (`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, `GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, `SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, `UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192, index_granularity_bytes=0; -- same with hits, but enabled mixed granularity and fixed_granularity
INSERT INTO non_mixed_granularity_non_adaptive_table SELECT * FROM test.hits LIMIT 10;
-- after optimize mixed_granularity_table will have .mrk2 parts
ALTER TABLE non_mixed_granularity_non_adaptive_table REPLACE PARTITION 201403 FROM mixed_granularity_table; -- { serverError 36 }
DROP TABLE IF EXISTS non_mixed_granularity_non_adaptive_table;
DROP TABLE IF EXISTS mixed_granularity_strictly_non_adaptive_table;
CREATE TABLE mixed_granularity_strictly_non_adaptive_table (`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, `GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, `SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, `UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192, enable_mixed_granularity_parts=1, index_granularity_bytes=0; -- same with hits, but enabled mixed granularity and fixed_granularity
INSERT INTO mixed_granularity_strictly_non_adaptive_table SELECT * FROM test.hits LIMIT 10;
ALTER TABLE mixed_granularity_strictly_non_adaptive_table REPLACE PARTITION 201403 FROM mixed_granularity_table; -- { serverError 36 }
DROP TABLE IF EXISTS mixed_granularity_table;
DROP TABLE IF EXISTS mixed_granularity_strictly_non_adaptive_table;

View File

@ -165,7 +165,7 @@ find $ROOT_PATH/tests/queries -iname '*fail*' |
grep . && echo 'Tests should not be named with "fail" in their names. It makes looking at the results less convenient when you search for "fail" substring in browser.'
# Queries to system.query_log/system.query_thread_log should have current_database = currentDatabase() condition
# NOTE: it is not that accuate, but at least something.
# NOTE: it is not that accurate, but at least something.
tests_with_query_log=( $(
find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' |
grep -vP $EXCLUDE_DIRS |
@ -177,6 +177,17 @@ for test_case in "${tests_with_query_log[@]}"; do
} || echo "Queries to system.query_log/system.query_thread_log does not have current_database = currentDatabase() condition in $test_case"
done
# There shouldn't be large jumps between test numbers (since they should be consecutive)
max_diff=$(
find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' |
grep -oP '\d+\D+\K\d+' | sort -n -u | awk 's{print ($0-s) " diff " s " and " $0 }{s=$0}' | sort -n | tail -n 1
)
max_diff_value=( $(echo $max_diff) )
if [[ $max_diff_value -ge 100 ]];
then
echo "Too big of a difference between test numbers: $max_diff"
fi
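# Illustrative example of the check above (hypothetical numbers): if the test numbers 02676 and 02790
# were adjacent in the sorted list, awk would emit "114 diff 2676 and 2790" for that pair and,
# since the largest gap would then be at least 100, the warning above would be reported.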
# Queries to:
tables_with_database_column=(
system.tables