Merge remote-tracking branch 'upstream/master' into HEAD

This commit is contained in:
Anton Popov 2021-09-28 15:03:27 +03:00
commit 83fd853169
317 changed files with 6982 additions and 3539 deletions

View File

@ -1,5 +1,3 @@
I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en
Changelog category (leave one):
- New Feature
- Improvement

View File

@ -1,7 +1,7 @@
sudo apt-get install apt-transport-https ca-certificates dirmngr
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4
echo "deb https://repo.clickhouse.tech/deb/stable/ main/" | sudo tee \
echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \
/etc/apt/sources.list.d/clickhouse.list
sudo apt-get update

View File

@ -1,6 +1,6 @@
sudo yum install yum-utils
sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/clickhouse.repo
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo
sudo yum install clickhouse-server clickhouse-client
sudo /etc/init.d/clickhouse-server start

View File

@ -1,9 +1,9 @@
export LATEST_VERSION=$(curl -s https://repo.clickhouse.tech/tgz/stable/ | \
export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \
grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1)
curl -O https://repo.clickhouse.tech/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh

View File

@ -13,16 +13,16 @@ The list of documented datasets:
- [GitHub Events](../../getting-started/example-datasets/github-events.md)
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
- [Recipes](../../getting-started/example-datasets/recipes.md)
- [OnTime](../../getting-started/example-datasets/ontime.md)
- [OpenSky](../../getting-started/example-datasets/opensky.md)
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
- [UK Property Price Paid](../../getting-started/example-datasets/uk-price-paid.md)
- [What's on the Menu?](../../getting-started/example-datasets/menus.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
- [Brown University Benchmark](../../getting-started/example-datasets/brown-benchmark.md)
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
- [OpenSky](../../getting-started/example-datasets/opensky.md)
- [UK Property Price Paid](../../getting-started/example-datasets/uk-price-paid.md)
- [Cell Towers](../../getting-started/example-datasets/cell-towers.md)
- [What's on the Menu?](../../getting-started/example-datasets/menus.md)
- [OnTime](../../getting-started/example-datasets/ontime.md)
[Original article](https://clickhouse.com/docs/en/getting_started/example_datasets) <!--hide-->

View File

@ -3,7 +3,7 @@ toc_priority: 20
toc_title: OpenSky
---
# Crowdsourced air traffic data from The OpenSky Network 2020
# Crowdsourced air traffic data from The OpenSky Network 2020 {#opensky}
"The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic".
@ -14,17 +14,19 @@ Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent L
Earth System Science Data 13(2), 2021
https://doi.org/10.5194/essd-13-357-2021
## Download the Dataset
## Download the Dataset {#download-dataset}
```
Run the command:
```bash
wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget
```
Download will take about 2 minutes with good internet connection. There are 30 files with total size of 4.3 GB.
## Create the Table
## Create the Table {#create-table}
```
```sql
CREATE TABLE opensky
(
callsign String,
@ -46,69 +48,101 @@ CREATE TABLE opensky
) ENGINE = MergeTree ORDER BY (origin, destination, callsign);
```
## Import Data
## Import Data {#import-data}
Upload data into ClickHouse in parallel:
```
ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c '
gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"'
```bash
ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"'
```
Here we pass the list of files (`ls -1 flightlist_*.csv.gz`) to `xargs` for parallel processing.
- Here we pass the list of files (`ls -1 flightlist_*.csv.gz`) to `xargs` for parallel processing.
`xargs -P100` specifies to use up to 100 parallel workers but as we only have 30 files, the number of workers will be only 30.
- For every file, `xargs` will run a script with `bash -c`. The script contains a `{}` placeholder, and `xargs` substitutes the filename into it (we requested this by passing `-I{}` to `xargs`).
- The script will decompress the file (`gzip -c -d "{}"`) to standard output (`-c` parameter) and the output is redirected to `clickhouse-client`.
- We also asked to parse [DateTime](../../sql-reference/data-types/datetime.md) fields with extended parser ([--date_time_input_format best_effort](../../operations/settings/settings.md#settings-date_time_input_format)) to recognize ISO-8601 format with timezone offsets.
For every file, `xargs` will run a script with `bash -c`. The script has substitution in form of `{}` and the `xargs` command will substitute the filename to it (we have asked it for xargs with `-I{}`).
The script will decompress the file (`gzip -c -d "{}"`) to standard output (`-c` parameter) and the output is redirected to `clickhouse-client`.
Finally, `clickhouse-client` will do insertion. It will read input data in `CSVWithNames` format. We also asked to parse DateTime fields with extended parser (`--date_time_input_format best_effort`) to recognize ISO-8601 format with timezone offsets.
Finally, `clickhouse-client` will do insertion. It will read input data in [CSVWithNames](../../interfaces/formats.md#csvwithnames) format.
Parallel upload takes 24 seconds.
If you don't like parallel upload, here is sequential variant:
```
```bash
for file in flightlist_*.csv.gz; do gzip -c -d "$file" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"; done
```
## Validate the Data
## Validate the Data {#validate-data}
```
SELECT count() FROM opensky
66010819
Query:
```sql
SELECT count() FROM opensky;
```
The size of dataset in ClickHouse is just 2.64 GiB:
Result:
```
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'opensky'
2.64 GiB
```text
┌──count()─┐
│ 66010819 │
└──────────┘
```
## Run Some Queries
The size of the dataset in ClickHouse is just 2.66 GiB. You can check it as follows.
Total distance travelled is 68 billion kilometers:
Query:
```sql
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'opensky';
```
SELECT formatReadableQuantity(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) / 1000) FROM opensky
Result:
```text
┌─formatReadableSize(total_bytes)─┐
│ 2.66 GiB │
└─────────────────────────────────┘
```
## Run Some Queries {#run-queries}
Total distance travelled is 68 billion kilometers.
Query:
```sql
SELECT formatReadableQuantity(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) / 1000) FROM opensky;
```
Result:
```text
┌─formatReadableQuantity(divide(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 1000))─┐
│ 68.72 billion │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
Average flight distance is around 1000 km.
```
SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky
Query:
```sql
SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky;
```
Result:
```text
┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐
│ 1041090.6465708319 │
└────────────────────────────────────────────────────────────────────┘
```
### Most busy origin airports and the average distance seen:
### Most busy origin airports and the average distance seen {#busy-airports-average-distance}
```
Query:
```sql
SELECT
origin,
count(),
@ -118,10 +152,12 @@ FROM opensky
WHERE origin != ''
GROUP BY origin
ORDER BY count() DESC
LIMIT 100
LIMIT 100;
```
Query id: f9010ea5-97d0-45a3-a5bd-9657906cd105
Result:
```text
┌─origin─┬─count()─┬─distance─┬─bar────────────────────────────────────┐
1. │ KORD │ 745007 │ 1546108 │ ███████████████▍ │
2. │ KDFW │ 696702 │ 1358721 │ █████████████▌ │
@ -224,13 +260,13 @@ Query id: f9010ea5-97d0-45a3-a5bd-9657906cd105
99. │ EDDT │ 115122 │ 941740 │ █████████▍ │
100. │ EFHK │ 114860 │ 1629143 │ ████████████████▎ │
└────────┴─────────┴──────────┴────────────────────────────────────────┘
100 rows in set. Elapsed: 0.186 sec. Processed 48.31 million rows, 2.17 GB (259.27 million rows/s., 11.67 GB/s.)
```
### Number of flights from three major Moscow airports, weekly:
### Number of flights from three major Moscow airports, weekly {#flights-from-moscow}
```
Query:
```sql
SELECT
toMonday(day) AS k,
count() AS c,
@ -238,10 +274,12 @@ SELECT
FROM opensky
WHERE origin IN ('UUEE', 'UUDD', 'UUWW')
GROUP BY k
ORDER BY k ASC
ORDER BY k ASC;
```
Query id: 1b446157-9519-4cc4-a1cb-178dfcc15a8e
Result:
```text
┌──────────k─┬────c─┬─bar──────────────────────────────────────────────────────────────────────────┐
1. │ 2018-12-31 │ 5248 │ ████████████████████████████████████████████████████▍ │
2. │ 2019-01-07 │ 6302 │ ███████████████████████████████████████████████████████████████ │
@ -375,10 +413,8 @@ Query id: 1b446157-9519-4cc4-a1cb-178dfcc15a8e
130. │ 2021-06-21 │ 6061 │ ████████████████████████████████████████████████████████████▌ │
131. │ 2021-06-28 │ 2554 │ █████████████████████████▌ │
└────────────┴──────┴──────────────────────────────────────────────────────────────────────────────┘
131 rows in set. Elapsed: 0.014 sec. Processed 655.36 thousand rows, 11.14 MB (47.56 million rows/s., 808.48 MB/s.)
```
### Test it in Playground
### Online Playground {#playground}
The data is uploaded to ClickHouse Playground, [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=).
You can test other queries to this data set using the interactive resource [Online Playground](https://gh-api.clickhouse.tech/play?user=play). For example, [like this](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). However, please note that you cannot create temporary tables here.

View File

@ -3,27 +3,29 @@ toc_priority: 20
toc_title: UK Property Price Paid
---
# UK Property Price Paid
# UK Property Price Paid {#uk-property-price-paid}
The dataset contains data about prices paid for real-estate property in England and Wales. The data is available since year 1995.
The size of the dataset in uncompressed form is about 4 GiB and it will take about 226 MiB in ClickHouse.
The size of the dataset in uncompressed form is about 4 GiB and it will take about 278 MiB in ClickHouse.
Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
## Download the Dataset
## Download the Dataset {#download-dataset}
```
Run the command:
```bash
wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv
```
Download will take about 2 minutes with good internet connection.
## Create the Table
## Create the Table {#create-table}
```
```sql
CREATE TABLE uk_price_paid
(
price UInt32,
@ -44,7 +46,7 @@ CREATE TABLE uk_price_paid
) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2);
```
## Preprocess and Import Data
## Preprocess and Import Data {#preprocess-import-data}
We will use `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it.
@ -53,13 +55,13 @@ In this example, we define the structure of source data from the CSV file and sp
The preprocessing is:
- splitting the postcode to two different columns `postcode1` and `postcode2` that is better for storage and queries;
- converting the `time` field to date as it only contains 00:00 time;
- ignoring the `uuid` field because we don't need it for analysis;
- transforming `type` and `duration` to more readable Enum fields with function `transform`;
- transforming `is_new` and `category` fields from single-character string (`Y`/`N` and `A`/`B`) to UInt8 field with 0 and 1.
- ignoring the [UUID](../../sql-reference/data-types/uuid.md) field because we don't need it for analysis;
- transforming `type` and `duration` to more readable Enum fields with function [transform](../../sql-reference/functions/other-functions.md#transform);
- transforming `is_new` and `category` fields from single-character string (`Y`/`N` and `A`/`B`) to [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with 0 and 1.
Preprocessed data is piped directly to `clickhouse-client` to be inserted into ClickHouse table in streaming fashion.
```
```bash
clickhouse-local --input-format CSV --structure '
uuid String,
price UInt32,
@ -100,103 +102,131 @@ clickhouse-local --input-format CSV --structure '
It will take about 40 seconds.
## Validate the Data
## Validate the Data {#validate-data}
```
SELECT count() FROM uk_price_paid
26248711
Query:
```sql
SELECT count() FROM uk_price_paid;
```
The size of dataset in ClickHouse is just 226 MiB:
Result:
```
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid'
226.40 MiB
```text
┌──count()─┐
│ 26321785 │
└──────────┘
```
## Run Some Queries
The size of the dataset in ClickHouse is just 278 MiB. You can check it as follows.
### Average price per year:
Query:
```sql
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid';
```
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year
Result:
```text
┌─formatReadableSize(total_bytes)─┐
│ 278.80 MiB │
└─────────────────────────────────┘
```
## Run Some Queries {#run-queries}
### Query 1. Average Price Per Year {#average-price}
Query:
```sql
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year;
```
Result:
```text
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
│ 1995 │ 67932 │ █████▍ │
│ 1996 │ 71505 │ █████▋ │
│ 1997 │ 78532 │ ██████▎ │
│ 1998 │ 85435 │ ██████▋ │
│ 1999 │ 96036 │ ███████▋ │
│ 2000 │ 107478 │ ████████▌ │
│ 2001 │ 118886 │ █████████▌ │
│ 2002 │ 137940 │ ███████████ │
│ 2003 │ 155888 │ ████████████▍ │
│ 1998 │ 85436 │ ██████▋ │
│ 1999 │ 96037 │ ███████▋ │
│ 2000 │ 107479 │ ████████▌ │
│ 2001 │ 118885 │ █████████▌ │
│ 2002 │ 137941 │ ███████████ │
│ 2003 │ 155889 │ ████████████▍ │
│ 2004 │ 178885 │ ██████████████▎ │
│ 2005 │ 189350 │ ███████████████▏ │
│ 2005 │ 189351 │ ███████████████▏ │
│ 2006 │ 203528 │ ████████████████▎ │
│ 2007 │ 219377 │ █████████████████▌ │
│ 2007 │ 219378 │ █████████████████▌ │
│ 2008 │ 217056 │ █████████████████▎ │
│ 2009 │ 213419 │ █████████████████ │
│ 2010 │ 236110 │ ██████████████████▊ │
│ 2011 │ 232804 │ ██████████████████▌ │
│ 2012 │ 238366 │ ███████████████████ │
│ 2010 │ 236109 │ ██████████████████▊ │
│ 2011 │ 232805 │ ██████████████████▌ │
│ 2012 │ 238367 │ ███████████████████ │
│ 2013 │ 256931 │ ████████████████████▌ │
│ 2014 │ 279917 │ ██████████████████████▍ │
│ 2015 │ 297264 │ ███████████████████████▋ │
│ 2016 │ 313197 │ █████████████████████████ │
│ 2017 │ 346070 │ ███████████████████████████▋ │
│ 2018 │ 350117 │ ████████████████████████████ │
│ 2019 │ 351010 │ ████████████████████████████ │
│ 2020 │ 368974 │ █████████████████████████████▌ │
│ 2021 │ 384351 │ ██████████████████████████████▋
│ 2014 │ 279915 │ ██████████████████████▍ │
│ 2015 │ 297266 │ ███████████████████████▋ │
│ 2016 │ 313201 │ █████████████████████████ │
│ 2017 │ 346097 │ ███████████████████████████▋ │
│ 2018 │ 350116 │ ████████████████████████████ │
│ 2019 │ 351013 │ ████████████████████████████ │
│ 2020 │ 369420 │ █████████████████████████████▌ │
│ 2021 │ 386903 │ ██████████████████████████████▊
└──────┴────────┴────────────────────────────────────────┘
27 rows in set. Elapsed: 0.027 sec. Processed 26.25 million rows, 157.49 MB (955.96 million rows/s., 5.74 GB/s.)
```
### Average price per year in London:
### Query 2. Average Price per Year in London {#average-price-london}
Query:
```sql
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year;
```
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year
Result:
```text
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
│ 1995 │ 109112 │ █████▍ │
│ 1995 │ 109116 │ █████▍ │
│ 1996 │ 118667 │ █████▊ │
│ 1997 │ 136518 │ ██████▋ │
│ 1998 │ 152983 │ ███████▋ │
│ 1999 │ 180633 │ █████████ │
│ 2000 │ 215830 │ ██████████▋ │
│ 2001 │ 232996 │ ███████████▋ │
│ 2002 │ 263672 │ █████████████▏ │
│ 1999 │ 180637 │ █████████ │
│ 2000 │ 215838 │ ██████████▋ │
│ 2001 │ 232994 │ ███████████▋ │
│ 2002 │ 263670 │ █████████████▏ │
│ 2003 │ 278394 │ █████████████▊ │
│ 2004 │ 304665 │ ███████████████▏ │
│ 2004 │ 304666 │ ███████████████▏ │
│ 2005 │ 322875 │ ████████████████▏ │
│ 2006 │ 356192 │ █████████████████▋ │
│ 2007 │ 404055 │ ████████████████████▏ │
│ 2006 │ 356191 │ █████████████████▋ │
│ 2007 │ 404054 │ ████████████████████▏ │
│ 2008 │ 420741 │ █████████████████████ │
│ 2009 │ 427754 │ █████████████████████▍ │
│ 2009 │ 427753 │ █████████████████████▍ │
│ 2010 │ 480306 │ ████████████████████████ │
│ 2011 │ 496274 │ ████████████████████████▋ │
│ 2012 │ 519441 │ █████████████████████████▊ │
│ 2013 │ 616209 │ ██████████████████████████████▋ │
│ 2014 │ 724144 │ ████████████████████████████████████▏ │
│ 2015 │ 792112 │ ███████████████████████████████████████▌ │
│ 2016 │ 843568 │ ██████████████████████████████████████████▏ │
│ 2017 │ 982566 │ █████████████████████████████████████████████████▏ │
│ 2018 │ 1016845 │ ██████████████████████████████████████████████████▋ │
│ 2019 │ 1043277 │ ████████████████████████████████████████████████████▏ │
│ 2020 │ 1003963 │ ██████████████████████████████████████████████████
│ 2021 │ 940794 │ ███████████████████████████████████████████████
│ 2012 │ 519442 │ █████████████████████████▊ │
│ 2013 │ 616212 │ ██████████████████████████████▋ │
│ 2014 │ 724154 │ ████████████████████████████████████▏ │
│ 2015 │ 792129 │ ███████████████████████████████████████▌ │
│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │
│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │
│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │
│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │
│ 2020 │ 1011889 │ ██████████████████████████████████████████████████
│ 2021 │ 960343 │ ███████████████████████████████████████████████
└──────┴─────────┴───────────────────────────────────────────────────────┘
27 rows in set. Elapsed: 0.024 sec. Processed 26.25 million rows, 76.88 MB (1.08 billion rows/s., 3.15 GB/s.)
```
Something happened in 2013. I don't have a clue. Maybe you have a clue what happened in 2020?
### The most expensive neighborhoods:
### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods}
```
Query:
```sql
SELECT
town,
district,
@ -210,127 +240,126 @@ GROUP BY
district
HAVING c >= 100
ORDER BY price DESC
LIMIT 100
LIMIT 100;
```
Result:
```text
┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
│ LONDON │ CITY OF WESTMINSTER │ 3372 │ 3305225 │ ██████████████████████████████████████████████████████████████████ │
│ LONDON │ CITY OF LONDON │ 257 │ 3294478 │ █████████████████████████████████████████████████████████████████▊ │
│ LONDON │ KENSINGTON AND CHELSEA │ 2367 │ 2342422 │ ██████████████████████████████████████████████▋ │
│ LEATHERHEAD │ ELMBRIDGE │ 108 │ 1927143 │ ██████████████████████████████████████▌
VIRGINIA WATER │ RUNNYMEDE │ 142 │ 1868819 │ █████████████████████████████████████▍
LONDON │ CAMDEN │ 2815 │ 1736788 │ ██████████████████████████████████
THORNTON HEATH │ CROYDON │ 521 │ 1733051 │ ██████████████████████████████████▋
WINDLESHAM │ SURREY HEATH │ 103 │ 1717255 │ ██████████████████████████████████▎
│ BARNET │ ENFIELD │ 115 │ 1503458 │ ██████████████████████████████ │
OXFORD │ SOUTH OXFORDSHIRE │ 298 │ 1275200 │ █████████████████████████▌
│ LONDON │ ISLINGTON │ 2458 │ 1274308 │ █████████████████████████▍
COBHAM │ ELMBRIDGE │ 364 │ 1260005 │ █████████████████████████▏
│ LONDON │ HOUNSLOW │ 618 │ 1215682 │ ████████████████████████▎ │
ASCOT │ WINDSOR AND MAIDENHEAD │ 379 │ 1215146 │ ████████████████████████▎
LONDON │ RICHMOND UPON THAMES │ 654 │ 1207551 │ ████████████████████████▏
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 307 │ 1186220 │ ███████████████████████▋
│ RICHMOND │ RICHMOND UPON THAMES │ 805 │ 1100420 │ ██████████████████████
│ LONDON │ HAMMERSMITH AND FULHAM │ 2888 │ 1062959 │ █████████████████████▎
WEYBRIDGE │ ELMBRIDGE │ 607 │ 1027161 │ ████████████████████▌
│ RADLETT │ HERTSMERE │ 265 │ 1015896 │ ████████████████████▎
│ SALCOMBE │ SOUTH HAMS │ 124 │ 1014393 │ ████████████████████▎
BURFORD │ WEST OXFORDSHIRE │ 102 │ 993100 │ ███████████████████▋
│ ESHER │ ELMBRIDGE │ 454 │ 969770 │ ███████████████████▍
HINDHEAD │ WAVERLEY │ 128 │ 967786 │ ███████████████████▎
│ BROCKENHURST │ NEW FOREST │ 121 │ 967046 │ ███████████████████▎ │
LEATHERHEAD │ GUILDFORD │ 191 │ 964489 │ ███████████████████▎
GERRARDS CROSS │ BUCKINGHAMSHIRE │ 376 │ 958555 │ ███████████████████▏
EAST MOLESEY │ ELMBRIDGE │ 181 │ 943457 │ ██████████████████▋
OLNEY │ MILTON KEYNES │ 220 │ 942892 │ ██████████████████▋ │
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 135 │ 926950 │ ██████████████████▌ │
HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 509 │ 905732 │ ██████████████████
KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 889 │ 899689 │ █████████████████▊
BELVEDERE │ BEXLEY │ 313 │ 895336 │ █████████████████▊
CRANBROOK │ TUNBRIDGE WELLS │ 404 │ 888190 │ █████████████████▋ │
LONDON │ EALING │ 2460 │ 865893 │ █████████████████▎
MAIDENHEAD │ BUCKINGHAMSHIRE │ 114 │ 863814 │ █████████████████▎
LONDON │ MERTON │ 1958 │ 857192 │ █████████████████▏ │
GUILDFORD │ WAVERLEY │ 131 │ 854447 │ █████████████████
LONDON │ HACKNEY │ 3088 │ 846571 │ ████████████████▊
LYMM │ WARRINGTON │ 285 │ 839920 │ ████████████████▋ │
│ HARPENDEN │ ST ALBANS │ 606 │ 836994 │ ████████████████▋ │
│ LONDON │ WANDSWORTH │ 6113 │ 832292 │ ████████████████▋ │
│ LONDON │ SOUTHWARK │ 3612 │ 831319 │ ████████████████▋ │
BERKHAMSTED │ DACORUM │ 502 │ 830356 │ ████████████████▌
│ KINGS LANGLEY │ DACORUM │ 137 │ 821358 │ ████████████████▍ │
TONBRIDGE │ TUNBRIDGE WELLS │ 339 │ 806736 │ ████████████████▏
EPSOM │ REIGATE AND BANSTEAD │ 157 │ 805903 │ ████████████████ │
WOKING │ GUILDFORD │ 161 │ 803283 │ ████████████████
STOCKBRIDGE │ TEST VALLEY │ 168 │ 801973 │ ████████████████
TEDDINGTON │ RICHMOND UPON THAMES │ 539 │ 798591 │ ███████████████▊
OXFORD │ VALE OF WHITE HORSE │ 329 │ 792907 │ ███████████████▋ │
LONDON │ BARNET │ 3624 │ 789583 │ ███████████████▋ │
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1090 │ 787760 │ ███████████████▋ │
LUTON │ CENTRAL BEDFORDSHIRE │ 196 │ 786051 │ ███████████████▋ │
TONBRIDGE │ MAIDSTONE │ 277 │ 785746 │ ███████████████▋ │
TOWCESTER │ WEST NORTHAMPTONSHIRE │ 186 │ 783532 │ ███████████████▋
│ LONDON │ LAMBETH │ 4832 │ 783422 │ ███████████████▋
LUTTERWORTH │ HARBOROUGH │ 515 │ 781775 │ ███████████████▋
WOODSTOCK │ WEST OXFORDSHIRE │ 135 │ 777499 │ ███████████████▌
ALRESFORD │ WINCHESTER │ 196 │ 775577 │ ███████████████▌
LONDON │ NEWHAM │ 2942 │ 768551 │ ███████████████▎ │
ALDERLEY EDGE │ CHESHIRE EAST │ 168 │ 768280 │ ███████████████▎ │
│ MARLOW │ BUCKINGHAMSHIRE │ 301 │ 762784 │ ███████████████▎
BILLINGSHURST │ CHICHESTER │ 134 │ 760920 │ ███████████████▏ │
LONDON │ TOWER HAMLETS │ 4183 │ 759635 │ ███████████████▏ │
MIDHURST │ CHICHESTER │ 245 │ 759101 │ ███████████████▏
THAMES DITTON │ ELMBRIDGE │ 227 │ 753347 │ ███████████████ │
POTTERS BAR │ WELWYN HATFIELD │ 163 │ 752926 │ ███████████████ │
REIGATE │ REIGATE AND BANSTEAD │ 555 │ 740961 │ ██████████████▋
TADWORTH │ REIGATE AND BANSTEAD │ 477 │ 738997 │ ██████████████▋
SEVENOAKS │ SEVENOAKS │ 1074 │ 734658 │ ██████████████▋ │
PETWORTH │ CHICHESTER │ 138 │ 732432 │ ██████████████▋ │
BOURNE END │ BUCKINGHAMSHIRE │ 127 │ 730742 │ ██████████████▌
PURLEY │ CROYDON │ 540 │ 727721 │ ██████████████▌
OXTED │ TANDRIDGE │ 320 │ 726078 │ ██████████████▌ │
LONDON │ HARINGEY │ 2988 │ 724573 │ ██████████████▍
BANSTEAD │ REIGATE AND BANSTEAD │ 373 │ 713834 │ ██████████████▎
PINNER │ HARROW │ 480 │ 712166 │ ██████████████▏
MALMESBURY │ WILTSHIRE │ 293 │ 707747 │ ██████████████▏
RICKMANSWORTH │ THREE RIVERS │ 732 │ 705400 │ ██████████████
SLOUGH │ BUCKINGHAMSHIRE │ 359 │ 705002 │ ██████████████
GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 214 │ 704904 │ ██████████████ │
READING │ SOUTH OXFORDSHIRE │ 295 │ 701697 │ ██████████████ │
HYTHE │ FOLKESTONE AND HYTHE │ 457 │ 700334 │ ██████████████ │
WELWYN │ WELWYN HATFIELD │ 217 │ 699649 │ █████████████▊ │
CHIGWELL │ EPPING FOREST │ 242 │ 697869 │ █████████████▊ │
BARNET │ BARNET │ 906 │ 695680 │ █████████████▊ │
HASLEMERE │ CHICHESTER │ 120 │ 694028 │ █████████████▊
LEATHERHEAD │ MOLE VALLEY │ 748 │ 692026 │ █████████████▋ │
LONDON │ BRENT │ 1945 │ 690799 │ █████████████▋ │
HASLEMERE │ WAVERLEY │ 258 │ 690765 │ █████████████▋ │
NORTHWOOD │ HILLINGDON │ 252 │ 690753 │ █████████████▋ │
WALTON-ON-THAMES │ ELMBRIDGE │ 871 │ 689431 │ █████████████▋ │
INGATESTONE │ BRENTWOOD │ 150 │ 688345 │ █████████████▋ │
OXFORD │ OXFORD │ 1761 │ 686114 │ █████████████▋ │
CHISLEHURST │ BROMLEY │ 410 │ 682892 │ █████████████▋
KINGS LANGLEY │ THREE RIVERS │ 109 │ 682320 │ █████████████▋
ASHTEAD │ MOLE VALLEY │ 280 │ 680483 │ █████████████▌ │
WOKING │ SURREY HEATH │ 269 │ 679035 │ █████████████▌ │
│ ASCOT │ BRACKNELL FOREST │ 160 │ 678632 │ █████████████▌ │
│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████
│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████
│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████
│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████
LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████
VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ██████████████████████████████████
WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████
THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████
│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │
COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋
│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋
OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍
│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │
LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏
ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌
│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏
│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████
SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏
│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏
│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏
WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏
│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋
LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌
│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │
BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏
HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████
GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████
EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │
LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎
OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏
HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████
LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │
KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋
LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍
CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │
LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏
BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋
GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │
│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │
│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │
│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │
MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍
│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │
BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎
GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │
BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊
WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋
STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋
EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │
TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │
│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │
TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │
LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │
LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍
│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍
OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍
TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍
LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎
WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │
MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │
│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏
LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │
ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │
LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████
PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │
ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │
POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊
HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊
TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │
THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │
REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋
BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋
SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │
OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌
INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌
LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍
LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎
PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎
WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏
RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │
BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │
CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │
PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │
HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │
SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │
WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋
READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │
NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │
FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │
ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │
BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │
WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │
MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │
AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌
CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌
HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │
MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │
│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
100 rows in set. Elapsed: 0.039 sec. Processed 26.25 million rows, 278.03 MB (674.32 million rows/s., 7.14 GB/s.)
```
### Test it in Playground
## Let's Speed Up Queries Using Projections {#speedup-with-projections}
The data is uploaded to ClickHouse Playground, [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==).
[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speed by storing pre-aggregated data.
## Let's speed up queries using projections
### Build a Projection {#build-projection}
[Projections](https://../../sql-reference/statements/alter/projection/) allow to improve queries speed by storing pre-aggregated data.
### Build a projection
```
-- create an aggregate projection by dimensions (toYear(date), district, town)
Create an aggregate projection by dimensions `toYear(date)`, `district`, `town`:
```sql
ALTER TABLE uk_price_paid
ADD PROJECTION projection_by_year_district_town
(
@ -346,25 +375,31 @@ ALTER TABLE uk_price_paid
district,
town
);
```
-- populate the projection for existing data (without it projection will be
-- created for only newly inserted data)
Populate the projection for existing data (without it, the projection will be created only for newly inserted data):
```sql
ALTER TABLE uk_price_paid
MATERIALIZE PROJECTION projection_by_year_district_town
SETTINGS mutations_sync = 1;
```
## Test performance
## Test Performance {#test-performance}
Let's run the same 3 queries.
[Enable](../../operations/settings/settings.md#allow-experimental-projection-optimization) projections for selects:
```sql
SET allow_experimental_projection_optimization = 1;
```
-- enable projections for selects
set allow_experimental_projection_optimization=1;
-- Q1) Average price per year:
### Query 1. Average Price Per Year {#average-price-projections}
Query:
```sql
SELECT
toYear(date) AS year,
round(avg(price)) AS price,
@ -372,41 +407,47 @@ SELECT
FROM uk_price_paid
GROUP BY year
ORDER BY year ASC;
```
Result:
```text
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
│ 1995 │ 67932 │ █████▍ │
│ 1996 │ 71505 │ █████▋ │
│ 1997 │ 78532 │ ██████▎ │
│ 1998 │ 85435 │ ██████▋ │
│ 1999 │ 96036 │ ███████▋ │
│ 2000 │ 107478 │ ████████▌ │
│ 2001 │ 118886 │ █████████▌ │
│ 2002 │ 137940 │ ███████████ │
│ 2003 │ 155888 │ ████████████▍ │
│ 1998 │ 85436 │ ██████▋ │
│ 1999 │ 96037 │ ███████▋ │
│ 2000 │ 107479 │ ████████▌ │
│ 2001 │ 118885 │ █████████▌ │
│ 2002 │ 137941 │ ███████████ │
│ 2003 │ 155889 │ ████████████▍ │
│ 2004 │ 178885 │ ██████████████▎ │
│ 2005 │ 189350 │ ███████████████▏ │
│ 2005 │ 189351 │ ███████████████▏ │
│ 2006 │ 203528 │ ████████████████▎ │
│ 2007 │ 219377 │ █████████████████▌ │
│ 2007 │ 219378 │ █████████████████▌ │
│ 2008 │ 217056 │ █████████████████▎ │
│ 2009 │ 213419 │ █████████████████ │
│ 2010 │ 236110 │ ██████████████████▊ │
│ 2011 │ 232804 │ ██████████████████▌ │
│ 2012 │ 238366 │ ███████████████████ │
│ 2010 │ 236109 │ ██████████████████▊ │
│ 2011 │ 232805 │ ██████████████████▌ │
│ 2012 │ 238367 │ ███████████████████ │
│ 2013 │ 256931 │ ████████████████████▌ │
│ 2014 │ 279917 │ ██████████████████████▍ │
│ 2015 │ 297264 │ ███████████████████████▋ │
│ 2016 │ 313197 │ █████████████████████████ │
│ 2017 │ 346070 │ ███████████████████████████▋ │
│ 2018 │ 350117 │ ████████████████████████████ │
│ 2019 │ 351010 │ ████████████████████████████ │
│ 2020 │ 368974 │ █████████████████████████████▌ │
│ 2021 │ 384351 │ ██████████████████████████████▋
│ 2014 │ 279915 │ ██████████████████████▍ │
│ 2015 │ 297266 │ ███████████████████████▋ │
│ 2016 │ 313201 │ █████████████████████████ │
│ 2017 │ 346097 │ ███████████████████████████▋ │
│ 2018 │ 350116 │ ████████████████████████████ │
│ 2019 │ 351013 │ ████████████████████████████ │
│ 2020 │ 369420 │ █████████████████████████████▌ │
│ 2021 │ 386903 │ ██████████████████████████████▊
└──────┴────────┴────────────────────────────────────────┘
```
27 rows in set. Elapsed: 0.003 sec. Processed 106.87 thousand rows, 3.21 MB (31.92 million rows/s., 959.03 MB/s.)
### Query 2. Average Price Per Year in London {#average-price-london-projections}
-- Q2) Average price per year in London:
Query:
```sql
SELECT
toYear(date) AS year,
round(avg(price)) AS price,
@ -415,42 +456,49 @@ FROM uk_price_paid
WHERE town = 'LONDON'
GROUP BY year
ORDER BY year ASC;
```
Result:
```text
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
│ 1995 │ 109112 │ █████▍ │
│ 1995 │ 109116 │ █████▍ │
│ 1996 │ 118667 │ █████▊ │
│ 1997 │ 136518 │ ██████▋ │
│ 1998 │ 152983 │ ███████▋ │
│ 1999 │ 180633 │ █████████ │
│ 2000 │ 215830 │ ██████████▋ │
│ 2001 │ 232996 │ ███████████▋ │
│ 2002 │ 263672 │ █████████████▏ │
│ 1999 │ 180637 │ █████████ │
│ 2000 │ 215838 │ ██████████▋ │
│ 2001 │ 232994 │ ███████████▋ │
│ 2002 │ 263670 │ █████████████▏ │
│ 2003 │ 278394 │ █████████████▊ │
│ 2004 │ 304665 │ ███████████████▏ │
│ 2004 │ 304666 │ ███████████████▏ │
│ 2005 │ 322875 │ ████████████████▏ │
│ 2006 │ 356192 │ █████████████████▋ │
│ 2007 │ 404055 │ ████████████████████▏ │
│ 2006 │ 356191 │ █████████████████▋ │
│ 2007 │ 404054 │ ████████████████████▏ │
│ 2008 │ 420741 │ █████████████████████ │
│ 2009 │ 427754 │ █████████████████████▍ │
│ 2009 │ 427753 │ █████████████████████▍ │
│ 2010 │ 480306 │ ████████████████████████ │
│ 2011 │ 496274 │ ████████████████████████▋ │
│ 2012 │ 519441 │ █████████████████████████▊ │
│ 2013 │ 616209 │ ██████████████████████████████▋ │
│ 2014 │ 724144 │ ████████████████████████████████████▏ │
│ 2015 │ 792112 │ ███████████████████████████████████████▌ │
│ 2016 │ 843568 │ ██████████████████████████████████████████▏ │
│ 2017 │ 982566 │ █████████████████████████████████████████████████▏ │
│ 2018 │ 1016845 │ ██████████████████████████████████████████████████▋ │
│ 2019 │ 1043277 │ ████████████████████████████████████████████████████▏ │
│ 2020 │ 1003963 │ ██████████████████████████████████████████████████
│ 2021 │ 940794 │ ███████████████████████████████████████████████
│ 2012 │ 519442 │ █████████████████████████▊ │
│ 2013 │ 616212 │ ██████████████████████████████▋ │
│ 2014 │ 724154 │ ████████████████████████████████████▏ │
│ 2015 │ 792129 │ ███████████████████████████████████████▌ │
│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │
│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │
│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │
│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │
│ 2020 │ 1011889 │ ██████████████████████████████████████████████████
│ 2021 │ 960343 │ ███████████████████████████████████████████████
└──────┴─────────┴───────────────────────────────────────────────────────┘
```
27 rows in set. Elapsed: 0.005 sec. Processed 106.87 thousand rows, 3.53 MB (23.49 million rows/s., 775.95 MB/s.)
### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods-projections}
-- Q3) The most expensive neighborhoods:
-- the condition (date >= '2020-01-01') needs to be modified to match projection dimension (toYear(date) >= 2020)
The condition (`date >= '2020-01-01'`) needs to be modified to match the projection dimension (`toYear(date) >= 2020`).
Query:
```sql
SELECT
town,
district,
@ -464,118 +512,138 @@ GROUP BY
district
HAVING c >= 100
ORDER BY price DESC
LIMIT 100
┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
│ LONDON │ CITY OF WESTMINSTER │ 3372 │ 3305225 │ ██████████████████████████████████████████████████████████████████ │
│ LONDON │ CITY OF LONDON │ 257 │ 3294478 │ █████████████████████████████████████████████████████████████████▊ │
│ LONDON │ KENSINGTON AND CHELSEA │ 2367 │ 2342422 │ ██████████████████████████████████████████████▋ │
│ LEATHERHEAD │ ELMBRIDGE │ 108 │ 1927143 │ ██████████████████████████████████████▌ │
│ VIRGINIA WATER │ RUNNYMEDE │ 142 │ 1868819 │ █████████████████████████████████████▍ │
│ LONDON │ CAMDEN │ 2815 │ 1736788 │ ██████████████████████████████████▋ │
│ THORNTON HEATH │ CROYDON │ 521 │ 1733051 │ ██████████████████████████████████▋ │
│ WINDLESHAM │ SURREY HEATH │ 103 │ 1717255 │ ██████████████████████████████████▎ │
│ BARNET │ ENFIELD │ 115 │ 1503458 │ ██████████████████████████████ │
│ OXFORD │ SOUTH OXFORDSHIRE │ 298 │ 1275200 │ █████████████████████████▌ │
│ LONDON │ ISLINGTON │ 2458 │ 1274308 │ █████████████████████████▍ │
│ COBHAM │ ELMBRIDGE │ 364 │ 1260005 │ █████████████████████████▏ │
│ LONDON │ HOUNSLOW │ 618 │ 1215682 │ ████████████████████████▎ │
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 379 │ 1215146 │ ████████████████████████▎ │
│ LONDON │ RICHMOND UPON THAMES │ 654 │ 1207551 │ ████████████████████████▏ │
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 307 │ 1186220 │ ███████████████████████▋ │
│ RICHMOND │ RICHMOND UPON THAMES │ 805 │ 1100420 │ ██████████████████████ │
│ LONDON │ HAMMERSMITH AND FULHAM │ 2888 │ 1062959 │ █████████████████████▎ │
│ WEYBRIDGE │ ELMBRIDGE │ 607 │ 1027161 │ ████████████████████▌ │
│ RADLETT │ HERTSMERE │ 265 │ 1015896 │ ████████████████████▎ │
│ SALCOMBE │ SOUTH HAMS │ 124 │ 1014393 │ ████████████████████▎ │
│ BURFORD │ WEST OXFORDSHIRE │ 102 │ 993100 │ ███████████████████▋ │
│ ESHER │ ELMBRIDGE │ 454 │ 969770 │ ███████████████████▍ │
│ HINDHEAD │ WAVERLEY │ 128 │ 967786 │ ███████████████████▎ │
│ BROCKENHURST │ NEW FOREST │ 121 │ 967046 │ ███████████████████▎ │
│ LEATHERHEAD │ GUILDFORD │ 191 │ 964489 │ ███████████████████▎ │
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 376 │ 958555 │ ███████████████████▏ │
│ EAST MOLESEY │ ELMBRIDGE │ 181 │ 943457 │ ██████████████████▋ │
│ OLNEY │ MILTON KEYNES │ 220 │ 942892 │ ██████████████████▋ │
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 135 │ 926950 │ ██████████████████▌ │
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 509 │ 905732 │ ██████████████████ │
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 889 │ 899689 │ █████████████████▊ │
│ BELVEDERE │ BEXLEY │ 313 │ 895336 │ █████████████████▊ │
│ CRANBROOK │ TUNBRIDGE WELLS │ 404 │ 888190 │ █████████████████▋ │
│ LONDON │ EALING │ 2460 │ 865893 │ █████████████████▎ │
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 114 │ 863814 │ █████████████████▎ │
│ LONDON │ MERTON │ 1958 │ 857192 │ █████████████████▏ │
│ GUILDFORD │ WAVERLEY │ 131 │ 854447 │ █████████████████ │
│ LONDON │ HACKNEY │ 3088 │ 846571 │ ████████████████▊ │
│ LYMM │ WARRINGTON │ 285 │ 839920 │ ████████████████▋ │
│ HARPENDEN │ ST ALBANS │ 606 │ 836994 │ ████████████████▋ │
│ LONDON │ WANDSWORTH │ 6113 │ 832292 │ ████████████████▋ │
│ LONDON │ SOUTHWARK │ 3612 │ 831319 │ ████████████████▋ │
│ BERKHAMSTED │ DACORUM │ 502 │ 830356 │ ████████████████▌ │
│ KINGS LANGLEY │ DACORUM │ 137 │ 821358 │ ████████████████▍ │
│ TONBRIDGE │ TUNBRIDGE WELLS │ 339 │ 806736 │ ████████████████▏ │
│ EPSOM │ REIGATE AND BANSTEAD │ 157 │ 805903 │ ████████████████ │
│ WOKING │ GUILDFORD │ 161 │ 803283 │ ████████████████ │
│ STOCKBRIDGE │ TEST VALLEY │ 168 │ 801973 │ ████████████████ │
│ TEDDINGTON │ RICHMOND UPON THAMES │ 539 │ 798591 │ ███████████████▊ │
│ OXFORD │ VALE OF WHITE HORSE │ 329 │ 792907 │ ███████████████▋ │
│ LONDON │ BARNET │ 3624 │ 789583 │ ███████████████▋ │
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1090 │ 787760 │ ███████████████▋ │
│ LUTON │ CENTRAL BEDFORDSHIRE │ 196 │ 786051 │ ███████████████▋ │
│ TONBRIDGE │ MAIDSTONE │ 277 │ 785746 │ ███████████████▋ │
│ TOWCESTER │ WEST NORTHAMPTONSHIRE │ 186 │ 783532 │ ███████████████▋ │
│ LONDON │ LAMBETH │ 4832 │ 783422 │ ███████████████▋ │
│ LUTTERWORTH │ HARBOROUGH │ 515 │ 781775 │ ███████████████▋ │
│ WOODSTOCK │ WEST OXFORDSHIRE │ 135 │ 777499 │ ███████████████▌ │
│ ALRESFORD │ WINCHESTER │ 196 │ 775577 │ ███████████████▌ │
│ LONDON │ NEWHAM │ 2942 │ 768551 │ ███████████████▎ │
│ ALDERLEY EDGE │ CHESHIRE EAST │ 168 │ 768280 │ ███████████████▎ │
│ MARLOW │ BUCKINGHAMSHIRE │ 301 │ 762784 │ ███████████████▎ │
│ BILLINGSHURST │ CHICHESTER │ 134 │ 760920 │ ███████████████▏ │
│ LONDON │ TOWER HAMLETS │ 4183 │ 759635 │ ███████████████▏ │
│ MIDHURST │ CHICHESTER │ 245 │ 759101 │ ███████████████▏ │
│ THAMES DITTON │ ELMBRIDGE │ 227 │ 753347 │ ███████████████ │
│ POTTERS BAR │ WELWYN HATFIELD │ 163 │ 752926 │ ███████████████ │
│ REIGATE │ REIGATE AND BANSTEAD │ 555 │ 740961 │ ██████████████▋ │
│ TADWORTH │ REIGATE AND BANSTEAD │ 477 │ 738997 │ ██████████████▋ │
│ SEVENOAKS │ SEVENOAKS │ 1074 │ 734658 │ ██████████████▋ │
│ PETWORTH │ CHICHESTER │ 138 │ 732432 │ ██████████████▋ │
│ BOURNE END │ BUCKINGHAMSHIRE │ 127 │ 730742 │ ██████████████▌ │
│ PURLEY │ CROYDON │ 540 │ 727721 │ ██████████████▌ │
│ OXTED │ TANDRIDGE │ 320 │ 726078 │ ██████████████▌ │
│ LONDON │ HARINGEY │ 2988 │ 724573 │ ██████████████▍ │
│ BANSTEAD │ REIGATE AND BANSTEAD │ 373 │ 713834 │ ██████████████▎ │
│ PINNER │ HARROW │ 480 │ 712166 │ ██████████████▏ │
│ MALMESBURY │ WILTSHIRE │ 293 │ 707747 │ ██████████████▏ │
│ RICKMANSWORTH │ THREE RIVERS │ 732 │ 705400 │ ██████████████ │
│ SLOUGH │ BUCKINGHAMSHIRE │ 359 │ 705002 │ ██████████████ │
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 214 │ 704904 │ ██████████████ │
│ READING │ SOUTH OXFORDSHIRE │ 295 │ 701697 │ ██████████████ │
│ HYTHE │ FOLKESTONE AND HYTHE │ 457 │ 700334 │ ██████████████ │
│ WELWYN │ WELWYN HATFIELD │ 217 │ 699649 │ █████████████▊ │
│ CHIGWELL │ EPPING FOREST │ 242 │ 697869 │ █████████████▊ │
│ BARNET │ BARNET │ 906 │ 695680 │ █████████████▊ │
│ HASLEMERE │ CHICHESTER │ 120 │ 694028 │ █████████████▊ │
│ LEATHERHEAD │ MOLE VALLEY │ 748 │ 692026 │ █████████████▋ │
│ LONDON │ BRENT │ 1945 │ 690799 │ █████████████▋ │
│ HASLEMERE │ WAVERLEY │ 258 │ 690765 │ █████████████▋ │
│ NORTHWOOD │ HILLINGDON │ 252 │ 690753 │ █████████████▋ │
│ WALTON-ON-THAMES │ ELMBRIDGE │ 871 │ 689431 │ █████████████▋ │
│ INGATESTONE │ BRENTWOOD │ 150 │ 688345 │ █████████████▋ │
│ OXFORD │ OXFORD │ 1761 │ 686114 │ █████████████▋ │
│ CHISLEHURST │ BROMLEY │ 410 │ 682892 │ █████████████▋ │
│ KINGS LANGLEY │ THREE RIVERS │ 109 │ 682320 │ █████████████▋ │
│ ASHTEAD │ MOLE VALLEY │ 280 │ 680483 │ █████████████▌ │
│ WOKING │ SURREY HEATH │ 269 │ 679035 │ █████████████▌ │
│ ASCOT │ BRACKNELL FOREST │ 160 │ 678632 │ █████████████▌ │
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
100 rows in set. Elapsed: 0.005 sec. Processed 12.85 thousand rows, 813.40 KB (2.73 million rows/s., 172.95 MB/s.)
LIMIT 100;
```
Result:
```text
┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │
│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │
│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │
│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │
│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │
│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │
│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │
│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │
│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │
│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │
│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │
│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │
│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │
│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │
│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │
│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │
│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │
│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │
│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │
│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │
│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │
│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │
│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │
│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │
│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │
│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │
│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │
│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │
│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │
│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │
│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │
│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │
│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │
│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │
│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │
│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │
│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │
│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │
│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │
│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │
│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │
│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │
│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │
│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │
│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │
│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │
│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │
│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │
│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │
│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │
│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │
│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │
│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │
│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │
│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │
│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │
│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │
│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │
│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │
│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │
│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │
│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │
│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │
│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │
│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │
│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │
│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │
│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │
│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │
│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │
│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │
│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │
│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │
│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │
│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │
│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │
│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │
│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │
│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │
│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │
│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │
│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │
│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │
│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │
│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │
│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │
│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │
│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │
│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │
│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │
│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
```
### Summary {#summary}
All 3 queries work much faster and read fewer rows.
```text
Query 1
no projection: 27 rows in set. Elapsed: 0.158 sec. Processed 26.32 million rows, 157.93 MB (166.57 million rows/s., 999.39 MB/s.)
projection: 27 rows in set. Elapsed: 0.007 sec. Processed 105.96 thousand rows, 3.33 MB (14.58 million rows/s., 458.13 MB/s.)
Query 2
no projection: 27 rows in set. Elapsed: 0.163 sec. Processed 26.32 million rows, 80.01 MB (161.75 million rows/s., 491.64 MB/s.)
projection: 27 rows in set. Elapsed: 0.008 sec. Processed 105.96 thousand rows, 3.67 MB (13.29 million rows/s., 459.89 MB/s.)
Query 3
no projection: 100 rows in set. Elapsed: 0.069 sec. Processed 26.32 million rows, 62.47 MB (382.13 million rows/s., 906.93 MB/s.)
projection: 100 rows in set. Elapsed: 0.029 sec. Processed 8.08 thousand rows, 511.08 KB (276.06 thousand rows/s., 17.47 MB/s.)
```
Q1)
no projection: 27 rows in set. Elapsed: 0.027 sec. Processed 26.25 million rows, 157.49 MB (955.96 million rows/s., 5.74 GB/s.)
projection: 27 rows in set. Elapsed: 0.003 sec. Processed 106.87 thousand rows, 3.21 MB (31.92 million rows/s., 959.03 MB/s.)
```
### Test It in Playground {#playground}
The dataset is also available in the [Online Playground](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==).

View File

@ -29,7 +29,7 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun
If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments).
You can also download and install packages manually from [here](https://repo.clickhouse.tech/deb/stable/main/).
You can also download and install packages manually from [here](https://repo.clickhouse.com/deb/stable/main/).
#### Packages {#packages}
@ -50,8 +50,8 @@ First, you need to add the official repository:
``` bash
sudo yum install yum-utils
sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_64
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64
```
If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). `prestable` is sometimes also available.
@ -62,21 +62,21 @@ Then run these commands to install packages:
sudo yum install clickhouse-server clickhouse-client
```
You can also download and install packages manually from [here](https://repo.clickhouse.tech/rpm/stable/x86_64).
You can also download and install packages manually from [here](https://repo.clickhouse.com/rpm/stable/x86_64).
### From Tgz Archives {#from-tgz-archives}
It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible.
The required version can be downloaded with `curl` or `wget` from repository https://repo.clickhouse.tech/tgz/.
The required version can be downloaded with `curl` or `wget` from repository https://repo.clickhouse.com/tgz/.
After that, the downloaded archives should be unpacked and installed with installation scripts. Example for the latest version:
``` bash
export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-client-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-client-$LATEST_VERSION.tgz
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh

View File

@ -93,6 +93,17 @@ Works with tables in the MergeTree family.
If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition reduces the amount of data to read. For more information about data ranges in MergeTree tables, see [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
## use_skip_indexes {#settings-use_skip_indexes}
Use data skipping indexes during query execution.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 1.
## force_data_skipping_indices {#settings-force_data_skipping_indices}
Disables query execution if the passed data skipping indices weren't used.
@ -3630,7 +3641,7 @@ Default value: `enable`.
## max_hyperscan_regexp_length {#max-hyperscan-regexp-length}
Defines the maximum length for each regular expression in the [hyperscan multi-match functions](../../sql-reference/functions/string-search-functions.md#multimatchanyhaystack-pattern1-pattern2-patternn).
Defines the maximum length for each regular expression in the [hyperscan multi-match functions](../../sql-reference/functions/string-search-functions.md#multimatchanyhaystack-pattern1-pattern2-patternn).
Possible values:
@ -3673,7 +3684,7 @@ Exception: Regexp length too large.
## max_hyperscan_regexp_total_length {#max-hyperscan-regexp-total-length}
Sets the maximum length total of all regular expressions in each [hyperscan multi-match function](../../sql-reference/functions/string-search-functions.md#multimatchanyhaystack-pattern1-pattern2-patternn).
Sets the maximum total length of all regular expressions in each [hyperscan multi-match function](../../sql-reference/functions/string-search-functions.md#multimatchanyhaystack-pattern1-pattern2-patternn).
Possible values:

View File

@ -30,7 +30,7 @@ Debian や Ubuntu 用にコンパイル済みの公式パッケージ `deb` を
最新版を使いたい場合は、`stable`を`testing`に置き換えてください。(テスト環境ではこれを推奨します)
同様に、[こちら](https://repo.clickhouse.tech/deb/stable/main/)からパッケージをダウンロードして、手動でインストールすることもできます。
同様に、[こちら](https://repo.clickhouse.com/deb/stable/main/)からパッケージをダウンロードして、手動でインストールすることもできます。
#### パッケージ {#packages}
@ -47,8 +47,8 @@ CentOS、RedHat、その他すべてのrpmベースのLinuxディストリビュ
``` bash
sudo yum install yum-utils
sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_64
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64
```
最新版を使いたい場合は `stable` を `testing` に置き換えてください。(テスト環境ではこれが推奨されています)。`prestable` もしばしば同様に利用できます。
@ -59,20 +59,20 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_6
sudo yum install clickhouse-server clickhouse-client
```
同様に、[こちら](https://repo.clickhouse.tech/rpm/stable/x86_64) からパッケージをダウンロードして、手動でインストールすることもできます。
同様に、[こちら](https://repo.clickhouse.com/rpm/stable/x86_64) からパッケージをダウンロードして、手動でインストールすることもできます。
### Tgzアーカイブから {#from-tgz-archives}
すべての Linux ディストリビューションで、`deb` や `rpm` パッケージがインストールできない場合は、公式のコンパイル済み `tgz` アーカイブを使用することをお勧めします。
必要なバージョンは、リポジトリ https://repo.clickhouse.tech/tgz/ から `curl` または `wget` でダウンロードできます。その後、ダウンロードしたアーカイブを解凍し、インストールスクリプトでインストールしてください。最新版の例は以下です:
必要なバージョンは、リポジトリ https://repo.clickhouse.com/tgz/ から `curl` または `wget` でダウンロードできます。その後、ダウンロードしたアーカイブを解凍し、インストールスクリプトでインストールしてください。最新版の例は以下です:
``` bash
export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-client-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-client-$LATEST_VERSION.tgz
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh

View File

@ -9,12 +9,16 @@ toc_title: "Введение"
Этот раздел описывает как получить тестовые массивы данных и загрузить их в ClickHouse.
Для некоторых тестовых массивов данных также доступны тестовые запросы.
- [Анонимизированные данные Яндекс.Метрики](metrica.md)
- [Star Schema Benchmark](star-schema.md)
- [WikiStat](wikistat.md)
- [Терабайт логов кликов от Criteo](criteo.md)
- [AMPLab Big Data Benchmark](amplab-benchmark.md)
- [Данные о такси в Нью-Йорке](nyc-taxi.md)
- [OnTime](ontime.md)
- [Анонимизированные данные Яндекс.Метрики](../../getting-started/example-datasets/metrica.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
- [Набор данных кулинарных рецептов](../../getting-started/example-datasets/recipes.md)
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
- [Терабайт логов кликов от Criteo](../../getting-started/example-datasets/criteo.md)
- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
- [Данные о такси в Нью-Йорке](../../getting-started/example-datasets/nyc-taxi.md)
- [Набор данных о воздушном движении OpenSky Network 2020](../../getting-started/example-datasets/opensky.md)
- [Данные о стоимости недвижимости в Великобритании](../../getting-started/example-datasets/uk-price-paid.md)
- [OnTime](../../getting-started/example-datasets/ontime.md)
- [Вышки сотовой связи](../../getting-started/example-datasets/cell-towers.md)
[Оригинальная статья](https://clickhouse.tech/docs/ru/getting_started/example_datasets) <!--hide-->

View File

@ -1 +0,0 @@
../../../en/getting-started/example-datasets/opensky.md

View File

@ -0,0 +1,422 @@
---
toc_priority: 20
toc_title: Набор данных о воздушном движении OpenSky Network 2020
---
# Набор данных о воздушном движении OpenSky Network 2020 {#opensky}
"Данные в этом наборе получены и отфильтрованы из полного набора данных OpenSky, чтобы проиллюстрировать развитие воздушного движения во время пандемии COVID-19. Набор включает в себя все рейсы, зафиксированные участниками сети (их более 2500) с 1 января 2019 года. Дополнительные данные будут периодически включаться в набор данных до окончания пандемии COVID-19".
Источник: https://zenodo.org/record/5092942#.YRBCyTpRXYd
Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent Lenders
"Crowdsourced air traffic data from the OpenSky Network 2019–2020"
Earth System Science Data 13(2), 2021
https://doi.org/10.5194/essd-13-357-2021
## Загрузите набор данных {#download-dataset}
Выполните команду:
```bash
wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget
```
Загрузка займет около 2 минут при хорошем подключении к интернету. Будет загружено 30 файлов общим размером 4,3 ГБ.
## Создайте таблицу {#create-table}
```sql
CREATE TABLE opensky
(
callsign String,
number String,
icao24 String,
registration String,
typecode String,
origin String,
destination String,
firstseen DateTime,
lastseen DateTime,
day DateTime,
latitude_1 Float64,
longitude_1 Float64,
altitude_1 Float64,
latitude_2 Float64,
longitude_2 Float64,
altitude_2 Float64
) ENGINE = MergeTree ORDER BY (origin, destination, callsign);
```
## Импортируйте данные в ClickHouse {#import-data}
Загрузите данные в ClickHouse параллельными потоками:
```bash
ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"'
```
- Список файлов передаётся (`ls -1 flightlist_*.csv.gz`) в `xargs` для параллельной обработки.
- `xargs -P100` указывает на возможность использования до 100 параллельных обработчиков, но поскольку у нас всего 30 файлов, то количество обработчиков будет всего 30.
- Для каждого файла `xargs` будет запускать скрипт с `bash -c`. Сценарий содержит подстановку в виде `{}`, и команда `xargs` заменяет её именем файла (мы указали это для `xargs` с помощью `-I{}`).
- Скрипт распакует файл (`gzip -c -d "{}"`) в стандартный вывод (параметр `-c`) и перенаправит его в `clickhouse-client`.
- Чтобы распознать формат ISO-8601 со смещениями часовых поясов в полях типа [DateTime](../../sql-reference/data-types/datetime.md), указывается параметр парсера [--date_time_input_format best_effort](../../operations/settings/settings.md#settings-date_time_input_format).
В итоге: клиент clickhouse добавит данные в таблицу `opensky`. Входные данные импортируются в формате [CSVWithNames](../../interfaces/formats.md#csvwithnames).
Загрузка параллельными потоками займёт около 24 секунд.
Также вы можете использовать вариант последовательной загрузки:
```bash
for file in flightlist_*.csv.gz; do gzip -c -d "$file" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"; done
```
## Проверьте импортированные данные {#validate-data}
Запрос:
```sql
SELECT count() FROM opensky;
```
Результат:
```text
┌──count()─┐
│ 66010819 │
└──────────┘
```
Убедитесь, что размер набора данных в ClickHouse составляет всего 2,66 GiB.
Запрос:
```sql
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'opensky';
```
Результат:
```text
┌─formatReadableSize(total_bytes)─┐
│ 2.66 GiB │
└─────────────────────────────────┘
```
## Примеры {#run-queries}
Общее пройденное расстояние составляет 68 миллиардов километров.
Запрос:
```sql
SELECT formatReadableQuantity(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) / 1000) FROM opensky;
```
Результат:
```text
┌─formatReadableQuantity(divide(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 1000))─┐
│ 68.72 billion │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
Средняя дальность полета составляет около 1000 км.
Запрос:
```sql
SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky;
```
Результат:
```text
┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐
│ 1041090.6465708319 │
└────────────────────────────────────────────────────────────────────┘
```
### Наиболее загруженные аэропорты отправления и среднее пройденное расстояние {#busy-airports-average-distance}
Запрос:
```sql
SELECT
origin,
count(),
round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))) AS distance,
bar(distance, 0, 10000000, 100) AS bar
FROM opensky
WHERE origin != ''
GROUP BY origin
ORDER BY count() DESC
LIMIT 100;
```
Результат:
```text
┌─origin─┬─count()─┬─distance─┬─bar────────────────────────────────────┐
1. │ KORD │ 745007 │ 1546108 │ ███████████████▍ │
2. │ KDFW │ 696702 │ 1358721 │ █████████████▌ │
3. │ KATL │ 667286 │ 1169661 │ ███████████▋ │
4. │ KDEN │ 582709 │ 1287742 │ ████████████▊ │
5. │ KLAX │ 581952 │ 2628393 │ ██████████████████████████▎ │
6. │ KLAS │ 447789 │ 1336967 │ █████████████▎ │
7. │ KPHX │ 428558 │ 1345635 │ █████████████▍ │
8. │ KSEA │ 412592 │ 1757317 │ █████████████████▌ │
9. │ KCLT │ 404612 │ 880355 │ ████████▋ │
10. │ VIDP │ 363074 │ 1445052 │ ██████████████▍ │
11. │ EDDF │ 362643 │ 2263960 │ ██████████████████████▋ │
12. │ KSFO │ 361869 │ 2445732 │ ████████████████████████▍ │
13. │ KJFK │ 349232 │ 2996550 │ █████████████████████████████▊ │
14. │ KMSP │ 346010 │ 1287328 │ ████████████▋ │
15. │ LFPG │ 344748 │ 2206203 │ ██████████████████████ │
16. │ EGLL │ 341370 │ 3216593 │ ████████████████████████████████▏ │
17. │ EHAM │ 340272 │ 2116425 │ █████████████████████▏ │
18. │ KEWR │ 337696 │ 1826545 │ ██████████████████▎ │
19. │ KPHL │ 320762 │ 1291761 │ ████████████▊ │
20. │ OMDB │ 308855 │ 2855706 │ ████████████████████████████▌ │
21. │ UUEE │ 307098 │ 1555122 │ ███████████████▌ │
22. │ KBOS │ 304416 │ 1621675 │ ████████████████▏ │
23. │ LEMD │ 291787 │ 1695097 │ ████████████████▊ │
24. │ YSSY │ 272979 │ 1875298 │ ██████████████████▋ │
25. │ KMIA │ 265121 │ 1923542 │ ███████████████████▏ │
26. │ ZGSZ │ 263497 │ 745086 │ ███████▍ │
27. │ EDDM │ 256691 │ 1361453 │ █████████████▌ │
28. │ WMKK │ 254264 │ 1626688 │ ████████████████▎ │
29. │ CYYZ │ 251192 │ 2175026 │ █████████████████████▋ │
30. │ KLGA │ 248699 │ 1106935 │ ███████████ │
31. │ VHHH │ 248473 │ 3457658 │ ██████████████████████████████████▌ │
32. │ RJTT │ 243477 │ 1272744 │ ████████████▋ │
33. │ KBWI │ 241440 │ 1187060 │ ███████████▋ │
34. │ KIAD │ 239558 │ 1683485 │ ████████████████▋ │
35. │ KIAH │ 234202 │ 1538335 │ ███████████████▍ │
36. │ KFLL │ 223447 │ 1464410 │ ██████████████▋ │
37. │ KDAL │ 212055 │ 1082339 │ ██████████▋ │
38. │ KDCA │ 207883 │ 1013359 │ ██████████▏ │
39. │ LIRF │ 207047 │ 1427965 │ ██████████████▎ │
40. │ PANC │ 206007 │ 2525359 │ █████████████████████████▎ │
41. │ LTFJ │ 205415 │ 860470 │ ████████▌ │
42. │ KDTW │ 204020 │ 1106716 │ ███████████ │
43. │ VABB │ 201679 │ 1300865 │ █████████████ │
44. │ OTHH │ 200797 │ 3759544 │ █████████████████████████████████████▌ │
45. │ KMDW │ 200796 │ 1232551 │ ████████████▎ │
46. │ KSAN │ 198003 │ 1495195 │ ██████████████▊ │
47. │ KPDX │ 197760 │ 1269230 │ ████████████▋ │
48. │ SBGR │ 197624 │ 2041697 │ ████████████████████▍ │
49. │ VOBL │ 189011 │ 1040180 │ ██████████▍ │
50. │ LEBL │ 188956 │ 1283190 │ ████████████▋ │
51. │ YBBN │ 188011 │ 1253405 │ ████████████▌ │
52. │ LSZH │ 187934 │ 1572029 │ ███████████████▋ │
53. │ YMML │ 187643 │ 1870076 │ ██████████████████▋ │
54. │ RCTP │ 184466 │ 2773976 │ ███████████████████████████▋ │
55. │ KSNA │ 180045 │ 778484 │ ███████▋ │
56. │ EGKK │ 176420 │ 1694770 │ ████████████████▊ │
57. │ LOWW │ 176191 │ 1274833 │ ████████████▋ │
58. │ UUDD │ 176099 │ 1368226 │ █████████████▋ │
59. │ RKSI │ 173466 │ 3079026 │ ██████████████████████████████▋ │
60. │ EKCH │ 172128 │ 1229895 │ ████████████▎ │
61. │ KOAK │ 171119 │ 1114447 │ ███████████▏ │
62. │ RPLL │ 170122 │ 1440735 │ ██████████████▍ │
63. │ KRDU │ 167001 │ 830521 │ ████████▎ │
64. │ KAUS │ 164524 │ 1256198 │ ████████████▌ │
65. │ KBNA │ 163242 │ 1022726 │ ██████████▏ │
66. │ KSDF │ 162655 │ 1380867 │ █████████████▋ │
67. │ ENGM │ 160732 │ 910108 │ █████████ │
68. │ LIMC │ 160696 │ 1564620 │ ███████████████▋ │
69. │ KSJC │ 159278 │ 1081125 │ ██████████▋ │
70. │ KSTL │ 157984 │ 1026699 │ ██████████▎ │
71. │ UUWW │ 156811 │ 1261155 │ ████████████▌ │
72. │ KIND │ 153929 │ 987944 │ █████████▊ │
73. │ ESSA │ 153390 │ 1203439 │ ████████████ │
74. │ KMCO │ 153351 │ 1508657 │ ███████████████ │
75. │ KDVT │ 152895 │ 74048 │ ▋ │
76. │ VTBS │ 152645 │ 2255591 │ ██████████████████████▌ │
77. │ CYVR │ 149574 │ 2027413 │ ████████████████████▎ │
78. │ EIDW │ 148723 │ 1503985 │ ███████████████ │
79. │ LFPO │ 143277 │ 1152964 │ ███████████▌ │
80. │ EGSS │ 140830 │ 1348183 │ █████████████▍ │
81. │ KAPA │ 140776 │ 420441 │ ████▏ │
82. │ KHOU │ 138985 │ 1068806 │ ██████████▋ │
83. │ KTPA │ 138033 │ 1338223 │ █████████████▍ │
84. │ KFFZ │ 137333 │ 55397 │ ▌ │
85. │ NZAA │ 136092 │ 1581264 │ ███████████████▋ │
86. │ YPPH │ 133916 │ 1271550 │ ████████████▋ │
87. │ RJBB │ 133522 │ 1805623 │ ██████████████████ │
88. │ EDDL │ 133018 │ 1265919 │ ████████████▋ │
89. │ ULLI │ 130501 │ 1197108 │ ███████████▊ │
90. │ KIWA │ 127195 │ 250876 │ ██▌ │
91. │ KTEB │ 126969 │ 1189414 │ ███████████▊ │
92. │ VOMM │ 125616 │ 1127757 │ ███████████▎ │
93. │ LSGG │ 123998 │ 1049101 │ ██████████▍ │
94. │ LPPT │ 122733 │ 1779187 │ █████████████████▋ │
95. │ WSSS │ 120493 │ 3264122 │ ████████████████████████████████▋ │
96. │ EBBR │ 118539 │ 1579939 │ ███████████████▋ │
97. │ VTBD │ 118107 │ 661627 │ ██████▌ │
98. │ KVNY │ 116326 │ 692960 │ ██████▊ │
99. │ EDDT │ 115122 │ 941740 │ █████████▍ │
100. │ EFHK │ 114860 │ 1629143 │ ████████████████▎ │
└────────┴─────────┴──────────┴────────────────────────────────────────┘
```
### Количество рейсов из трех крупных аэропортов Москвы, еженедельно {#flights-from-moscow}
Запрос:
```sql
SELECT
toMonday(day) AS k,
count() AS c,
bar(c, 0, 10000, 100) AS bar
FROM opensky
WHERE origin IN ('UUEE', 'UUDD', 'UUWW')
GROUP BY k
ORDER BY k ASC;
```
Результат:
```text
┌──────────k─┬────c─┬─bar──────────────────────────────────────────────────────────────────────────┐
1. │ 2018-12-31 │ 5248 │ ████████████████████████████████████████████████████▍ │
2. │ 2019-01-07 │ 6302 │ ███████████████████████████████████████████████████████████████ │
3. │ 2019-01-14 │ 5701 │ █████████████████████████████████████████████████████████ │
4. │ 2019-01-21 │ 5638 │ ████████████████████████████████████████████████████████▍ │
5. │ 2019-01-28 │ 5731 │ █████████████████████████████████████████████████████████▎ │
6. │ 2019-02-04 │ 5683 │ ████████████████████████████████████████████████████████▋ │
7. │ 2019-02-11 │ 5759 │ █████████████████████████████████████████████████████████▌ │
8. │ 2019-02-18 │ 5736 │ █████████████████████████████████████████████████████████▎ │
9. │ 2019-02-25 │ 5873 │ ██████████████████████████████████████████████████████████▋ │
10. │ 2019-03-04 │ 5965 │ ███████████████████████████████████████████████████████████▋ │
11. │ 2019-03-11 │ 5900 │ ███████████████████████████████████████████████████████████ │
12. │ 2019-03-18 │ 5823 │ ██████████████████████████████████████████████████████████▏ │
13. │ 2019-03-25 │ 5899 │ ██████████████████████████████████████████████████████████▊ │
14. │ 2019-04-01 │ 6043 │ ████████████████████████████████████████████████████████████▍ │
15. │ 2019-04-08 │ 6098 │ ████████████████████████████████████████████████████████████▊ │
16. │ 2019-04-15 │ 6196 │ █████████████████████████████████████████████████████████████▊ │
17. │ 2019-04-22 │ 6486 │ ████████████████████████████████████████████████████████████████▋ │
18. │ 2019-04-29 │ 6682 │ ██████████████████████████████████████████████████████████████████▋ │
19. │ 2019-05-06 │ 6739 │ ███████████████████████████████████████████████████████████████████▍ │
20. │ 2019-05-13 │ 6600 │ ██████████████████████████████████████████████████████████████████ │
21. │ 2019-05-20 │ 6575 │ █████████████████████████████████████████████████████████████████▋ │
22. │ 2019-05-27 │ 6786 │ ███████████████████████████████████████████████████████████████████▋ │
23. │ 2019-06-03 │ 6872 │ ████████████████████████████████████████████████████████████████████▋ │
24. │ 2019-06-10 │ 7045 │ ██████████████████████████████████████████████████████████████████████▍ │
25. │ 2019-06-17 │ 7045 │ ██████████████████████████████████████████████████████████████████████▍ │
26. │ 2019-06-24 │ 6852 │ ████████████████████████████████████████████████████████████████████▌ │
27. │ 2019-07-01 │ 7248 │ ████████████████████████████████████████████████████████████████████████▍ │
28. │ 2019-07-08 │ 7284 │ ████████████████████████████████████████████████████████████████████████▋ │
29. │ 2019-07-15 │ 7142 │ ███████████████████████████████████████████████████████████████████████▍ │
30. │ 2019-07-22 │ 7108 │ ███████████████████████████████████████████████████████████████████████ │
31. │ 2019-07-29 │ 7251 │ ████████████████████████████████████████████████████████████████████████▌ │
32. │ 2019-08-05 │ 7403 │ ██████████████████████████████████████████████████████████████████████████ │
33. │ 2019-08-12 │ 7457 │ ██████████████████████████████████████████████████████████████████████████▌ │
34. │ 2019-08-19 │ 7502 │ ███████████████████████████████████████████████████████████████████████████ │
35. │ 2019-08-26 │ 7540 │ ███████████████████████████████████████████████████████████████████████████▍ │
36. │ 2019-09-02 │ 7237 │ ████████████████████████████████████████████████████████████████████████▎ │
37. │ 2019-09-09 │ 7328 │ █████████████████████████████████████████████████████████████████████████▎ │
38. │ 2019-09-16 │ 5566 │ ███████████████████████████████████████████████████████▋ │
39. │ 2019-09-23 │ 7049 │ ██████████████████████████████████████████████████████████████████████▍ │
40. │ 2019-09-30 │ 6880 │ ████████████████████████████████████████████████████████████████████▋ │
41. │ 2019-10-07 │ 6518 │ █████████████████████████████████████████████████████████████████▏ │
42. │ 2019-10-14 │ 6688 │ ██████████████████████████████████████████████████████████████████▊ │
43. │ 2019-10-21 │ 6667 │ ██████████████████████████████████████████████████████████████████▋ │
44. │ 2019-10-28 │ 6303 │ ███████████████████████████████████████████████████████████████ │
45. │ 2019-11-04 │ 6298 │ ██████████████████████████████████████████████████████████████▊ │
46. │ 2019-11-11 │ 6137 │ █████████████████████████████████████████████████████████████▎ │
47. │ 2019-11-18 │ 6051 │ ████████████████████████████████████████████████████████████▌ │
48. │ 2019-11-25 │ 5820 │ ██████████████████████████████████████████████████████████▏ │
49. │ 2019-12-02 │ 5942 │ ███████████████████████████████████████████████████████████▍ │
50. │ 2019-12-09 │ 4891 │ ████████████████████████████████████████████████▊ │
51. │ 2019-12-16 │ 5682 │ ████████████████████████████████████████████████████████▋ │
52. │ 2019-12-23 │ 6111 │ █████████████████████████████████████████████████████████████ │
53. │ 2019-12-30 │ 5870 │ ██████████████████████████████████████████████████████████▋ │
54. │ 2020-01-06 │ 5953 │ ███████████████████████████████████████████████████████████▌ │
55. │ 2020-01-13 │ 5698 │ ████████████████████████████████████████████████████████▊ │
56. │ 2020-01-20 │ 5339 │ █████████████████████████████████████████████████████▍ │
57. │ 2020-01-27 │ 5566 │ ███████████████████████████████████████████████████████▋ │
58. │ 2020-02-03 │ 5801 │ ██████████████████████████████████████████████████████████ │
59. │ 2020-02-10 │ 5692 │ ████████████████████████████████████████████████████████▊ │
60. │ 2020-02-17 │ 5912 │ ███████████████████████████████████████████████████████████ │
61. │ 2020-02-24 │ 6031 │ ████████████████████████████████████████████████████████████▎ │
62. │ 2020-03-02 │ 6105 │ █████████████████████████████████████████████████████████████ │
63. │ 2020-03-09 │ 5823 │ ██████████████████████████████████████████████████████████▏ │
64. │ 2020-03-16 │ 4659 │ ██████████████████████████████████████████████▌ │
65. │ 2020-03-23 │ 3720 │ █████████████████████████████████████▏ │
66. │ 2020-03-30 │ 1720 │ █████████████████▏ │
67. │ 2020-04-06 │ 849 │ ████████▍ │
68. │ 2020-04-13 │ 710 │ ███████ │
69. │ 2020-04-20 │ 725 │ ███████▏ │
70. │ 2020-04-27 │ 920 │ █████████▏ │
71. │ 2020-05-04 │ 859 │ ████████▌ │
72. │ 2020-05-11 │ 1047 │ ██████████▍ │
73. │ 2020-05-18 │ 1135 │ ███████████▎ │
74. │ 2020-05-25 │ 1266 │ ████████████▋ │
75. │ 2020-06-01 │ 1793 │ █████████████████▊ │
76. │ 2020-06-08 │ 1979 │ ███████████████████▋ │
77. │ 2020-06-15 │ 2297 │ ██████████████████████▊ │
78. │ 2020-06-22 │ 2788 │ ███████████████████████████▊ │
79. │ 2020-06-29 │ 3389 │ █████████████████████████████████▊ │
80. │ 2020-07-06 │ 3545 │ ███████████████████████████████████▍ │
81. │ 2020-07-13 │ 3569 │ ███████████████████████████████████▋ │
82. │ 2020-07-20 │ 3784 │ █████████████████████████████████████▋ │
83. │ 2020-07-27 │ 3960 │ ███████████████████████████████████████▌ │
84. │ 2020-08-03 │ 4323 │ ███████████████████████████████████████████▏ │
85. │ 2020-08-10 │ 4581 │ █████████████████████████████████████████████▋ │
86. │ 2020-08-17 │ 4791 │ ███████████████████████████████████████████████▊ │
87. │ 2020-08-24 │ 4928 │ █████████████████████████████████████████████████▎ │
88. │ 2020-08-31 │ 4687 │ ██████████████████████████████████████████████▋ │
89. │ 2020-09-07 │ 4643 │ ██████████████████████████████████████████████▍ │
90. │ 2020-09-14 │ 4594 │ █████████████████████████████████████████████▊ │
91. │ 2020-09-21 │ 4478 │ ████████████████████████████████████████████▋ │
92. │ 2020-09-28 │ 4382 │ ███████████████████████████████████████████▋ │
93. │ 2020-10-05 │ 4261 │ ██████████████████████████████████████████▌ │
94. │ 2020-10-12 │ 4243 │ ██████████████████████████████████████████▍ │
95. │ 2020-10-19 │ 3941 │ ███████████████████████████████████████▍ │
96. │ 2020-10-26 │ 3616 │ ████████████████████████████████████▏ │
97. │ 2020-11-02 │ 3586 │ ███████████████████████████████████▋ │
98. │ 2020-11-09 │ 3403 │ ██████████████████████████████████ │
99. │ 2020-11-16 │ 3336 │ █████████████████████████████████▎ │
100. │ 2020-11-23 │ 3230 │ ████████████████████████████████▎ │
101. │ 2020-11-30 │ 3183 │ ███████████████████████████████▋ │
102. │ 2020-12-07 │ 3285 │ ████████████████████████████████▋ │
103. │ 2020-12-14 │ 3367 │ █████████████████████████████████▋ │
104. │ 2020-12-21 │ 3748 │ █████████████████████████████████████▍ │
105. │ 2020-12-28 │ 3986 │ ███████████████████████████████████████▋ │
106. │ 2021-01-04 │ 3906 │ ███████████████████████████████████████ │
107. │ 2021-01-11 │ 3425 │ ██████████████████████████████████▎ │
108. │ 2021-01-18 │ 3144 │ ███████████████████████████████▍ │
109. │ 2021-01-25 │ 3115 │ ███████████████████████████████▏ │
110. │ 2021-02-01 │ 3285 │ ████████████████████████████████▋ │
111. │ 2021-02-08 │ 3321 │ █████████████████████████████████▏ │
112. │ 2021-02-15 │ 3475 │ ██████████████████████████████████▋ │
113. │ 2021-02-22 │ 3549 │ ███████████████████████████████████▍ │
114. │ 2021-03-01 │ 3755 │ █████████████████████████████████████▌ │
115. │ 2021-03-08 │ 3080 │ ██████████████████████████████▋ │
116. │ 2021-03-15 │ 3789 │ █████████████████████████████████████▊ │
117. │ 2021-03-22 │ 3804 │ ██████████████████████████████████████ │
118. │ 2021-03-29 │ 4238 │ ██████████████████████████████████████████▍ │
119. │ 2021-04-05 │ 4307 │ ███████████████████████████████████████████ │
120. │ 2021-04-12 │ 4225 │ ██████████████████████████████████████████▎ │
121. │ 2021-04-19 │ 4391 │ ███████████████████████████████████████████▊ │
122. │ 2021-04-26 │ 4868 │ ████████████████████████████████████████████████▋ │
123. │ 2021-05-03 │ 4977 │ █████████████████████████████████████████████████▋ │
124. │ 2021-05-10 │ 5164 │ ███████████████████████████████████████████████████▋ │
125. │ 2021-05-17 │ 4986 │ █████████████████████████████████████████████████▋ │
126. │ 2021-05-24 │ 5024 │ ██████████████████████████████████████████████████▏ │
127. │ 2021-05-31 │ 4824 │ ████████████████████████████████████████████████▏ │
128. │ 2021-06-07 │ 5652 │ ████████████████████████████████████████████████████████▌ │
129. │ 2021-06-14 │ 5613 │ ████████████████████████████████████████████████████████▏ │
130. │ 2021-06-21 │ 6061 │ ████████████████████████████████████████████████████████████▌ │
131. │ 2021-06-28 │ 2554 │ █████████████████████████▌ │
└────────────┴──────┴──────────────────────────────────────────────────────────────────────────────┘
```
### Online Playground {#playground}
Вы можете протестировать другие запросы к этому набору данных с помощью интерактивного ресурса [Online Playground](https://gh-api.clickhouse.tech/play?user=play). Например, [вот так](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). Однако обратите внимание, что здесь нельзя создавать временные таблицы.

View File

@ -1 +0,0 @@
../../../en/getting-started/example-datasets/uk-price-paid.md

View File

@ -0,0 +1,650 @@
---
toc_priority: 20
toc_title: Набор данных о стоимости недвижимости в Великобритании
---
# Набор данных о стоимости недвижимости в Великобритании {#uk-property-price-paid}
Набор содержит данные о стоимости недвижимости в Англии и Уэльсе. Данные доступны с 1995 года.
Размер набора данных в несжатом виде составляет около 4 GiB, а в ClickHouse он займет около 278 MiB.
Источник: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
Описание полей таблицы: https://www.gov.uk/guidance/about-the-price-paid-data
Набор содержит данные HM Land Registry © Crown copyright and database right 2021. Эти данные лицензированы в соответствии с Open Government Licence v3.0.
## Загрузите набор данных {#download-dataset}
Выполните команду:
```bash
wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv
```
Загрузка займет около 2 минут при хорошем подключении к интернету.
## Создайте таблицу {#create-table}
```sql
CREATE TABLE uk_price_paid
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0),
is_new UInt8,
duration Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String),
category UInt8
) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2);
```
## Обработайте и импортируйте данные {#preprocess-import-data}
В этом примере используется `clickhouse-local` для предварительной обработки данных и `clickhouse-client` для импорта данных.
Указывается структура исходных данных CSV-файла и запрос для предварительной обработки данных с помощью `clickhouse-local`.
Предварительная обработка включает:
- разделение почтового индекса на два разных столбца `postcode1` и `postcode2`, что лучше подходит для хранения данных и выполнения запросов к ним;
- преобразование поля `time` в дату, поскольку оно содержит только время 00:00;
- поле [UUID](../../sql-reference/data-types/uuid.md) игнорируется, потому что оно не будет использовано для анализа;
- преобразование полей `type` и `duration` в более читаемые поля типа `Enum` с помощью функции [transform](../../sql-reference/functions/other-functions.md#transform);
- преобразование полей `is_new` и `category` из односимвольной строки (`Y`/`N` и `A`/`B`) в поле [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) со значениями 0 и 1 соответственно.
Обработанные данные передаются в `clickhouse-client` и импортируются в таблицу ClickHouse потоковым способом.
```bash
clickhouse-local --input-format CSV --structure '
uuid String,
price UInt32,
time DateTime,
postcode String,
a String,
b String,
c String,
addr1 String,
addr2 String,
street String,
locality String,
town String,
district String,
county String,
d String,
e String
' --query "
WITH splitByChar(' ', postcode) AS p
SELECT
price,
toDate(time) AS date,
p[1] AS postcode1,
p[2] AS postcode2,
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
b = 'Y' AS is_new,
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
addr1,
addr2,
street,
locality,
town,
district,
county,
d = 'B' AS category
FROM table" --date_time_input_format best_effort < pp-complete.csv | clickhouse-client --query "INSERT INTO uk_price_paid FORMAT TSV"
```
Выполнение запроса займет около 40 секунд.
## Проверьте импортированные данные {#validate-data}
Запрос:
```sql
SELECT count() FROM uk_price_paid;
```
Результат:
```text
┌──count()─┐
│ 26321785 │
└──────────┘
```
Размер набора данных в ClickHouse составляет всего 278 MiB, проверьте это.
Запрос:
```sql
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid';
```
Результат:
```text
┌─formatReadableSize(total_bytes)─┐
│ 278.80 MiB │
└─────────────────────────────────┘
```
## Примеры запросов {#run-queries}
### Запрос 1. Средняя цена за год {#average-price}
Запрос:
```sql
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year;
```
Результат:
```text
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
│ 1995 │ 67932 │ █████▍ │
│ 1996 │ 71505 │ █████▋ │
│ 1997 │ 78532 │ ██████▎ │
│ 1998 │ 85436 │ ██████▋ │
│ 1999 │ 96037 │ ███████▋ │
│ 2000 │ 107479 │ ████████▌ │
│ 2001 │ 118885 │ █████████▌ │
│ 2002 │ 137941 │ ███████████ │
│ 2003 │ 155889 │ ████████████▍ │
│ 2004 │ 178885 │ ██████████████▎ │
│ 2005 │ 189351 │ ███████████████▏ │
│ 2006 │ 203528 │ ████████████████▎ │
│ 2007 │ 219378 │ █████████████████▌ │
│ 2008 │ 217056 │ █████████████████▎ │
│ 2009 │ 213419 │ █████████████████ │
│ 2010 │ 236109 │ ██████████████████▊ │
│ 2011 │ 232805 │ ██████████████████▌ │
│ 2012 │ 238367 │ ███████████████████ │
│ 2013 │ 256931 │ ████████████████████▌ │
│ 2014 │ 279915 │ ██████████████████████▍ │
│ 2015 │ 297266 │ ███████████████████████▋ │
│ 2016 │ 313201 │ █████████████████████████ │
│ 2017 │ 346097 │ ███████████████████████████▋ │
│ 2018 │ 350116 │ ████████████████████████████ │
│ 2019 │ 351013 │ ████████████████████████████ │
│ 2020 │ 369420 │ █████████████████████████████▌ │
│ 2021 │ 386903 │ ██████████████████████████████▊ │
└──────┴────────┴────────────────────────────────────────┘
```
### Запрос 2. Средняя цена за год в Лондоне {#average-price-london}
Запрос:
```sql
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year;
```
Результат:
```text
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
│ 1995 │ 109116 │ █████▍ │
│ 1996 │ 118667 │ █████▊ │
│ 1997 │ 136518 │ ██████▋ │
│ 1998 │ 152983 │ ███████▋ │
│ 1999 │ 180637 │ █████████ │
│ 2000 │ 215838 │ ██████████▋ │
│ 2001 │ 232994 │ ███████████▋ │
│ 2002 │ 263670 │ █████████████▏ │
│ 2003 │ 278394 │ █████████████▊ │
│ 2004 │ 304666 │ ███████████████▏ │
│ 2005 │ 322875 │ ████████████████▏ │
│ 2006 │ 356191 │ █████████████████▋ │
│ 2007 │ 404054 │ ████████████████████▏ │
│ 2008 │ 420741 │ █████████████████████ │
│ 2009 │ 427753 │ █████████████████████▍ │
│ 2010 │ 480306 │ ████████████████████████ │
│ 2011 │ 496274 │ ████████████████████████▋ │
│ 2012 │ 519442 │ █████████████████████████▊ │
│ 2013 │ 616212 │ ██████████████████████████████▋ │
│ 2014 │ 724154 │ ████████████████████████████████████▏ │
│ 2015 │ 792129 │ ███████████████████████████████████████▌ │
│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │
│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │
│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │
│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │
│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │
│ 2021 │ 960343 │ ████████████████████████████████████████████████ │
└──────┴─────────┴───────────────────────────────────────────────────────┘
```
Что-то случилось в 2013 году. Я понятия не имею, что именно. Может быть, вы имеете представление о том, что произошло в 2020 году?
### Запрос 3. Самые дорогие районы {#most-expensive-neighborhoods}
Запрос:
```sql
SELECT
town,
district,
count() AS c,
round(avg(price)) AS price,
bar(price, 0, 5000000, 100)
FROM uk_price_paid
WHERE date >= '2020-01-01'
GROUP BY
town,
district
HAVING c >= 100
ORDER BY price DESC
LIMIT 100;
```
Результат:
```text
┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │
│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │
│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │
│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │
│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │
│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │
│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │
│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │
│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │
│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │
│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │
│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │
│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │
│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │
│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │
│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │
│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │
│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │
│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │
│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │
│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │
│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │
│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │
│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │
│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │
│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │
│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │
│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │
│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │
│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │
│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │
│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │
│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │
│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │
│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │
│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │
│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │
│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │
│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │
│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │
│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │
│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │
│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │
│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │
│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │
│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │
│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │
│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │
│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │
│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │
│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │
│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │
│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │
│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │
│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │
│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │
│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │
│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │
│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │
│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │
│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │
│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │
│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │
│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │
│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │
│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │
│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │
│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │
│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │
│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │
│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │
│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │
│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │
│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │
│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │
│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │
│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │
│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │
│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │
│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │
│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │
│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │
│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │
│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │
│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │
│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │
│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │
│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │
│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │
│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │
│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
```
## Ускорьте запросы с помощью проекций {#speedup-with-projections}
[Проекции](../../sql-reference/statements/alter/projection.md) позволяют повысить скорость запросов за счет хранения предварительно агрегированных данных.
### Создайте проекцию {#build-projection}
Создайте агрегирующую проекцию по измерениям `toYear(date)`, `district`, `town`:
```sql
ALTER TABLE uk_price_paid
ADD PROJECTION projection_by_year_district_town
(
SELECT
toYear(date),
district,
town,
avg(price),
sum(price),
count()
GROUP BY
toYear(date),
district,
town
);
```
Заполните проекцию для текущих данных (иначе проекция будет создана только для добавляемых данных):
```sql
ALTER TABLE uk_price_paid
MATERIALIZE PROJECTION projection_by_year_district_town
SETTINGS mutations_sync = 1;
```
## Проверьте производительность {#test-performance}
Давайте выполним те же 3 запроса.
[Включите](../../operations/settings/settings.md#allow-experimental-projection-optimization) поддержку проекций:
```sql
SET allow_experimental_projection_optimization = 1;
```
### Запрос 1. Средняя цена за год {#average-price-projections}
Запрос:
```sql
SELECT
toYear(date) AS year,
round(avg(price)) AS price,
bar(price, 0, 1000000, 80)
FROM uk_price_paid
GROUP BY year
ORDER BY year ASC;
```
Результат:
```text
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
│ 1995 │ 67932 │ █████▍ │
│ 1996 │ 71505 │ █████▋ │
│ 1997 │ 78532 │ ██████▎ │
│ 1998 │ 85436 │ ██████▋ │
│ 1999 │ 96037 │ ███████▋ │
│ 2000 │ 107479 │ ████████▌ │
│ 2001 │ 118885 │ █████████▌ │
│ 2002 │ 137941 │ ███████████ │
│ 2003 │ 155889 │ ████████████▍ │
│ 2004 │ 178885 │ ██████████████▎ │
│ 2005 │ 189351 │ ███████████████▏ │
│ 2006 │ 203528 │ ████████████████▎ │
│ 2007 │ 219378 │ █████████████████▌ │
│ 2008 │ 217056 │ █████████████████▎ │
│ 2009 │ 213419 │ █████████████████ │
│ 2010 │ 236109 │ ██████████████████▊ │
│ 2011 │ 232805 │ ██████████████████▌ │
│ 2012 │ 238367 │ ███████████████████ │
│ 2013 │ 256931 │ ████████████████████▌ │
│ 2014 │ 279915 │ ██████████████████████▍ │
│ 2015 │ 297266 │ ███████████████████████▋ │
│ 2016 │ 313201 │ █████████████████████████ │
│ 2017 │ 346097 │ ███████████████████████████▋ │
│ 2018 │ 350116 │ ████████████████████████████ │
│ 2019 │ 351013 │ ████████████████████████████ │
│ 2020 │ 369420 │ █████████████████████████████▌ │
│ 2021 │ 386903 │ ██████████████████████████████▊ │
└──────┴────────┴────────────────────────────────────────┘
```
### Запрос 2. Средняя цена за год в Лондоне {#average-price-london-projections}
Запрос:
```sql
SELECT
toYear(date) AS year,
round(avg(price)) AS price,
bar(price, 0, 2000000, 100)
FROM uk_price_paid
WHERE town = 'LONDON'
GROUP BY year
ORDER BY year ASC;
```
Результат:
```text
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
│ 1995 │ 109116 │ █████▍ │
│ 1996 │ 118667 │ █████▊ │
│ 1997 │ 136518 │ ██████▋ │
│ 1998 │ 152983 │ ███████▋ │
│ 1999 │ 180637 │ █████████ │
│ 2000 │ 215838 │ ██████████▋ │
│ 2001 │ 232994 │ ███████████▋ │
│ 2002 │ 263670 │ █████████████▏ │
│ 2003 │ 278394 │ █████████████▊ │
│ 2004 │ 304666 │ ███████████████▏ │
│ 2005 │ 322875 │ ████████████████▏ │
│ 2006 │ 356191 │ █████████████████▋ │
│ 2007 │ 404054 │ ████████████████████▏ │
│ 2008 │ 420741 │ █████████████████████ │
│ 2009 │ 427753 │ █████████████████████▍ │
│ 2010 │ 480306 │ ████████████████████████ │
│ 2011 │ 496274 │ ████████████████████████▋ │
│ 2012 │ 519442 │ █████████████████████████▊ │
│ 2013 │ 616212 │ ██████████████████████████████▋ │
│ 2014 │ 724154 │ ████████████████████████████████████▏ │
│ 2015 │ 792129 │ ███████████████████████████████████████▌ │
│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │
│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │
│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │
│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │
│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │
│ 2021 │ 960343 │ ████████████████████████████████████████████████ │
└──────┴─────────┴───────────────────────────────────────────────────────┘
```
### Запрос 3. Самые дорогие районы {#most-expensive-neighborhoods-projections}
Условие (`date >= '2020-01-01'`) необходимо изменить, чтобы оно соответствовало измерению проекции (`toYear(date) >= 2020`).
Запрос:
```sql
SELECT
town,
district,
count() AS c,
round(avg(price)) AS price,
bar(price, 0, 5000000, 100)
FROM uk_price_paid
WHERE toYear(date) >= 2020
GROUP BY
town,
district
HAVING c >= 100
ORDER BY price DESC
LIMIT 100;
```
Результат:
```text
┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │
│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │
│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │
│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │
│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │
│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │
│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │
│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │
│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │
│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │
│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │
│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │
│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │
│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │
│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │
│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │
│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │
│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │
│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │
│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │
│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │
│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │
│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │
│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │
│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │
│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │
│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │
│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │
│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │
│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │
│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │
│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │
│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │
│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │
│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │
│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │
│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │
│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │
│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │
│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │
│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │
│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │
│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │
│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │
│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │
│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │
│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │
│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │
│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │
│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │
│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │
│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │
│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │
│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │
│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │
│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │
│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │
│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │
│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │
│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │
│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │
│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │
│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │
│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │
│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │
│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │
│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │
│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │
│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │
│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │
│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │
│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │
│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │
│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │
│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │
│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │
│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │
│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │
│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │
│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │
│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │
│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │
│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │
│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │
│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │
│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │
│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │
│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │
│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │
│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │
│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
```
### Резюме {#summary}
Все три запроса работают намного быстрее и читают меньшее количество строк.
```text
Query 1
no projection: 27 rows in set. Elapsed: 0.158 sec. Processed 26.32 million rows, 157.93 MB (166.57 million rows/s., 999.39 MB/s.)
projection: 27 rows in set. Elapsed: 0.007 sec. Processed 105.96 thousand rows, 3.33 MB (14.58 million rows/s., 458.13 MB/s.)
Query 2
no projection: 27 rows in set. Elapsed: 0.163 sec. Processed 26.32 million rows, 80.01 MB (161.75 million rows/s., 491.64 MB/s.)
projection: 27 rows in set. Elapsed: 0.008 sec. Processed 105.96 thousand rows, 3.67 MB (13.29 million rows/s., 459.89 MB/s.)
Query 3
no projection: 100 rows in set. Elapsed: 0.069 sec. Processed 26.32 million rows, 62.47 MB (382.13 million rows/s., 906.93 MB/s.)
projection: 100 rows in set. Elapsed: 0.029 sec. Processed 8.08 thousand rows, 511.08 KB (276.06 thousand rows/s., 17.47 MB/s.)
```
### Online Playground {#playground}
Этот набор данных доступен в [Online Playground](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==).

View File

@ -27,11 +27,11 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su
{% include 'install/deb.sh' %}
```
Также эти пакеты можно скачать и установить вручную отсюда: https://repo.clickhouse.tech/deb/stable/main/.
Также эти пакеты можно скачать и установить вручную отсюда: https://repo.clickhouse.com/deb/stable/main/.
Если вы хотите использовать наиболее свежую версию, замените `stable` на `testing` (рекомендуется для тестовых окружений).
Также вы можете вручную скачать и установить пакеты из [репозитория](https://repo.clickhouse.tech/deb/stable/main/).
Также вы можете вручную скачать и установить пакеты из [репозитория](https://repo.clickhouse.com/deb/stable/main/).
#### Пакеты {#packages}
@ -52,8 +52,8 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su
``` bash
sudo yum install yum-utils
sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_64
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64
```
Для использования наиболее свежих версий нужно заменить `stable` на `testing` (рекомендуется для тестовых окружений). Также иногда доступен `prestable`.
@ -64,21 +64,21 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_6
sudo yum install clickhouse-server clickhouse-client
```
Также есть возможность установить пакеты вручную, скачав отсюда: https://repo.clickhouse.tech/rpm/stable/x86_64.
Также есть возможность установить пакеты вручную, скачав отсюда: https://repo.clickhouse.com/rpm/stable/x86_64.
### Из Tgz архивов {#from-tgz-archives}
Команда ClickHouse в Яндексе рекомендует использовать предкомпилированные бинарники из `tgz` архивов для всех дистрибутивов, где невозможна установка `deb` и `rpm` пакетов.
Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://repo.clickhouse.tech/tgz/.
Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://repo.clickhouse.com/tgz/.
После этого архивы нужно распаковать и воспользоваться скриптами установки. Пример установки самой свежей версии:
``` bash
export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-client-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-client-$LATEST_VERSION.tgz
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh

View File

@ -29,7 +29,7 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not
如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。
你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.tech/deb/stable/main/)。
你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.com/deb/stable/main/)。
安装包列表:
@ -46,8 +46,8 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not
``` bash
sudo yum install yum-utils
sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_64
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64
```
如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。`prestable`有时也可用。
@ -58,22 +58,22 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_6
sudo yum install clickhouse-server clickhouse-client
```
你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.tech/rpm/stable/x86_64)。
你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.com/rpm/stable/x86_64)。
### `Tgz`安装包 {#from-tgz-archives}
如果您的操作系统不支持安装`deb`或`rpm`包,建议使用官方预编译的`tgz`软件包。
所需的版本可以通过`curl`或`wget`从存储库`https://repo.clickhouse.tech/tgz/`下载。
所需的版本可以通过`curl`或`wget`从存储库`https://repo.clickhouse.com/tgz/`下载。
下载后解压缩下载资源文件并使用安装脚本进行安装。以下是一个最新版本的安装示例:
``` bash
export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-client-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-client-$LATEST_VERSION.tgz
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh

View File

@ -34,7 +34,7 @@
#include <Poco/Util/Application.h>
#include <Processors/Formats/IInputFormat.h>
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
#include <Processors/QueryPipeline.h>
#include <Processors/QueryPipelineBuilder.h>
#include <Columns/ColumnString.h>
#include <common/find_symbols.h>
#include <common/LineReader.h>
@ -2050,8 +2050,7 @@ private:
});
}
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
QueryPipeline pipeline(std::move(pipe));
PullingAsyncPipelineExecutor executor(pipeline);
Block block;

View File

@ -9,6 +9,11 @@
#include <IO/ConnectionTimeoutsContext.h>
#include <Interpreters/InterpreterInsertQuery.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/QueryPipelineBuilder.h>
#include <Processors/Chain.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Executors/PushingPipelineExecutor.h>
#include <Processors/Sources/RemoteSource.h>
#include <DataStreams/ExpressionBlockInputStream.h>
namespace DB
@ -1446,7 +1451,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
local_context->setSettings(task_cluster->settings_pull);
local_context->setSetting("skip_unavailable_shards", true);
Block block = getBlockWithAllStreamData(InterpreterFactory::get(query_select_ast, local_context)->execute().getInputStream());
Block block = getBlockWithAllStreamData(InterpreterFactory::get(query_select_ast, local_context)->execute().pipeline);
count = (block) ? block.safeGetByPosition(0).column->getUInt(0) : 0;
}
@ -1524,25 +1529,30 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
context_insert->setSettings(task_cluster->settings_push);
/// Custom INSERT SELECT implementation
BlockInputStreamPtr input;
BlockOutputStreamPtr output;
QueryPipeline input;
QueryPipeline output;
{
BlockIO io_select = InterpreterFactory::get(query_select_ast, context_select)->execute();
BlockIO io_insert = InterpreterFactory::get(query_insert_ast, context_insert)->execute();
auto pure_input = io_select.getInputStream();
output = io_insert.out;
output = std::move(io_insert.pipeline);
/// Add converting actions to make it possible to copy blocks with slightly different schema
const auto & select_block = pure_input->getHeader();
const auto & insert_block = output->getHeader();
const auto & select_block = io_select.pipeline.getHeader();
const auto & insert_block = output.getHeader();
auto actions_dag = ActionsDAG::makeConvertingActions(
select_block.getColumnsWithTypeAndName(),
insert_block.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Position);
auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(getContext()));
input = std::make_shared<ExpressionBlockInputStream>(pure_input, actions);
QueryPipelineBuilder builder;
builder.init(std::move(io_select.pipeline));
builder.addSimpleTransform([&](const Block & header)
{
return std::make_shared<ExpressionTransform>(header, actions);
});
input = QueryPipelineBuilder::getPipeline(std::move(builder));
}
/// Fail-fast optimization to abort copying when the current clean state expires
@ -1588,7 +1598,26 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
};
/// Main work is here
copyData(*input, *output, cancel_check, update_stats);
/// Move blocks from the SELECT pipeline (`input`) into the INSERT pipeline (`output`)
/// one at a time, so that `cancel_check` can be consulted between blocks.
PullingPipelineExecutor pulling_executor(input);
PushingPipelineExecutor pushing_executor(output);
Block data;
bool is_cancelled = false;
while (pulling_executor.pull(data))
{
    if (cancel_check())
    {
        is_cancelled = true;
        /// Stop both sides of the copy: the reading pipeline as well as the writing one.
        pulling_executor.cancel();
        pushing_executor.cancel();
        break;
    }
    pushing_executor.push(data);
    update_stats(data);
}
/// Finalize the insert only if the copy ran to completion.
if (!is_cancelled)
    pushing_executor.finish();
// Just in case
if (future_is_dirty_checker.valid())
@ -1711,7 +1740,8 @@ String ClusterCopier::getRemoteCreateTable(
String query = "SHOW CREATE TABLE " + getQuotedTable(table);
Block block = getBlockWithAllStreamData(
std::make_shared<RemoteBlockInputStream>(connection, query, InterpreterShowCreateQuery::getSampleBlock(), remote_context));
QueryPipeline(std::make_shared<RemoteSource>(
std::make_shared<RemoteQueryExecutor>(connection, query, InterpreterShowCreateQuery::getSampleBlock(), remote_context), false, false)));
return typeid_cast<const ColumnString &>(*block.safeGetByPosition(0).column).getDataAt(0).toString();
}
@ -1824,7 +1854,7 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti
auto local_context = Context::createCopy(context);
local_context->setSettings(task_cluster->settings_pull);
Block block = getBlockWithAllStreamData(InterpreterFactory::get(query_ast, local_context)->execute().getInputStream());
Block block = getBlockWithAllStreamData(InterpreterFactory::get(query_ast, local_context)->execute().pipeline);
if (block)
{
@ -1869,7 +1899,11 @@ bool ClusterCopier::checkShardHasPartition(const ConnectionTimeouts & timeouts,
auto local_context = Context::createCopy(context);
local_context->setSettings(task_cluster->settings_pull);
return InterpreterFactory::get(query_ast, local_context)->execute().getInputStream()->read().rows() != 0;
auto pipeline = InterpreterFactory::get(query_ast, local_context)->execute().pipeline;
PullingPipelineExecutor executor(pipeline);
Block block;
executor.pull(block);
return block.rows() != 0;
}
bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTimeouts & timeouts,
@ -1910,12 +1944,15 @@ bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTi
auto local_context = Context::createCopy(context);
local_context->setSettings(task_cluster->settings_pull);
auto result = InterpreterFactory::get(query_ast, local_context)->execute().getInputStream()->read().rows();
if (result != 0)
auto pipeline = InterpreterFactory::get(query_ast, local_context)->execute().pipeline;
PullingPipelineExecutor executor(pipeline);
Block result;
executor.pull(result);
if (result.rows() != 0)
LOG_INFO(log, "Partition {} piece number {} is PRESENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription());
else
LOG_INFO(log, "Partition {} piece number {} is ABSENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription());
return result != 0;
return result.rows() != 0;
}

View File

@ -1,6 +1,8 @@
#include "Internals.h"
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/extractKeyExpressionList.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Transforms/SquashingChunksTransform.h>
namespace DB
{
@ -63,9 +65,21 @@ BlockInputStreamPtr squashStreamIntoOneBlock(const BlockInputStreamPtr & stream)
std::numeric_limits<size_t>::max());
}
Block getBlockWithAllStreamData(const BlockInputStreamPtr & stream)
Block getBlockWithAllStreamData(QueryPipeline pipeline)
{
return squashStreamIntoOneBlock(stream)->read();
QueryPipelineBuilder builder;
builder.init(std::move(pipeline));
builder.addTransform(std::make_shared<SquashingChunksTransform>(
builder.getHeader(),
std::numeric_limits<size_t>::max(),
std::numeric_limits<size_t>::max()));
auto cur_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder));
Block block;
PullingPipelineExecutor executor(cur_pipeline);
executor.pull(block);
return block;
}

View File

@ -165,10 +165,7 @@ std::shared_ptr<ASTStorage> createASTStorageDistributed(
const String & cluster_name, const String & database, const String & table,
const ASTPtr & sharding_key_ast = nullptr);
BlockInputStreamPtr squashStreamIntoOneBlock(const BlockInputStreamPtr & stream);
Block getBlockWithAllStreamData(const BlockInputStreamPtr & stream);
Block getBlockWithAllStreamData(QueryPipeline pipeline);
bool isExtendedDefinitionStorage(const ASTPtr & storage_ast);

View File

@ -26,7 +26,7 @@
#include <Formats/registerFormats.h>
#include <Formats/FormatFactory.h>
#include <Processors/Formats/IInputFormat.h>
#include <Processors/QueryPipeline.h>
#include <Processors/QueryPipelineBuilder.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Core/Block.h>
#include <common/StringRef.h>
@ -1162,8 +1162,7 @@ try
Pipe pipe(FormatFactory::instance().getInput(input_format, file_in, header, context, max_block_size));
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
QueryPipeline pipeline(std::move(pipe));
PullingPipelineExecutor executor(pipeline);
Block block;
@ -1200,8 +1199,7 @@ try
});
}
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
QueryPipeline pipeline(std::move(pipe));
BlockOutputStreamPtr output = context->getOutputStreamParallelIfPossible(output_format, file_out, header);

View File

@ -859,6 +859,9 @@ if (ThreadFuzzer::instance().isEffective())
if (config->has("max_partition_size_to_drop"))
global_context->setMaxPartitionSizeToDrop(config->getUInt64("max_partition_size_to_drop"));
if (config->has("max_concurrent_queries"))
global_context->getProcessList().setMaxSize(config->getInt("max_concurrent_queries", 0));
if (!initial_loading)
{
/// We do not load ZooKeeper configuration on the first config loading

View File

@ -45,6 +45,7 @@ enum class AccessType
M(ALTER_RENAME_COLUMN, "RENAME COLUMN", COLUMN, ALTER_COLUMN) \
M(ALTER_MATERIALIZE_COLUMN, "MATERIALIZE COLUMN", COLUMN, ALTER_COLUMN) \
M(ALTER_COLUMN, "", GROUP, ALTER_TABLE) /* allow to execute ALTER {ADD|DROP|MODIFY...} COLUMN */\
M(ALTER_MODIFY_COMMENT, "MODIFY COMMENT", TABLE, ALTER_TABLE) /* modify table comment */\
\
M(ALTER_ORDER_BY, "ALTER MODIFY ORDER BY, MODIFY ORDER BY", TABLE, ALTER_INDEX) \
M(ALTER_SAMPLE_BY, "ALTER MODIFY SAMPLE BY, MODIFY SAMPLE BY", TABLE, ALTER_INDEX) \

View File

@ -14,6 +14,7 @@
#include <Interpreters/InterpreterCreateUserQuery.h>
#include <Interpreters/InterpreterShowGrantsQuery.h>
#include <Common/quoteString.h>
#include <common/logger_useful.h>
#include <Poco/JSON/JSON.h>
#include <Poco/JSON/Object.h>
#include <Poco/JSON/Stringifier.h>

View File

@ -212,6 +212,7 @@ add_object_library(clickhouse_processors_formats Processors/Formats)
add_object_library(clickhouse_processors_formats_impl Processors/Formats/Impl)
add_object_library(clickhouse_processors_transforms Processors/Transforms)
add_object_library(clickhouse_processors_sources Processors/Sources)
add_object_library(clickhouse_processors_sinks Processors/Sinks)
add_object_library(clickhouse_processors_merges Processors/Merges)
add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms)
add_object_library(clickhouse_processors_queryplan Processors/QueryPlan)

View File

@ -23,7 +23,7 @@
#include <Interpreters/ClientInfo.h>
#include <Compression/CompressionFactory.h>
#include <Processors/Pipe.h>
#include <Processors/QueryPipeline.h>
#include <Processors/QueryPipelineBuilder.h>
#include <Processors/ISink.h>
#include <Processors/Executors/PipelineExecutor.h>
#include <pcg_random.hpp>
@ -700,14 +700,14 @@ void Connection::sendExternalTablesData(ExternalTablesData & data)
if (!elem->pipe)
elem->pipe = elem->creating_pipe_callback();
QueryPipeline pipeline;
QueryPipelineBuilder pipeline;
pipeline.init(std::move(*elem->pipe));
elem->pipe.reset();
pipeline.resize(1);
auto sink = std::make_shared<ExternalTableDataSink>(pipeline.getHeader(), *this, *elem, std::move(on_cancel));
pipeline.setSinks([&](const Block &, QueryPipeline::StreamType type) -> ProcessorPtr
pipeline.setSinks([&](const Block &, QueryPipelineBuilder::StreamType type) -> ProcessorPtr
{
if (type != QueryPipeline::StreamType::Main)
if (type != QueryPipelineBuilder::StreamType::Main)
return nullptr;
return sink;
});

View File

@ -145,7 +145,6 @@ protected:
Poco::Logger * log = nullptr;
friend class CurrentThread;
friend class PushingToViewsBlockOutputStream;
/// Use ptr not to add extra dependencies in the header
std::unique_ptr<RUsageCounters> last_rusage;
@ -188,6 +187,11 @@ public:
return query_context.lock();
}
/// Clears the query_profiled_enabled flag of this object.
/// NOTE(review): presumably the flag is consulted when query profilers are set up for the thread —
/// confirm where it is read before relying on this.
void disableProfiling()
{
query_profiled_enabled = false;
}
/// Starts new query and create new thread group for it, current thread becomes master thread of the query
void initializeQuery();
@ -222,6 +226,7 @@ public:
/// Detaches thread from the thread group and the query, dumps performance counters if they have not been dumped
void detachQuery(bool exit_if_already_detached = false, bool thread_exits = false);
void logToQueryViewsLog(const ViewRuntimeData & vinfo);
protected:
void applyQuerySettings();
@ -234,7 +239,6 @@ protected:
void logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database, std::chrono::time_point<std::chrono::system_clock> now);
void logToQueryViewsLog(const ViewRuntimeData & vinfo);
void assertState(const std::initializer_list<int> & permitted_states, const char * description = nullptr) const;

View File

@ -15,6 +15,7 @@
#include <Processors/Executors/PipelineExecutor.h>
#include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/Sinks/SinkToStorage.h>
#include <Processors/Sinks/EmptySink.h>
#include <Core/ExternalTable.h>
#include <Poco/Net/MessageHeader.h>
@ -160,14 +161,17 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header,
auto storage = temporary_table.getTable();
getContext()->addExternalTable(data->table_name, std::move(temporary_table));
auto sink = storage->write(ASTPtr(), storage->getInMemoryMetadataPtr(), getContext());
auto exception_handling = std::make_shared<EmptySink>(sink->getOutputPort().getHeader());
/// Write data
data->pipe->resize(1);
connect(*data->pipe->getOutputPort(0), sink->getPort());
connect(*data->pipe->getOutputPort(0), sink->getInputPort());
connect(sink->getOutputPort(), exception_handling->getPort());
auto processors = Pipe::detachProcessors(std::move(*data->pipe));
processors.push_back(std::move(sink));
processors.push_back(std::move(exception_handling));
auto executor = std::make_shared<PipelineExecutor>(processors);
executor->execute(/*num_threads = */ 1);

View File

@ -161,6 +161,7 @@ class IColumn;
\
M(Bool, force_index_by_date, false, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
M(Bool, force_primary_key, false, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
M(Bool, use_skip_indexes, true, "Use data skipping indexes during query execution.", 0) \
M(String, force_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be used during query execution, otherwise an exception will be thrown.", 0) \
\
M(Float, max_streams_to_max_threads_ratio, 1, "Allows you to use more sources than the number of threads - to more evenly distribute work across threads. It is assumed that this is a temporary solution, since it will be possible in the future to make the number of sources equal to the number of threads, but for each source to dynamically select available work for itself.", 0) \

View File

@ -1,43 +0,0 @@
#include <DataStreams/AddingDefaultBlockOutputStream.h>
#include <Interpreters/addMissingDefaults.h>
#include <Interpreters/ExpressionActions.h>
namespace DB
{
/// Wraps `output_` and prepares the actions used by write() to complete incoming blocks.
///
/// @param output_           Stream that receives the completed blocks.
/// @param header_           Structure of the blocks that will be passed to write().
/// @param columns_          Table column descriptions handed to addMissingDefaults().
/// @param context_          Context used to build the actions and their settings.
/// @param null_as_default_  Forwarded to addMissingDefaults().
AddingDefaultBlockOutputStream::AddingDefaultBlockOutputStream(
    const BlockOutputStreamPtr & output_,
    const Block & header_,
    const ColumnsDescription & columns_,
    ContextPtr context_,
    bool null_as_default_)
    : output(output_)
    , header(header_)
{
    /// The wrapped stream's header defines which columns the actions have to produce.
    const auto required_columns = output->getHeader().getNamesAndTypesList();
    auto defaults_dag = addMissingDefaults(header_, required_columns, columns_, context_, null_as_default_);

    const auto actions_settings = ExpressionActionsSettings::fromContext(context_, CompileExpressions::yes);
    adding_defaults_actions = std::make_shared<ExpressionActions>(std::move(defaults_dag), actions_settings);
}
void AddingDefaultBlockOutputStream::write(const Block & block)
{
auto copy = block;
adding_defaults_actions->execute(copy);
output->write(copy);
}
/// Pure pass-through: this wrapper keeps no buffered data of its own, so it only flushes `output`.
void AddingDefaultBlockOutputStream::flush()
{
output->flush();
}
/// Pure pass-through: forwards the prefix call to the wrapped stream.
void AddingDefaultBlockOutputStream::writePrefix()
{
output->writePrefix();
}
/// Pure pass-through: forwards the suffix call to the wrapped stream.
void AddingDefaultBlockOutputStream::writeSuffix()
{
output->writeSuffix();
}
}

View File

@ -1,48 +0,0 @@
#pragma once
#include <DataStreams/IBlockOutputStream.h>
#include <Columns/ColumnConst.h>
#include <Storages/ColumnsDescription.h>
namespace DB
{
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
class Context;
/** This stream adds three types of columns into block
* 1. Columns, that are missed inside request, but present in table without defaults (missed columns)
* 2. Columns, that are missed inside request, but present in table with defaults (columns with default values)
* 3. Columns that materialized from other columns (materialized columns)
* Also the stream can substitute NULL into DEFAULT value in case of INSERT SELECT query (null_as_default) if according setting is 1.
* All three types of columns are materialized (not constants).
*/
/// Output stream decorator: every block passed to write() is completed by `adding_defaults_actions`
/// and then handed to the wrapped `output` stream. All other calls are forwarded unchanged.
class AddingDefaultBlockOutputStream : public IBlockOutputStream
{
public:
/// `output_` is the wrapped stream, `header_` the structure of blocks given to write();
/// `columns_`, `context_` and `null_as_default_` are passed to addMissingDefaults() to build the actions.
AddingDefaultBlockOutputStream(
const BlockOutputStreamPtr & output_,
const Block & header_,
const ColumnsDescription & columns_,
ContextPtr context_,
bool null_as_default_ = false);
/// Returns the `header_` given to the constructor (not the header of the wrapped stream).
Block getHeader() const override { return header; }
/// Applies the actions to a copy of `block` and writes the result to `output`.
void write(const Block & block) override;
/// The three calls below are forwarded to `output` as is.
void flush() override;
void writePrefix() override;
void writeSuffix() override;
private:
/// Wrapped destination stream.
BlockOutputStreamPtr output;
/// Structure of incoming blocks.
const Block header;
/// Actions built once in the constructor from addMissingDefaults().
ExpressionActionsPtr adding_defaults_actions;
};
}

View File

@ -5,26 +5,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
BlockInputStreamPtr BlockIO::getInputStream()
{
if (out)
throw Exception("Cannot get input stream from BlockIO because output stream is not empty",
ErrorCodes::LOGICAL_ERROR);
if (in)
return in;
if (pipeline.initialized())
return std::make_shared<PipelineExecutingBlockInputStream>(std::move(pipeline));
throw Exception("Cannot get input stream from BlockIO because query pipeline was not initialized",
ErrorCodes::LOGICAL_ERROR);
}
void BlockIO::reset()
{
@ -38,10 +18,6 @@ void BlockIO::reset()
*/
/// TODO simplify it all
out.reset();
in.reset();
if (process_list_entry)
process_list_entry->get().releaseQueryStreams();
pipeline.reset();
process_list_entry.reset();
@ -57,8 +33,6 @@ BlockIO & BlockIO::operator= (BlockIO && rhs)
reset();
process_list_entry = std::move(rhs.process_list_entry);
in = std::move(rhs.in);
out = std::move(rhs.out);
pipeline = std::move(rhs.pipeline);
finish_callback = std::move(rhs.finish_callback);

View File

@ -1,9 +1,6 @@
#pragma once
#include <DataStreams/IBlockStream_fwd.h>
#include <functional>
#include <Processors/QueryPipeline.h>
@ -25,14 +22,11 @@ struct BlockIO
std::shared_ptr<ProcessListEntry> process_list_entry;
BlockOutputStreamPtr out;
BlockInputStreamPtr in;
QueryPipeline pipeline;
/// Callbacks for query logging could be set here.
std::function<void(IBlockInputStream *, IBlockOutputStream *, QueryPipeline *)> finish_callback;
std::function<void()> exception_callback;
std::function<void(QueryPipeline &)> finish_callback;
std::function<void()> exception_callback;
/// When it is true, don't bother sending any non-empty blocks to the out stream
bool null_format = false;
@ -42,11 +36,7 @@ struct BlockIO
{
if (finish_callback)
{
QueryPipeline * pipeline_ptr = nullptr;
if (pipeline.initialized())
pipeline_ptr = &pipeline;
finish_callback(in.get(), out.get(), pipeline_ptr);
finish_callback(pipeline);
}
}
@ -56,9 +46,6 @@ struct BlockIO
exception_callback();
}
/// Returns in or converts pipeline to stream. Throws if out is not empty.
BlockInputStreamPtr getInputStream();
private:
void reset();
};

View File

@ -22,26 +22,24 @@ namespace ErrorCodes
}
CheckConstraintsBlockOutputStream::CheckConstraintsBlockOutputStream(
CheckConstraintsTransform::CheckConstraintsTransform(
const StorageID & table_id_,
const BlockOutputStreamPtr & output_,
const Block & header_,
const Block & header,
const ConstraintsDescription & constraints_,
ContextPtr context_)
: table_id(table_id_),
output(output_),
header(header_),
constraints(constraints_),
expressions(constraints_.getExpressions(context_, header.getNamesAndTypesList()))
: ExceptionKeepingTransform(header, header)
, table_id(table_id_)
, constraints(constraints_)
, expressions(constraints_.getExpressions(context_, header.getNamesAndTypesList()))
{
}
void CheckConstraintsBlockOutputStream::write(const Block & block)
void CheckConstraintsTransform::transform(Chunk & chunk)
{
if (block.rows() > 0)
if (chunk.getNumRows() > 0)
{
Block block_to_calculate = block;
Block block_to_calculate = getInputPort().getHeader().cloneWithColumns(chunk.getColumns());
for (size_t i = 0; i < expressions.size(); ++i)
{
auto constraint_expr = expressions[i];
@ -65,8 +63,8 @@ void CheckConstraintsBlockOutputStream::write(const Block & block)
/// Check if constraint value is nullable
const auto & null_map = column_nullable->getNullMapColumn();
const PaddedPODArray<UInt8> & data = null_map.getData();
bool null_map_contains_null = !memoryIsZero(data.raw_data(), data.size() * sizeof(UInt8));
const PaddedPODArray<UInt8> & null_map_data = null_map.getData();
bool null_map_contains_null = !memoryIsZero(null_map_data.raw_data(), null_map_data.size() * sizeof(UInt8));
if (null_map_contains_null)
throw Exception(
@ -82,15 +80,15 @@ void CheckConstraintsBlockOutputStream::write(const Block & block)
const ColumnUInt8 & res_column_uint8 = assert_cast<const ColumnUInt8 &>(*result_column);
const UInt8 * data = res_column_uint8.getData().data();
const UInt8 * res_data = res_column_uint8.getData().data();
size_t size = res_column_uint8.size();
/// Is violated.
if (!memoryIsByte(data, size, 1))
if (!memoryIsByte(res_data, size, 1))
{
size_t row_idx = 0;
for (; row_idx < size; ++row_idx)
if (data[row_idx] != 1)
if (res_data[row_idx] != 1)
break;
Names related_columns = constraint_expr->getRequiredColumns();
@ -101,7 +99,7 @@ void CheckConstraintsBlockOutputStream::write(const Block & block)
column_values_msg.reserve(approx_bytes_for_col * related_columns.size());
for (const auto & name : related_columns)
{
const IColumn & column = *block.getByName(name).column;
const IColumn & column = *chunk.getColumns()[getInputPort().getHeader().getPositionByName(name)];
assert(row_idx < column.size());
if (!first)
@ -124,23 +122,7 @@ void CheckConstraintsBlockOutputStream::write(const Block & block)
}
}
output->write(block);
rows_written += block.rows();
}
void CheckConstraintsBlockOutputStream::flush()
{
output->flush();
}
void CheckConstraintsBlockOutputStream::writePrefix()
{
output->writePrefix();
}
void CheckConstraintsBlockOutputStream::writeSuffix()
{
output->writeSuffix();
rows_written += chunk.getNumRows();
}
}

View File

@ -1,6 +1,6 @@
#pragma once
#include <DataStreams/IBlockOutputStream.h>
#include <Processors/Transforms/ExceptionKeepingTransform.h>
#include <Storages/ConstraintsDescription.h>
#include <Interpreters/StorageID.h>
@ -12,28 +12,21 @@ namespace DB
* Otherwise just pass block to output unchanged.
*/
class CheckConstraintsBlockOutputStream : public IBlockOutputStream
class CheckConstraintsTransform final : public ExceptionKeepingTransform
{
public:
CheckConstraintsBlockOutputStream(
CheckConstraintsTransform(
const StorageID & table_,
const BlockOutputStreamPtr & output_,
const Block & header_,
const Block & header,
const ConstraintsDescription & constraints_,
ContextPtr context_);
Block getHeader() const override { return header; }
void write(const Block & block) override;
String getName() const override { return "CheckConstraintsTransform"; }
void flush() override;
void writePrefix() override;
void writeSuffix() override;
void transform(Chunk & chunk) override;
private:
StorageID table_id;
BlockOutputStreamPtr output;
Block header;
const ConstraintsDescription constraints;
const ConstraintsExpressions expressions;
size_t rows_written = 0;

View File

@ -1,5 +1,6 @@
#include <DataStreams/CountingBlockOutputStream.h>
#include <Common/ProfileEvents.h>
#include <iostream>
namespace ProfileEvents
@ -12,15 +13,23 @@ namespace ProfileEvents
namespace DB
{
void CountingBlockOutputStream::write(const Block & block)
void CountingTransform::transform(Chunk & chunk)
{
stream->write(block);
Progress local_progress(block.rows(), block.bytes(), 0);
Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0);
progress.incrementPiecewiseAtomically(local_progress);
ProfileEvents::increment(ProfileEvents::InsertedRows, local_progress.read_rows);
ProfileEvents::increment(ProfileEvents::InsertedBytes, local_progress.read_bytes);
//std::cerr << "============ counting adding progress for " << static_cast<const void *>(thread_status) << ' ' << chunk.getNumRows() << " rows\n";
if (thread_status)
{
thread_status->performance_counters.increment(ProfileEvents::InsertedRows, local_progress.read_rows);
thread_status->performance_counters.increment(ProfileEvents::InsertedBytes, local_progress.read_bytes);
}
else
{
ProfileEvents::increment(ProfileEvents::InsertedRows, local_progress.read_rows);
ProfileEvents::increment(ProfileEvents::InsertedBytes, local_progress.read_bytes);
}
if (process_elem)
process_elem->updateProgressOut(local_progress);

View File

@ -1,6 +1,6 @@
#pragma once
#include <DataStreams/IBlockOutputStream.h>
#include <Processors/Transforms/ExceptionKeepingTransform.h>
#include <Interpreters/ProcessList.h>
@ -9,10 +9,13 @@ namespace DB
/// Proxy class which counts number of written block, rows, bytes
class CountingBlockOutputStream final : public IBlockOutputStream
class CountingTransform final : public ExceptionKeepingTransform
{
public:
explicit CountingBlockOutputStream(const BlockOutputStreamPtr & stream_) : stream(stream_) {}
explicit CountingTransform(const Block & header, ThreadStatus * thread_status_ = nullptr)
: ExceptionKeepingTransform(header, header), thread_status(thread_status_) {}
String getName() const override { return "CountingTransform"; }
void setProgressCallback(const ProgressCallback & callback)
{
@ -29,20 +32,13 @@ public:
return progress;
}
Block getHeader() const override { return stream->getHeader(); }
void write(const Block & block) override;
void writePrefix() override { stream->writePrefix(); }
void writeSuffix() override { stream->writeSuffix(); }
void flush() override { stream->flush(); }
void onProgress(const Progress & current_progress) override { stream->onProgress(current_progress); }
String getContentType() const override { return stream->getContentType(); }
void transform(Chunk & chunk) override;
protected:
BlockOutputStreamPtr stream;
Progress progress;
ProgressCallback progress_callback;
QueryStatus * process_elem = nullptr;
ThreadStatus * thread_status = nullptr;
};
}

View File

@ -1,114 +0,0 @@
#pragma once
#include <DataStreams/IBlockOutputStream.h>
#include <Processors/Sinks/SinkToStorage.h>
#include <iostream>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
class PushingToSinkBlockOutputStream : public IBlockOutputStream
{
public:
explicit PushingToSinkBlockOutputStream(SinkToStoragePtr sink_)
: sink(std::move(sink_)), port(sink->getPort().getHeader(), sink.get()) {}
Block getHeader() const override { return sink->getPort().getHeader(); }
void write(const Block & block) override
{
/// In case writePrefix was not called.
if (!port.isConnected())
writePrefix();
if (!block)
return;
size_t num_rows = block.rows();
Chunk chunk(block.getColumns(), num_rows);
port.push(std::move(chunk));
while (true)
{
auto status = sink->prepare();
switch (status)
{
case IProcessor::Status::Ready:
sink->work();
continue;
case IProcessor::Status::NeedData:
return;
case IProcessor::Status::Async: [[fallthrough]];
case IProcessor::Status::ExpandPipeline: [[fallthrough]];
case IProcessor::Status::Finished: [[fallthrough]];
case IProcessor::Status::PortFull:
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Status {} in not expected in PushingToSinkBlockOutputStream::writePrefix",
IProcessor::statusToName(status));
}
}
}
void writePrefix() override
{
connect(port, sink->getPort());
while (true)
{
auto status = sink->prepare();
switch (status)
{
case IProcessor::Status::Ready:
sink->work();
continue;
case IProcessor::Status::NeedData:
return;
case IProcessor::Status::Async: [[fallthrough]];
case IProcessor::Status::ExpandPipeline: [[fallthrough]];
case IProcessor::Status::Finished: [[fallthrough]];
case IProcessor::Status::PortFull:
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Status {} in not expected in PushingToSinkBlockOutputStream::writePrefix",
IProcessor::statusToName(status));
}
}
}
void writeSuffix() override
{
port.finish();
while (true)
{
auto status = sink->prepare();
switch (status)
{
case IProcessor::Status::Ready:
sink->work();
continue;
case IProcessor::Status::Finished:
///flush();
return;
case IProcessor::Status::NeedData:
case IProcessor::Status::Async:
case IProcessor::Status::ExpandPipeline:
case IProcessor::Status::PortFull:
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Status {} in not expected in PushingToSinkBlockOutputStream::writeSuffix",
IProcessor::statusToName(status));
}
}
}
private:
SinkToStoragePtr sink;
OutputPort port;
};
}

File diff suppressed because it is too large Load Diff

View File

@ -3,7 +3,10 @@
#include <DataStreams/IBlockOutputStream.h>
#include <Interpreters/QueryViewsLog.h>
#include <Parsers/IAST_fwd.h>
#include <Processors/Chain.h>
#include <Processors/ISimpleTransform.h>
#include <Storages/IStorage.h>
#include <Processors/Sinks/SinkToStorage.h>
#include <Common/Stopwatch.h>
namespace Poco
@ -14,61 +17,44 @@ class Logger;
namespace DB
{
class ReplicatedMergeTreeSink;
struct ViewRuntimeData
{
/// A query we should run over inserted block before pushing into inner storage.
const ASTPtr query;
/// This structure is expected by inner storage. Will convert query result to it.
Block sample_block;
/// Inner storage id.
StorageID table_id;
BlockOutputStreamPtr out;
/// In case of exception at any step (e.g. query execution or insertion into inner table)
/// exception is stored here (will be stored in query views log).
std::exception_ptr exception;
QueryViewsLogElement::ViewRuntimeStats runtime_stats;
/// Info which is needed for query views log.
std::unique_ptr<QueryViewsLogElement::ViewRuntimeStats> runtime_stats;
void setException(std::exception_ptr e)
{
exception = e;
runtime_stats.setStatus(QueryViewsLogElement::ViewStatus::EXCEPTION_WHILE_PROCESSING);
runtime_stats->setStatus(QueryViewsLogElement::ViewStatus::EXCEPTION_WHILE_PROCESSING);
}
};
/** Writes data to the specified table and to all dependent materialized views.
*/
class PushingToViewsBlockOutputStream : public IBlockOutputStream, WithContext
{
public:
PushingToViewsBlockOutputStream(
const StoragePtr & storage_,
const StorageMetadataPtr & metadata_snapshot_,
ContextPtr context_,
const ASTPtr & query_ptr_,
bool no_destination = false);
Block getHeader() const override;
void write(const Block & block) override;
void flush() override;
void writePrefix() override;
void writeSuffix() override;
void onProgress(const Progress & progress) override;
private:
StoragePtr storage;
StorageMetadataPtr metadata_snapshot;
BlockOutputStreamPtr output;
ReplicatedMergeTreeSink * replicated_output = nullptr;
Poco::Logger * log;
ASTPtr query_ptr;
Stopwatch main_watch;
std::vector<ViewRuntimeData> views;
ContextMutablePtr select_context;
ContextMutablePtr insert_context;
void process(const Block & block, ViewRuntimeData & view);
void checkExceptionsInViews();
void logQueryViews();
};
Chain buildPushingToViewsChain(
const StoragePtr & storage,
const StorageMetadataPtr & metadata_snapshot,
ContextPtr context,
const ASTPtr & query_ptr,
/// It is true when we should not insert into table, but only to views.
/// Used e.g. for kafka. We should try to remove it somehow.
bool no_destination,
/// We could specify separate thread_status for each view.
/// Needed mainly to collect counters separately. Should be improved.
ThreadStatus * thread_status,
/// Counter to measure time spent separately per view. Should be improved.
std::atomic_uint64_t * elapsed_counter_ms,
/// LiveView executes query itself, it needs source block structure.
const Block & live_view_header = {});
}

View File

@ -18,11 +18,12 @@ namespace ErrorCodes
}
RemoteBlockOutputStream::RemoteBlockOutputStream(Connection & connection_,
const ConnectionTimeouts & timeouts,
const String & query_,
const Settings & settings_,
const ClientInfo & client_info_)
RemoteInserter::RemoteInserter(
Connection & connection_,
const ConnectionTimeouts & timeouts,
const String & query_,
const Settings & settings_,
const ClientInfo & client_info_)
: connection(connection_), query(query_)
{
ClientInfo modified_client_info = client_info_;
@ -70,11 +71,8 @@ RemoteBlockOutputStream::RemoteBlockOutputStream(Connection & connection_,
}
void RemoteBlockOutputStream::write(const Block & block)
void RemoteInserter::write(Block block)
{
if (header)
assertBlocksHaveEqualStructure(block, header, "RemoteBlockOutputStream");
try
{
connection.sendData(block);
@ -94,14 +92,14 @@ void RemoteBlockOutputStream::write(const Block & block)
}
void RemoteBlockOutputStream::writePrepared(ReadBuffer & input, size_t size)
void RemoteInserter::writePrepared(ReadBuffer & buf, size_t size)
{
/// We cannot use 'header'. Input must contain block with proper structure.
connection.sendPreparedData(input, size);
connection.sendPreparedData(buf, size);
}
void RemoteBlockOutputStream::writeSuffix()
void RemoteInserter::onFinish()
{
/// Empty block means end of data.
connection.sendData(Block());
@ -127,7 +125,7 @@ void RemoteBlockOutputStream::writeSuffix()
finished = true;
}
RemoteBlockOutputStream::~RemoteBlockOutputStream()
RemoteInserter::~RemoteInserter()
{
/// If interrupted in the middle of the loop of communication with the server, then interrupt the connection,
/// to not leave the connection in unsynchronized state.

View File

@ -1,7 +1,7 @@
#pragma once
#include <Core/Block.h>
#include <DataStreams/IBlockOutputStream.h>
#include <Processors/Sinks/SinkToStorage.h>
#include <Common/Throttler.h>
#include <IO/ConnectionTimeouts.h>
#include <Interpreters/ClientInfo.h>
@ -17,24 +17,25 @@ struct Settings;
/** Allow to execute INSERT query on remote server and send data for it.
*/
class RemoteBlockOutputStream : public IBlockOutputStream
class RemoteInserter
{
public:
RemoteBlockOutputStream(Connection & connection_,
const ConnectionTimeouts & timeouts,
const String & query_,
const Settings & settings_,
const ClientInfo & client_info_);
RemoteInserter(
Connection & connection_,
const ConnectionTimeouts & timeouts,
const String & query_,
const Settings & settings_,
const ClientInfo & client_info_);
Block getHeader() const override { return header; }
void write(const Block & block) override;
void writeSuffix() override;
void write(Block block);
void onFinish();
/// Send pre-serialized and possibly pre-compressed block of data, that will be read from 'input'.
void writePrepared(ReadBuffer & input, size_t size = 0);
void writePrepared(ReadBuffer & buf, size_t size = 0);
~RemoteBlockOutputStream() override;
~RemoteInserter();
const Block & getHeader() const { return header; }
private:
Connection & connection;
@ -43,4 +44,23 @@ private:
bool finished = false;
};
/// Sink processor that sends every consumed chunk to a remote server through RemoteInserter.
class RemoteSink final : public RemoteInserter, public SinkToStorage
{
public:
    /// RemoteInserter is the first base and is therefore constructed first;
    /// its header is then used to initialise the SinkToStorage base.
    explicit RemoteSink(
        Connection & connection_,
        const ConnectionTimeouts & timeouts,
        const String & query_,
        const Settings & settings_,
        const ClientInfo & client_info_)
        : RemoteInserter(connection_, timeouts, query_, settings_, client_info_)
        , SinkToStorage(RemoteInserter::getHeader())
    {
    }

    String getName() const override { return "RemoteSink"; }

    /// Rebuilds a Block from the chunk's columns using the inserter's header and sends it.
    void consume(Chunk chunk) override
    {
        auto columns = chunk.detachColumns();
        RemoteInserter::write(RemoteInserter::getHeader().cloneWithColumns(columns));
    }

    /// Completes the insert by delegating to RemoteInserter::onFinish().
    void onFinish() override { RemoteInserter::onFinish(); }
};
}

View File

@ -13,7 +13,7 @@
#include <Processors/ISimpleTransform.h>
#include <Processors/Sources/SourceWithProgress.h>
#include <Processors/Formats/IInputFormat.h>
#include <Processors/QueryPipeline.h>
#include <Processors/QueryPipelineBuilder.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
@ -97,7 +97,7 @@ public:
max_block_size = configuration.number_of_rows_to_read;
}
pipeline.init(Pipe(FormatFactory::instance().getInput(format, command->out, sample_block, context, max_block_size)));
pipeline = QueryPipeline(Pipe(FormatFactory::instance().getInput(format, command->out, sample_block, context, max_block_size)));
executor = std::make_unique<PullingPipelineExecutor>(pipeline);
}

View File

@ -1,4 +1,5 @@
#include <DataStreams/SquashingTransform.h>
#include <iostream>
namespace DB
@ -67,7 +68,6 @@ Block SquashingTransform::addImpl(ReferenceType input_block)
}
append<ReferenceType>(std::move(input_block));
if (isEnoughSize(accumulated_block))
{
Block to_return;

View File

@ -4,7 +4,7 @@
#include <DataStreams/NativeBlockInputStream.h>
#include <DataStreams/NativeBlockOutputStream.h>
#include <DataStreams/copyData.h>
#include <Processors/QueryPipeline.h>
#include <Processors/QueryPipelineBuilder.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/ISource.h>
#include <Compression/CompressedReadBuffer.h>
@ -35,12 +35,13 @@ struct TemporaryFileStream
{}
/// Flush data from input stream into file for future reading
static void write(const std::string & path, const Block & header, QueryPipeline pipeline, const std::string & codec)
static void write(const std::string & path, const Block & header, QueryPipelineBuilder builder, const std::string & codec)
{
WriteBufferFromFile file_buf(path);
CompressedWriteBuffer compressed_buf(file_buf, CompressionCodecFactory::instance().get(codec, {}));
NativeBlockOutputStream output(compressed_buf, 0, header);
auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder));
PullingPipelineExecutor executor(pipeline);
output.writePrefix();

View File

@ -87,9 +87,8 @@ TEST(MergingSortedTest, SimpleBlockSizeTest)
pipe.addTransform(std::move(transform));
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
pipeline.setMaxThreads(1);
QueryPipeline pipeline(std::move(pipe));
pipeline.setNumThreads(1);
auto stream = std::make_shared<PipelineExecutingBlockInputStream>(std::move(pipeline));
size_t total_rows = 0;
@ -132,9 +131,8 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes)
pipe.addTransform(std::move(transform));
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
pipeline.setMaxThreads(1);
QueryPipeline pipeline(std::move(pipe));
pipeline.setNumThreads(1);
auto stream = std::make_shared<PipelineExecutingBlockInputStream>(std::move(pipeline));
auto block1 = stream->read();

View File

@ -6,6 +6,7 @@
#include <Processors/Transforms/CheckSortedTransform.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/QueryPipeline.h>
#include <Processors/Pipe.h>
#include <DataTypes/DataTypesNumber.h>
@ -97,8 +98,7 @@ TEST(CheckSortedBlockInputStream, CheckGoodCase)
return std::make_shared<CheckSortedTransform>(header, sort_description);
});
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
QueryPipeline pipeline(std::move(pipe));
PullingPipelineExecutor executor(pipeline);
@ -125,8 +125,7 @@ TEST(CheckSortedBlockInputStream, CheckBadLastRow)
return std::make_shared<CheckSortedTransform>(header, sort_description);
});
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
QueryPipeline pipeline(std::move(pipe));
PullingPipelineExecutor executor(pipeline);
@ -150,8 +149,7 @@ TEST(CheckSortedBlockInputStream, CheckUnsortedBlock1)
return std::make_shared<CheckSortedTransform>(header, sort_description);
});
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
QueryPipeline pipeline(std::move(pipe));
PullingPipelineExecutor executor(pipeline);
@ -172,8 +170,7 @@ TEST(CheckSortedBlockInputStream, CheckUnsortedBlock2)
return std::make_shared<CheckSortedTransform>(header, sort_description);
});
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
QueryPipeline pipeline(std::move(pipe));
PullingPipelineExecutor executor(pipeline);
@ -194,8 +191,7 @@ TEST(CheckSortedBlockInputStream, CheckUnsortedBlock3)
return std::make_shared<CheckSortedTransform>(header, sort_description);
});
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
QueryPipeline pipeline(std::move(pipe));
PullingPipelineExecutor executor(pipeline);
@ -218,8 +214,7 @@ TEST(CheckSortedBlockInputStream, CheckEqualBlock)
return std::make_shared<CheckSortedTransform>(header, sort_description);
});
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
QueryPipeline pipeline(std::move(pipe));
PullingPipelineExecutor executor(pipeline);

View File

@ -18,9 +18,18 @@ void DDLDependencyVisitor::visit(const ASTPtr & ast, Data & data)
visit(*dict_source, data);
}
bool DDLDependencyVisitor::needChildVisit(const ASTPtr & node, const ASTPtr & /*child*/)
bool DDLDependencyVisitor::needChildVisit(const ASTPtr & node, const ASTPtr & child)
{
return !node->as<ASTStorage>();
if (node->as<ASTStorage>())
return false;
if (auto * create = node->as<ASTCreateQuery>())
{
if (child.get() == create->select)
return false;
}
return true;
}
void DDLDependencyVisitor::visit(const ASTFunction & function, Data & data)

View File

@ -187,6 +187,11 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo
if (metadata.settings_changes)
storage_ast.set(storage_ast.settings, metadata.settings_changes);
}
if (metadata.comment.empty())
storage_ast.reset(storage_ast.comment);
else
storage_ast.set(storage_ast.comment, std::make_shared<ASTLiteral>(metadata.comment));
}
}

View File

@ -13,7 +13,7 @@
# include <Databases/MySQL/FetchTablesColumnsList.h>
# include <Formats/MySQLSource.h>
# include <Processors/Executors/PullingPipelineExecutor.h>
# include <Processors/QueryPipeline.h>
# include <Processors/QueryPipelineBuilder.h>
# include <IO/Operators.h>
# include <Interpreters/Context.h>
# include <Parsers/ASTCreateQuery.h>
@ -284,8 +284,7 @@ std::map<String, UInt64> DatabaseMySQL::fetchTablesWithModificationTime(ContextP
std::map<String, UInt64> tables_with_modification_time;
StreamSettings mysql_input_stream_settings(local_context->getSettingsRef());
auto result = std::make_unique<MySQLSource>(mysql_pool.get(), query.str(), tables_status_sample_block, mysql_input_stream_settings);
QueryPipeline pipeline;
pipeline.init(Pipe(std::move(result)));
QueryPipeline pipeline(std::move(result));
Block block;
PullingPipelineExecutor executor(pipeline);

View File

@ -9,7 +9,7 @@
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/QueryPipeline.h>
#include <Processors/QueryPipelineBuilder.h>
#include <Formats/MySQLSource.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
@ -88,8 +88,7 @@ std::map<String, ColumnsDescription> fetchTablesColumnsList(
StreamSettings mysql_input_stream_settings(settings);
auto result = std::make_unique<MySQLSource>(pool.get(), query.str(), tables_columns_sample_block, mysql_input_stream_settings);
QueryPipeline pipeline;
pipeline.init(Pipe(std::move(result)));
QueryPipeline pipeline(std::move(result));
Block block;
PullingPipelineExecutor executor(pipeline);

View File

@ -7,7 +7,7 @@
#include <DataTypes/DataTypesNumber.h>
#include <Formats/MySQLSource.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/QueryPipeline.h>
#include <Processors/QueryPipelineBuilder.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h>
#include <Common/quoteString.h>
@ -44,8 +44,7 @@ static std::unordered_map<String, String> fetchTablesCreateQuery(
connection, "SHOW CREATE TABLE " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(fetch_table_name),
show_create_table_header, mysql_input_stream_settings);
QueryPipeline pipeline;
pipeline.init(Pipe(std::move(show_create_table)));
QueryPipeline pipeline(std::move(show_create_table));
Block create_query_block;
PullingPipelineExecutor executor(pipeline);
@ -69,8 +68,7 @@ static std::vector<String> fetchTablesInDB(const mysqlxx::PoolWithFailover::Entr
StreamSettings mysql_input_stream_settings(global_settings);
auto input = std::make_unique<MySQLSource>(connection, query, header, mysql_input_stream_settings);
QueryPipeline pipeline;
pipeline.init(Pipe(std::move(input)));
QueryPipeline pipeline(std::move(input));
Block block;
PullingPipelineExecutor executor(pipeline);
@ -97,8 +95,7 @@ void MaterializeMetadata::fetchMasterStatus(mysqlxx::PoolWithFailover::Entry & c
StreamSettings mysql_input_stream_settings(settings, false, true);
auto input = std::make_unique<MySQLSource>(connection, "SHOW MASTER STATUS;", header, mysql_input_stream_settings);
QueryPipeline pipeline;
pipeline.init(Pipe(std::move(input)));
QueryPipeline pipeline(std::move(input));
Block master_status;
PullingPipelineExecutor executor(pipeline);
@ -125,8 +122,7 @@ void MaterializeMetadata::fetchMasterVariablesValue(const mysqlxx::PoolWithFailo
const String & fetch_query = "SHOW VARIABLES WHERE Variable_name = 'binlog_checksum'";
StreamSettings mysql_input_stream_settings(settings, false, true);
auto variables_input = std::make_unique<MySQLSource>(connection, fetch_query, variables_header, mysql_input_stream_settings);
QueryPipeline pipeline;
pipeline.init(Pipe(std::move(variables_input)));
QueryPipeline pipeline(std::move(variables_input));
Block variables_block;
PullingPipelineExecutor executor(pipeline);
@ -153,8 +149,7 @@ static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & conne
String grants_query, sub_privs;
StreamSettings mysql_input_stream_settings(global_settings);
auto input = std::make_unique<MySQLSource>(connection, "SHOW GRANTS FOR CURRENT_USER();", sync_user_privs_header, mysql_input_stream_settings);
QueryPipeline pipeline;
pipeline.init(Pipe(std::move(input)));
QueryPipeline pipeline(std::move(input));
Block block;
PullingPipelineExecutor executor(pipeline);
@ -204,8 +199,7 @@ bool MaterializeMetadata::checkBinlogFileExists(const mysqlxx::PoolWithFailover:
StreamSettings mysql_input_stream_settings(settings, false, true);
auto input = std::make_unique<MySQLSource>(connection, "SHOW MASTER LOGS", logs_header, mysql_input_stream_settings);
QueryPipeline pipeline;
pipeline.init(Pipe(std::move(input)));
QueryPipeline pipeline(std::move(input));
Block block;
PullingPipelineExecutor executor(pipeline);

View File

@ -9,8 +9,10 @@
# include <random>
# include <Columns/ColumnTuple.h>
# include <Columns/ColumnDecimal.h>
# include <Processors/QueryPipeline.h>
# include <Processors/QueryPipelineBuilder.h>
# include <Processors/Executors/PullingPipelineExecutor.h>
# include <Processors/Executors/CompletedPipelineExecutor.h>
# include <Processors/Sources/SourceFromSingleChunk.h>
# include <DataStreams/CountingBlockOutputStream.h>
# include <DataStreams/OneBlockInputStream.h>
# include <DataStreams/copyData.h>
@ -112,8 +114,7 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const S
{"log_bin_use_v1_row_events", "OFF"}
};
QueryPipeline pipeline;
pipeline.init(Pipe(std::move(variables_input)));
QueryPipeline pipeline(std::move(variables_input));
PullingPipelineExecutor executor(pipeline);
Block variables_block;
@ -288,7 +289,7 @@ static inline void cleanOutdatedTables(const String & database_name, ContextPtr
}
}
static inline BlockOutputStreamPtr
static inline QueryPipeline
getTableOutput(const String & database_name, const String & table_name, ContextMutablePtr query_context, bool insert_materialized = false)
{
const StoragePtr & storage = DatabaseCatalog::instance().getTable(StorageID(database_name, table_name), query_context);
@ -312,10 +313,7 @@ getTableOutput(const String & database_name, const String & table_name, ContextM
BlockIO res = tryToExecuteQuery("INSERT INTO " + backQuoteIfNeed(table_name) + "(" + insert_columns_str.str() + ")" + " VALUES",
query_context, database_name, comment);
if (!res.out)
throw Exception("LOGICAL ERROR: It is a bug.", ErrorCodes::LOGICAL_ERROR);
return res.out;
return std::move(res.pipeline);
}
static inline void dumpDataForTables(
@ -333,25 +331,21 @@ static inline void dumpDataForTables(
String comment = "Materialize MySQL step 1: execute MySQL DDL for dump data";
tryToExecuteQuery(query_prefix + " " + iterator->second, query_context, database_name, comment); /// create table.
auto out = std::make_shared<CountingBlockOutputStream>(getTableOutput(database_name, table_name, query_context));
auto pipeline = getTableOutput(database_name, table_name, query_context);
StreamSettings mysql_input_stream_settings(context->getSettingsRef());
auto input = std::make_unique<MySQLSource>(
connection, "SELECT * FROM " + backQuoteIfNeed(mysql_database_name) + "." + backQuoteIfNeed(table_name),
out->getHeader(), mysql_input_stream_settings);
QueryPipeline pipeline;
pipeline.init(Pipe(std::move(input)));
PullingPipelineExecutor executor(pipeline);
pipeline.getHeader(), mysql_input_stream_settings);
auto counting = std::make_shared<CountingTransform>(pipeline.getHeader());
Pipe pipe(std::move(input));
pipe.addTransform(counting);
pipeline.complete(std::move(pipe));
Stopwatch watch;
CompletedPipelineExecutor executor(pipeline);
executor.execute();
out->writePrefix();
Block block;
while (executor.pull(block))
out->write(block);
out->writeSuffix();
const Progress & progress = out->getProgress();
const Progress & progress = counting->getProgress();
LOG_INFO(&Poco::Logger::get("MaterializedMySQLSyncThread(" + database_name + ")"),
"Materialize MySQL step 1: dump {}, {} rows, {} in {} sec., {} rows/sec., {}/sec."
, table_name, formatReadableQuantity(progress.written_rows), formatReadableSizeWithBinarySuffix(progress.written_bytes)
@ -801,9 +795,12 @@ void MaterializedMySQLSyncThread::Buffers::commit(ContextPtr context)
for (auto & table_name_and_buffer : data)
{
auto query_context = createQueryContext(context);
OneBlockInputStream input(table_name_and_buffer.second->first);
BlockOutputStreamPtr out = getTableOutput(database, table_name_and_buffer.first, query_context, true);
copyData(input, *out);
auto input = std::make_shared<SourceFromSingleChunk>(table_name_and_buffer.second->first);
auto pipeline = getTableOutput(database, table_name_and_buffer.first, query_context, true);
pipeline.complete(Pipe(std::move(input)));
CompletedPipelineExecutor executor(pipeline);
executor.execute();
}
data.clear();

View File

@ -14,7 +14,7 @@
#include <Dictionaries/HierarchyDictionariesUtils.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/QueryPipeline.h>
#include <Processors/QueryPipelineBuilder.h>
namespace ProfileEvents
{
@ -573,9 +573,9 @@ void CacheDictionary<dictionary_key_type>::update(CacheDictionaryUpdateUnitPtr<d
QueryPipeline pipeline;
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
pipeline.init(current_source_ptr->loadIds(requested_keys_vector));
pipeline = QueryPipeline(current_source_ptr->loadIds(requested_keys_vector));
else
pipeline.init(current_source_ptr->loadKeys(update_unit_ptr->key_columns, requested_complex_key_rows));
pipeline = QueryPipeline(current_source_ptr->loadKeys(update_unit_ptr->key_columns, requested_complex_key_rows));
size_t skip_keys_size_offset = dict_struct.getKeysSize();
PaddedPODArray<KeyType> found_keys_in_source;

View File

@ -6,6 +6,7 @@
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ExpressionActions.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/QueryPipelineBuilder.h>
#include <IO/ConnectionTimeouts.h>
#include <Interpreters/Session.h>
#include <Interpreters/executeQuery.h>
@ -162,39 +163,39 @@ std::string ClickHouseDictionarySource::toString() const
/// Builds a Pipe that produces the result of `query`.
/// For a local configuration the query is run through executeQuery() and its columns are converted
/// (by position) to the columns of the empty sample block; otherwise a RemoteSource is used.
/// If `result_size_hint` is non-null, `total_rows_to_read` from each progress report is added to it.
Pipe ClickHouseDictionarySource::createStreamForQuery(const String & query, std::atomic<size_t> * result_size_hint)
{
QueryPipeline pipeline;
QueryPipelineBuilder builder;
/// Sample block should not contain first row default values
auto empty_sample_block = sample_block.cloneEmpty();
/// Local source: execute the query in-process and add a converting expression on top.
if (configuration.is_local)
{
pipeline = executeQuery(query, context, true).pipeline;
builder.init(executeQuery(query, context, true).pipeline);
/// Columns are matched by position, not by name.
auto converting = ActionsDAG::makeConvertingActions(
pipeline.getHeader().getColumnsWithTypeAndName(),
builder.getHeader().getColumnsWithTypeAndName(),
empty_sample_block.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Position);
pipeline.addSimpleTransform([&](const Block & header)
builder.addSimpleTransform([&](const Block & header)
{
return std::make_shared<ExpressionTransform>(header, std::make_shared<ExpressionActions>(converting));
});
}
else
{
/// Remote source: the query executor is given the empty sample block as the expected header.
pipeline.init(Pipe(std::make_shared<RemoteSource>(
builder.init(Pipe(std::make_shared<RemoteSource>(
std::make_shared<RemoteQueryExecutor>(pool, query, empty_sample_block, context), false, false)));
}
/// The hint is optional; the callback is installed only when the caller passed a counter.
if (result_size_hint)
{
pipeline.setProgressCallback([result_size_hint](const Progress & progress)
builder.setProgressCallback([result_size_hint](const Progress & progress)
{
*result_size_hint += progress.total_rows_to_read;
});
}
return QueryPipeline::getPipe(std::move(pipeline));
return QueryPipelineBuilder::getPipe(std::move(builder));
}
std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & request) const
@ -203,16 +204,15 @@ std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & re
if (configuration.is_local)
{
auto query_context = Context::createCopy(context);
auto pipe = QueryPipeline::getPipe(executeQuery(request, query_context, true).pipeline);
return readInvalidateQuery(std::move(pipe));
return readInvalidateQuery(executeQuery(request, query_context, true).pipeline);
}
else
{
/// We pass an empty block to RemoteQueryExecutor, because we don't know the structure of the result.
Block invalidate_sample_block;
Pipe pipe(std::make_shared<RemoteSource>(
QueryPipeline pipeline(std::make_shared<RemoteSource>(
std::make_shared<RemoteQueryExecutor>(pool, request, invalidate_sample_block, context), false, false));
return readInvalidateQuery(std::move(pipe));
return readInvalidateQuery(std::move(pipeline));
}
}

View File

@ -16,7 +16,7 @@
#include <Dictionaries/IDictionary.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/QueryPipeline.h>
#include <Processors/QueryPipelineBuilder.h>
namespace DB
@ -567,8 +567,7 @@ void mergeBlockWithPipe(
auto result_fetched_columns = block_to_update.cloneEmptyColumns();
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
QueryPipeline pipeline(std::move(pipe));
PullingPipelineExecutor executor(pipeline);
Block block;

View File

@ -8,7 +8,7 @@
#include <Dictionaries/DictionaryFactory.h>
#include <Dictionaries/HierarchyDictionariesUtils.h>
#include <Processors/QueryPipeline.h>
#include <Processors/QueryPipelineBuilder.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
namespace DB
@ -68,8 +68,7 @@ Columns DirectDictionary<dictionary_key_type>::getColumns(
size_t dictionary_keys_size = dict_struct.getKeysNames().size();
block_key_columns.reserve(dictionary_keys_size);
QueryPipeline pipeline;
pipeline.init(getSourceBlockInputStream(key_columns, requested_keys));
QueryPipeline pipeline(getSourceBlockInputStream(key_columns, requested_keys));
PullingPipelineExecutor executor(pipeline);
@ -185,9 +184,7 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(
size_t dictionary_keys_size = dict_struct.getKeysNames().size();
block_key_columns.reserve(dictionary_keys_size);
QueryPipeline pipeline;
pipeline.init(getSourceBlockInputStream(key_columns, requested_keys));
QueryPipeline pipeline(getSourceBlockInputStream(key_columns, requested_keys));
PullingPipelineExecutor executor(pipeline);
size_t keys_found = 0;

View File

@ -10,7 +10,7 @@
#include <Columns/ColumnNullable.h>
#include <Functions/FunctionHelpers.h>
#include <Processors/QueryPipeline.h>
#include <Processors/QueryPipelineBuilder.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Dictionaries//DictionarySource.h>
@ -322,8 +322,7 @@ void FlatDictionary::updateData()
{
if (!update_field_loaded_block || update_field_loaded_block->rows() == 0)
{
QueryPipeline pipeline;
pipeline.init(source_ptr->loadUpdatedAll());
QueryPipeline pipeline(source_ptr->loadUpdatedAll());
PullingPipelineExecutor executor(pipeline);
Block block;
@ -358,8 +357,7 @@ void FlatDictionary::loadData()
{
if (!source_ptr->hasUpdateField())
{
QueryPipeline pipeline;
pipeline.init(source_ptr->loadAll());
QueryPipeline pipeline(source_ptr->loadAll());
PullingPipelineExecutor executor(pipeline);
Block block;

View File

@ -367,8 +367,7 @@ void HashedDictionary<dictionary_key_type, sparse>::updateData()
if (!update_field_loaded_block || update_field_loaded_block->rows() == 0)
{
QueryPipeline pipeline;
pipeline.init(source_ptr->loadUpdatedAll());
QueryPipeline pipeline(source_ptr->loadUpdatedAll());
PullingPipelineExecutor executor(pipeline);
Block block;
@ -563,9 +562,9 @@ void HashedDictionary<dictionary_key_type, sparse>::loadData()
QueryPipeline pipeline;
if (configuration.preallocate)
pipeline.init(source_ptr->loadAllWithSizeHint(&new_size));
pipeline = QueryPipeline(source_ptr->loadAllWithSizeHint(&new_size));
else
pipeline.init(source_ptr->loadAll());
pipeline = QueryPipeline(source_ptr->loadAll());
PullingPipelineExecutor executor(pipeline);
Block block;

View File

@ -352,8 +352,7 @@ void IPAddressDictionary::createAttributes()
void IPAddressDictionary::loadData()
{
QueryPipeline pipeline;
pipeline.init(source_ptr->loadAll());
QueryPipeline pipeline(source_ptr->loadAll());
std::vector<IPRecord> ip_records;

View File

@ -12,6 +12,7 @@
#include <Core/Settings.h>
#include <Interpreters/Context.h>
#include <Processors/Pipe.h>
#include <Processors/QueryPipeline.h>
#include <Storages/ExternalDataSourceConfiguration.h>
@ -307,7 +308,7 @@ std::string MySQLDictionarySource::doInvalidateQuery(const std::string & request
Block invalidate_sample_block;
ColumnPtr column(ColumnString::create());
invalidate_sample_block.insert(ColumnWithTypeAndName(column, std::make_shared<DataTypeString>(), "Sample Block"));
return readInvalidateQuery(Pipe(std::make_unique<MySQLSource>(pool->get(), request, invalidate_sample_block, settings)));
return readInvalidateQuery(QueryPipeline(std::make_unique<MySQLSource>(pool->get(), request, invalidate_sample_block, settings)));
}
}

View File

@ -165,8 +165,7 @@ void IPolygonDictionary::blockToAttributes(const DB::Block & block)
void IPolygonDictionary::loadData()
{
QueryPipeline pipeline;
pipeline.init(source_ptr->loadAll());
QueryPipeline pipeline(source_ptr->loadAll());
PullingPipelineExecutor executor(pipeline);
Block block;

View File

@ -11,6 +11,7 @@
#include <DataStreams/PostgreSQLSource.h>
#include "readInvalidateQuery.h"
#include <Interpreters/Context.h>
#include <Processors/QueryPipeline.h>
#include <Storages/ExternalDataSourceConfiguration.h>
#endif
@ -125,7 +126,7 @@ std::string PostgreSQLDictionarySource::doInvalidateQuery(const std::string & re
Block invalidate_sample_block;
ColumnPtr column(ColumnString::create());
invalidate_sample_block.insert(ColumnWithTypeAndName(column, std::make_shared<DataTypeString>(), "Sample Block"));
return readInvalidateQuery(Pipe(std::make_unique<PostgreSQLSource<>>(pool->get(), request, invalidate_sample_block, 1)));
return readInvalidateQuery(QueryPipeline(std::make_unique<PostgreSQLSource<>>(pool->get(), request, invalidate_sample_block, 1)));
}

View File

@ -303,8 +303,7 @@ void RangeHashedDictionary<dictionary_key_type>::createAttributes()
template <DictionaryKeyType dictionary_key_type>
void RangeHashedDictionary<dictionary_key_type>::loadData()
{
QueryPipeline pipeline;
pipeline.init(source_ptr->loadAll());
QueryPipeline pipeline(source_ptr->loadAll());
PullingPipelineExecutor executor(pipeline);
Block block;

View File

@ -18,6 +18,7 @@
#include "readInvalidateQuery.h"
#include "registerDictionaries.h"
#include <Common/escapeForFileName.h>
#include <Processors/QueryPipeline.h>
namespace DB
@ -199,7 +200,7 @@ std::string XDBCDictionarySource::doInvalidateQuery(const std::string & request)
for (const auto & [name, value] : url_params)
invalidate_url.addQueryParameter(name, value);
return readInvalidateQuery(loadFromQuery(invalidate_url, invalidate_sample_block, request));
return readInvalidateQuery(QueryPipeline(loadFromQuery(invalidate_url, invalidate_sample_block, request)));
}

View File

@ -1,5 +1,5 @@
#include "readInvalidateQuery.h"
#include <Processors/QueryPipeline.h>
#include <Processors/QueryPipelineBuilder.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <IO/WriteBufferFromString.h>
#include <Formats/FormatSettings.h>
@ -15,11 +15,8 @@ namespace ErrorCodes
extern const int RECEIVED_EMPTY_DATA;
}
std::string readInvalidateQuery(Pipe pipe)
std::string readInvalidateQuery(QueryPipeline pipeline)
{
QueryPipeline pipeline;
pipeline.init(std::move(pipe));
PullingPipelineExecutor executor(pipeline);
Block block;

View File

@ -5,9 +5,9 @@
namespace DB
{
class Pipe;
class QueryPipeline;
/// Used in MySQLDictionarySource and XDBCDictionarySource after processing invalidate_query.
std::string readInvalidateQuery(Pipe pipe);
std::string readInvalidateQuery(QueryPipeline pipeline);
}

View File

@ -107,6 +107,8 @@ public:
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
{
DataTypes argument_types;
for (const auto & argument : arguments)
argument_types.push_back(argument.type);
/// More efficient specialization for two numeric arguments.
if (arguments.size() == 2 && isNumber(arguments[0].type) && isNumber(arguments[1].type))

View File

@ -26,6 +26,8 @@
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/misc.h>

View File

@ -7,7 +7,9 @@
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Executors/StreamingFormatExecutor.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <Processors/QueryPipeline.h>
#include <IO/ConcatReadBuffer.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/ReadBufferFromString.h>
@ -371,11 +373,11 @@ try
insert_context->getClientInfo().query_kind = ClientInfo::QueryKind::INITIAL_QUERY;
insert_context->setCurrentQueryId("");
InterpreterInsertQuery interpreter(key.query, insert_context, key.settings.insert_allow_materialized_columns);
auto sinks = interpreter.getSinks();
assert(sinks.size() == 1);
InterpreterInsertQuery interpreter(key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true);
auto pipeline = interpreter.execute().pipeline;
assert(pipeline.pushing());
auto header = sinks.at(0)->getInputs().front().getHeader();
auto header = pipeline.getHeader();
auto format = getInputFormatFromASTInsertQuery(key.query, false, header, insert_context, nullptr);
size_t total_rows = 0;
@ -417,15 +419,10 @@ try
size_t total_bytes = chunk.bytes();
auto source = std::make_shared<SourceFromSingleChunk>(header, std::move(chunk));
Pipe pipe(source);
pipeline.complete(Pipe(std::move(source)));
QueryPipeline out_pipeline;
out_pipeline.init(std::move(pipe));
out_pipeline.resize(1);
out_pipeline.setSinks([&](const Block &, Pipe::StreamType) { return sinks.at(0); });
auto out_executor = out_pipeline.execute();
out_executor->execute(out_pipeline.getNumThreads());
CompletedPipelineExecutor completed_executor(pipeline);
completed_executor.execute();
LOG_INFO(log, "Flushed {} rows, {} bytes for query '{}'",
total_rows, total_bytes, queryToString(key.query));

View File

@ -55,6 +55,7 @@
#include <IO/WriteBufferFromString.h>
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Parsers/formatAST.h>
namespace DB

View File

@ -8,6 +8,7 @@
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/interpretSubquery.h>
#include <Interpreters/SubqueryForSet.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
@ -15,7 +16,7 @@
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/IAST.h>
#include <Processors/Executors/PipelineExecutor.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/Sinks/SinkToStorage.h>
#include <Common/typeid_cast.h>
@ -158,13 +159,9 @@ public:
auto external_table = external_storage_holder->getTable();
auto table_out = external_table->write({}, external_table->getInMemoryMetadataPtr(), getContext());
auto io = interpreter->execute();
io.pipeline.resize(1);
io.pipeline.setSinks([&](const Block &, Pipe::StreamType) -> ProcessorPtr
{
return table_out;
});
auto executor = io.pipeline.execute();
executor->execute(io.pipeline.getNumThreads());
io.pipeline.complete(std::move(table_out));
CompletedPipelineExecutor executor(io.pipeline);
executor.execute();
}
else
{

View File

@ -1,5 +1,8 @@
#include <Interpreters/IInterpreterUnionOrSelectQuery.h>
#include <Interpreters/QueryLog.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
namespace DB
{
@ -9,4 +12,15 @@ void IInterpreterUnionOrSelectQuery::extendQueryLogElemImpl(QueryLogElement & el
elem.query_kind = "Select";
}
/// Builds the query plan for this interpreter via buildQueryPlan() and converts it into a
/// pipeline builder, with optimization and build settings taken from the interpreter's context.
QueryPipelineBuilder IInterpreterUnionOrSelectQuery::buildQueryPipeline()
{
    QueryPlan plan;
    buildQueryPlan(plan);

    /// The plan returns the builder through a pointer-like holder; the builder is moved out of it.
    auto builder_holder = plan.buildQueryPipeline(
        QueryPlanOptimizationSettings::fromContext(context),
        BuildQueryPipelineSettings::fromContext(context));

    return std::move(*builder_holder);
}
}

View File

@ -28,6 +28,7 @@ public:
}
virtual void buildQueryPlan(QueryPlan & query_plan) = 0;
QueryPipelineBuilder buildQueryPipeline();
virtual void ignoreWithTotals() = 0;

View File

@ -136,7 +136,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter)
table->checkAlterPartitionIsPossible(partition_commands, metadata_snapshot, getContext()->getSettingsRef());
auto partition_commands_pipe = table->alterPartition(metadata_snapshot, partition_commands, getContext());
if (!partition_commands_pipe.empty())
res.pipeline.init(std::move(partition_commands_pipe));
res.pipeline = QueryPipeline(std::move(partition_commands_pipe));
}
if (!live_view_commands.empty())
@ -411,6 +411,11 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS
break;
}
case ASTAlterCommand::NO_TYPE: break;
case ASTAlterCommand::MODIFY_COMMENT:
{
required_access.emplace_back(AccessType::ALTER_MODIFY_COMMENT, database, table);
break;
}
}
return required_access;

View File

@ -3,7 +3,7 @@
#include <Access/AccessFlags.h>
#include <Storages/IStorage.h>
#include <Parsers/ASTCheckQuery.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnsNumber.h>
@ -72,7 +72,7 @@ BlockIO InterpreterCheckQuery::execute()
}
BlockIO res;
res.in = std::make_shared<OneBlockInputStream>(block);
res.pipeline = QueryPipeline(std::make_shared<SourceFromSingleChunk>(std::move(block)));
return res;
}

View File

@ -8,7 +8,7 @@
#include <Interpreters/FunctionNameNormalizer.h>
#include <Interpreters/UserDefinedSQLObjectsLoader.h>
#include <Interpreters/UserDefinedSQLFunctionFactory.h>
#include <stack>
namespace DB
{

View File

@ -1,4 +1,5 @@
#include <Storages/IStorage.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <DataStreams/BlockIO.h>
#include <DataTypes/DataTypeString.h>
#include <Parsers/queryToString.h>
@ -15,7 +16,6 @@
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/TablePropertiesQueriesASTs.h>
#include <DataTypes/NestedUtils.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
namespace DB
@ -157,7 +157,7 @@ BlockIO InterpreterDescribeQuery::execute()
BlockIO res;
size_t num_rows = res_columns[0]->size();
auto source = std::make_shared<SourceFromSingleChunk>(sample_block, Chunk(std::move(res_columns), num_rows));
res.pipeline.init(Pipe(std::move(source)));
res.pipeline = QueryPipeline(std::move(source));
return res;
}

View File

@ -1,6 +1,6 @@
#include <Storages/IStorage.h>
#include <Parsers/TablePropertiesQueriesASTs.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <DataStreams/BlockIO.h>
#include <DataStreams/copyData.h>
#include <DataTypes/DataTypesNumber.h>
@ -21,7 +21,7 @@ namespace ErrorCodes
/// Returns a BlockIO that carries the result produced by executeImpl().
BlockIO InterpreterExistsQuery::execute()
{
BlockIO res;
res.in = executeImpl();
res.pipeline = executeImpl();
return res;
}
@ -35,7 +35,7 @@ Block InterpreterExistsQuery::getSampleBlock()
}
BlockInputStreamPtr InterpreterExistsQuery::executeImpl()
QueryPipeline InterpreterExistsQuery::executeImpl()
{
ASTQueryWithTableAndOutput * exists_query;
bool result = false;
@ -76,10 +76,10 @@ BlockInputStreamPtr InterpreterExistsQuery::executeImpl()
result = DatabaseCatalog::instance().isDictionaryExist({database, exists_query->table});
}
return std::make_shared<OneBlockInputStream>(Block{{
return QueryPipeline(std::make_shared<SourceFromSingleChunk>(Block{{
ColumnUInt8::create(1, result),
std::make_shared<DataTypeUInt8>(),
"result" }});
"result" }}));
}
}

View File

@ -21,7 +21,7 @@ public:
private:
ASTPtr query_ptr;
BlockInputStreamPtr executeImpl();
QueryPipeline executeImpl();
};

View File

@ -1,7 +1,7 @@
#include <Interpreters/InterpreterExplainQuery.h>
#include <DataStreams/BlockIO.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <DataTypes/DataTypeString.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
@ -73,7 +73,7 @@ namespace
/// Returns a BlockIO that carries the result produced by executeImpl().
BlockIO InterpreterExplainQuery::execute()
{
BlockIO res;
res.in = executeImpl();
res.pipeline = executeImpl();
return res;
}
@ -240,7 +240,7 @@ ExplainSettings<Settings> checkAndGetSettings(const ASTPtr & ast_settings)
}
BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
QueryPipeline InterpreterExplainQuery::executeImpl()
{
const auto & ast = query->as<const ASTExplainQuery &>();
@ -304,33 +304,41 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
}
else if (ast.getKind() == ASTExplainQuery::QueryPipeline)
{
if (!dynamic_cast<const ASTSelectWithUnionQuery *>(ast.getExplainedQuery().get()))
throw Exception("Only SELECT is supported for EXPLAIN query", ErrorCodes::INCORRECT_QUERY);
auto settings = checkAndGetSettings<QueryPipelineSettings>(ast.getSettings());
QueryPlan plan;
InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), SelectQueryOptions());
interpreter.buildQueryPlan(plan);
auto pipeline = plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(getContext()),
BuildQueryPipelineSettings::fromContext(getContext()));
if (settings.graph)
if (dynamic_cast<const ASTSelectWithUnionQuery *>(ast.getExplainedQuery().get()))
{
/// Pipe holds QueryPlan, should not go out-of-scope
auto pipe = QueryPipeline::getPipe(std::move(*pipeline));
const auto & processors = pipe.getProcessors();
auto settings = checkAndGetSettings<QueryPipelineSettings>(ast.getSettings());
QueryPlan plan;
if (settings.compact)
printPipelineCompact(processors, buf, settings.query_pipeline_options.header);
InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), SelectQueryOptions());
interpreter.buildQueryPlan(plan);
auto pipeline = plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(getContext()),
BuildQueryPipelineSettings::fromContext(getContext()));
if (settings.graph)
{
/// Pipe holds QueryPlan, should not go out-of-scope
auto pipe = QueryPipelineBuilder::getPipe(std::move(*pipeline));
const auto & processors = pipe.getProcessors();
if (settings.compact)
printPipelineCompact(processors, buf, settings.query_pipeline_options.header);
else
printPipeline(processors, buf);
}
else
printPipeline(processors, buf);
{
plan.explainPipeline(buf, settings.query_pipeline_options);
}
}
else if (dynamic_cast<const ASTInsertQuery *>(ast.getExplainedQuery().get()))
{
InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext());
auto io = insert.execute();
printPipeline(io.pipeline.getProcessors(), buf);
}
else
{
plan.explainPipeline(buf, settings.query_pipeline_options);
}
throw Exception("Only SELECT and INSERT is supported for EXPLAIN PIPELINE query", ErrorCodes::INCORRECT_QUERY);
}
else if (ast.getKind() == ASTExplainQuery::QueryEstimates)
{
@ -359,7 +367,7 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
fillColumn(*res_columns[0], buf.str());
}
return std::make_shared<OneBlockInputStream>(sample_block.cloneWithColumns(std::move(res_columns)));
return QueryPipeline(std::make_shared<SourceFromSingleChunk>(sample_block.cloneWithColumns(std::move(res_columns))));
}
}

View File

@ -15,12 +15,12 @@ public:
BlockIO execute() override;
static Block getSampleBlock(const ASTExplainQuery::ExplainKind kind);
static Block getSampleBlock(ASTExplainQuery::ExplainKind kind);
private:
ASTPtr query;
BlockInputStreamPtr executeImpl();
QueryPipeline executeImpl();
};

View File

@ -1,14 +1,12 @@
#include <Interpreters/InterpreterInsertQuery.h>
#include <Access/AccessFlags.h>
#include <DataStreams/AddingDefaultBlockOutputStream.h>
#include <DataStreams/CheckConstraintsBlockOutputStream.h>
#include <DataStreams/CountingBlockOutputStream.h>
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
#include <DataStreams/PushingToViewsBlockOutputStream.h>
#include <DataStreams/SquashingBlockOutputStream.h>
#include <DataStreams/copyData.h>
#include <DataStreams/PushingToSinkBlockOutputStream.h>
#include <IO/ConnectionTimeoutsContext.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Interpreters/InterpreterWatchQuery.h>
@ -17,9 +15,10 @@
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Processors/Sources/SinkToOutputStream.h>
#include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/Sinks/EmptySink.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/SquashingChunksTransform.h>
#include <Storages/StorageDistributed.h>
#include <Storages/StorageMaterializedView.h>
#include <TableFunctions/TableFunctionFactory.h>
@ -28,6 +27,7 @@
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/processColumnTransformers.h>
#include <Interpreters/addMissingDefaults.h>
#include <DataTypes/DataTypeNullable.h>
#include <Columns/ColumnNullable.h>
@ -44,16 +44,18 @@ namespace ErrorCodes
}
InterpreterInsertQuery::InterpreterInsertQuery(
const ASTPtr & query_ptr_, ContextPtr context_, bool allow_materialized_, bool no_squash_, bool no_destination_)
const ASTPtr & query_ptr_, ContextPtr context_, bool allow_materialized_, bool no_squash_, bool no_destination_, bool async_insert_)
: WithContext(context_)
, query_ptr(query_ptr_)
, allow_materialized(allow_materialized_)
, no_squash(no_squash_)
, no_destination(no_destination_)
, async_insert(async_insert_)
{
checkStackSize();
}
StoragePtr InterpreterInsertQuery::getTable(ASTInsertQuery & query)
{
if (query.table_function)
@ -72,26 +74,37 @@ Block InterpreterInsertQuery::getSampleBlock(
const StoragePtr & table,
const StorageMetadataPtr & metadata_snapshot) const
{
Block table_sample_non_materialized = metadata_snapshot->getSampleBlockNonMaterialized();
/// If the query does not include information about columns
if (!query.columns)
{
if (no_destination)
return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals());
else
return table_sample_non_materialized;
return metadata_snapshot->getSampleBlockNonMaterialized();
}
Block table_sample = metadata_snapshot->getSampleBlock();
const auto columns_ast = processColumnTransformers(getContext()->getCurrentDatabase(), table, metadata_snapshot, query.columns);
/// Form the block based on the column names from the query
Block res;
Names names;
const auto columns_ast = processColumnTransformers(getContext()->getCurrentDatabase(), table, metadata_snapshot, query.columns);
for (const auto & identifier : columns_ast->children)
{
std::string current_name = identifier->getColumnName();
names.emplace_back(std::move(current_name));
}
return getSampleBlock(names, table, metadata_snapshot);
}
Block InterpreterInsertQuery::getSampleBlock(
const Names & names,
const StoragePtr & table,
const StorageMetadataPtr & metadata_snapshot) const
{
Block table_sample = metadata_snapshot->getSampleBlock();
Block table_sample_non_materialized = metadata_snapshot->getSampleBlockNonMaterialized();
Block res;
for (const auto & current_name : names)
{
/// The table does not have a column with that name
if (!table_sample.has(current_name))
throw Exception("No such column " + current_name + " in table " + table->getStorageID().getNameForLogs(),
@ -146,33 +159,125 @@ static bool isTrivialSelect(const ASTPtr & select)
return false;
};
std::pair<BlockIO, BlockOutputStreams> InterpreterInsertQuery::executeImpl(
const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, Block & sample_block)
/// Builds the insertion chain for an explicit list of column names.
///
/// Resolves `columns` against the table metadata with getSampleBlock() and passes the
/// resulting header to buildChainImpl(). `thread_status` and `elapsed_counter_ms` are
/// forwarded unchanged.
Chain InterpreterInsertQuery::buildChain(
    const StoragePtr & table,
    const StorageMetadataPtr & metadata_snapshot,
    const Names & columns,
    ThreadStatus * thread_status,
    std::atomic_uint64_t * elapsed_counter_ms)
{
    /// query_ptr is deliberately not dereferenced here: buildChainImpl() checks it for null
    /// itself, and nothing in this function needs the settings or the parsed query.
    auto sample = getSampleBlock(columns, table, metadata_snapshot);
    return buildChainImpl(table, metadata_snapshot, std::move(sample), thread_status, elapsed_counter_ms);
}
/// Assembles the chain of processors that inserted data goes through on its way into `table`.
///
/// `query_sample_block` is the header of the blocks that will be fed into the chain.
/// `thread_status` and `elapsed_counter_ms` are handed to the table sink (or to the
/// pushing-to-views chain); `thread_status` is also given to the counting transform.
/// `query_ptr` may be null: the query-dependent checks (`select`, `watch`) are then treated as false.
Chain InterpreterInsertQuery::buildChainImpl(
const StoragePtr & table,
const StorageMetadataPtr & metadata_snapshot,
const Block & query_sample_block,
ThreadStatus * thread_status,
std::atomic_uint64_t * elapsed_counter_ms)
{
auto context_ptr = getContext();
/// `query` stays nullptr when there is no query AST.
const ASTInsertQuery * query = nullptr;
if (query_ptr)
query = query_ptr->as<ASTInsertQuery>();
const Settings & settings = context_ptr->getSettingsRef();
/// NULL-to-default substitution is only requested for INSERT ... SELECT with `insert_null_as_default` enabled.
bool null_as_default = query && query->select && context_ptr->getSettingsRef().insert_null_as_default;
/// We create a pipeline of several streams, into which we will write data.
Chain out;
/// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage.
/// Otherwise we'll get duplicates when MV reads same rows again from Kafka.
if (table->noPushingToViews() && !no_destination)
{
/// Write straight into the table's own sink, bypassing views.
auto sink = table->write(query_ptr, metadata_snapshot, context_ptr);
sink->setRuntimeData(thread_status, elapsed_counter_ms);
out.addSource(std::move(sink));
}
else
{
out = buildPushingToViewsChain(table, metadata_snapshot, context_ptr, query_ptr, no_destination, thread_status, elapsed_counter_ms);
}
/// Note that we wrap transforms one on top of another, so we write them in reverse of data processing order.
/// Checking constraints. It must be done after calculation of all defaults, so we can check them on calculated columns.
/// The check is added only when the table actually has constraints.
if (const auto & constraints = metadata_snapshot->getConstraints(); !constraints.empty())
out.addSource(std::make_shared<CheckConstraintsTransform>(
table->getStorageID(), out.getInputHeader(), metadata_snapshot->getConstraints(), context_ptr));
/// Build the actions that convert `query_sample_block` into the header currently expected by the chain.
auto adding_missing_defaults_dag = addMissingDefaults(
query_sample_block,
out.getInputHeader().getNamesAndTypesList(),
metadata_snapshot->getColumns(),
context_ptr,
null_as_default);
auto adding_missing_defaults_actions = std::make_shared<ExpressionActions>(adding_missing_defaults_dag);
/// Actually we don't know structure of input blocks from query/table,
/// because some clients break insertion protocol (columns != header)
out.addSource(std::make_shared<ConvertingTransform>(query_sample_block, adding_missing_defaults_actions));
/// It's important to squash blocks as early as possible (before other transforms),
/// because other transforms may work inefficiently if block size is small.
/// Do not squash blocks if it is a sync INSERT into Distributed, since it leads to double buffering on client and server side.
/// Client-side buffering might cause excessive timeouts (especially in case of big blocks).
/// Squashing is also skipped when `no_squash` is set or the query is INSERT ... WATCH.
if (!(settings.insert_distributed_sync && table->isRemote()) && !no_squash && !(query && query->watch))
{
/// Tables that prefer large blocks use the min_insert_block_size_* thresholds;
/// otherwise only a row limit of max_block_size is applied (byte limit 0).
bool table_prefers_large_blocks = table->prefersLargeBlocks();
out.addSource(std::make_shared<SquashingChunksTransform>(
out.getInputHeader(),
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0));
}
/// Added last; given the reverse construction order noted above, this is presumably the first
/// step the incoming data passes through — confirm against Chain::addSource.
auto counting = std::make_shared<CountingTransform>(out.getInputHeader(), thread_status);
counting->setProcessListElement(context_ptr->getProcessListElement());
out.addSource(std::move(counting));
return out;
}
BlockIO InterpreterInsertQuery::execute()
{
const Settings & settings = getContext()->getSettingsRef();
auto & query = query_ptr->as<ASTInsertQuery &>();
QueryPipelineBuilder pipeline;
StoragePtr table = getTable(query);
if (query.partition_by && !table->supportsPartitionBy())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage");
BlockIO res;
BlockOutputStreams out_streams;
auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout);
auto metadata_snapshot = table->getInMemoryMetadataPtr();
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot);
if (!query.table_function)
getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames());
bool is_distributed_insert_select = false;
if (query.select && table->isRemote() && settings.parallel_distributed_insert_select)
{
// Distributed INSERT SELECT
if (auto maybe_pipeline = table->distributedWrite(query, getContext()))
{
res.pipeline = std::move(*maybe_pipeline);
pipeline = std::move(*maybe_pipeline);
is_distributed_insert_select = true;
}
}
std::vector<Chain> out_chains;
if (!is_distributed_insert_select || query.watch)
{
size_t out_streams_size = 1;
if (query.select)
{
bool is_trivial_insert_select = false;
@ -216,28 +321,28 @@ std::pair<BlockIO, BlockOutputStreams> InterpreterInsertQuery::executeImpl(
InterpreterSelectWithUnionQuery interpreter_select{
query.select, new_context, SelectQueryOptions(QueryProcessingStage::Complete, 1)};
res = interpreter_select.execute();
pipeline = interpreter_select.buildQueryPipeline();
}
else
{
/// Passing 1 as subquery_depth will disable limiting size of intermediate result.
InterpreterSelectWithUnionQuery interpreter_select{
query.select, getContext(), SelectQueryOptions(QueryProcessingStage::Complete, 1)};
res = interpreter_select.execute();
pipeline = interpreter_select.buildQueryPipeline();
}
res.pipeline.dropTotalsAndExtremes();
pipeline.dropTotalsAndExtremes();
if (table->supportsParallelInsert() && settings.max_insert_threads > 1)
out_streams_size = std::min(size_t(settings.max_insert_threads), res.pipeline.getNumStreams());
out_streams_size = std::min(size_t(settings.max_insert_threads), pipeline.getNumStreams());
res.pipeline.resize(out_streams_size);
pipeline.resize(out_streams_size);
/// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values.
if (getContext()->getSettingsRef().insert_null_as_default)
{
const auto & input_columns = res.pipeline.getHeader().getColumnsWithTypeAndName();
const auto & query_columns = sample_block.getColumnsWithTypeAndName();
const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName();
const auto & query_columns = query_sample_block.getColumnsWithTypeAndName();
const auto & output_columns = metadata_snapshot->getColumns();
if (input_columns.size() == query_columns.size())
@ -247,7 +352,7 @@ std::pair<BlockIO, BlockOutputStreams> InterpreterInsertQuery::executeImpl(
/// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with
/// default column values (in AddingDefaultBlockOutputStream), so all values will be cast correctly.
if (input_columns[col_idx].type->isNullable() && !query_columns[col_idx].type->isNullable() && output_columns.hasDefault(query_columns[col_idx].name))
sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullable(query_columns[col_idx].column), makeNullable(query_columns[col_idx].type), query_columns[col_idx].name));
query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullable(query_columns[col_idx].column), makeNullable(query_columns[col_idx].type), query_columns[col_idx].name));
}
}
}
@ -255,128 +360,17 @@ std::pair<BlockIO, BlockOutputStreams> InterpreterInsertQuery::executeImpl(
else if (query.watch)
{
InterpreterWatchQuery interpreter_watch{ query.watch, getContext() };
res = interpreter_watch.execute();
pipeline = interpreter_watch.buildQueryPipeline();
}
for (size_t i = 0; i < out_streams_size; i++)
{
/// We create a pipeline of several streams, into which we will write data.
BlockOutputStreamPtr out;
/// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage.
/// Otherwise we'll get duplicates when MV reads same rows again from Kafka.
if (table->noPushingToViews() && !no_destination)
out = std::make_shared<PushingToSinkBlockOutputStream>(table->write(query_ptr, metadata_snapshot, getContext()));
else
out = std::make_shared<PushingToViewsBlockOutputStream>(table, metadata_snapshot, getContext(), query_ptr, no_destination);
/// Note that we wrap transforms one on top of another, so we write them in reverse of data processing order.
/// Checking constraints. It must be done after calculation of all defaults, so we can check them on calculated columns.
if (const auto & constraints = metadata_snapshot->getConstraints(); !constraints.empty())
out = std::make_shared<CheckConstraintsBlockOutputStream>(
query.table_id, out, out->getHeader(), metadata_snapshot->getConstraints(), getContext());
bool null_as_default = query.select && getContext()->getSettingsRef().insert_null_as_default;
/// Actually we don't know structure of input blocks from query/table,
/// because some clients break insertion protocol (columns != header)
out = std::make_shared<AddingDefaultBlockOutputStream>(
out, sample_block, metadata_snapshot->getColumns(), getContext(), null_as_default);
/// It's important to squash blocks as early as possible (before other transforms),
/// because other transforms may work inefficient if block size is small.
/// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side.
/// Client-side bufferization might cause excessive timeouts (especially in case of big blocks).
if (!(settings.insert_distributed_sync && table->isRemote()) && !no_squash && !query.watch)
{
bool table_prefers_large_blocks = table->prefersLargeBlocks();
out = std::make_shared<SquashingBlockOutputStream>(
out,
out->getHeader(),
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0);
}
auto out_wrapper = std::make_shared<CountingBlockOutputStream>(out);
out_wrapper->setProcessListElement(getContext()->getProcessListElement());
out_streams.emplace_back(std::move(out_wrapper));
auto out = buildChainImpl(table, metadata_snapshot, query_sample_block, nullptr, nullptr);
out_chains.emplace_back(std::move(out));
}
}
return {std::move(res), std::move(out_streams)};
}
BlockIO InterpreterInsertQuery::execute()
{
const auto & settings = getContext()->getSettingsRef();
auto & query = query_ptr->as<ASTInsertQuery &>();
auto table = getTable(query);
auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout);
auto metadata_snapshot = table->getInMemoryMetadataPtr();
auto sample_block = getSampleBlock(query, table, metadata_snapshot);
if (!query.table_function)
getContext()->checkAccess(AccessType::INSERT, query.table_id, sample_block.getNames());
BlockIO res;
BlockOutputStreams out_streams;
std::tie(res, out_streams) = executeImpl(table, metadata_snapshot, sample_block);
/// What type of query: INSERT or INSERT SELECT or INSERT WATCH?
if (out_streams.empty())
{
/// Pipeline was already built.
}
else if (query.select || query.watch)
{
/// XXX: is this branch also triggered for select+input() case?
const auto & header = out_streams.at(0)->getHeader();
auto actions_dag = ActionsDAG::makeConvertingActions(
res.pipeline.getHeader().getColumnsWithTypeAndName(),
header.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Position);
auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes));
res.pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
return std::make_shared<ExpressionTransform>(in_header, actions);
});
res.pipeline.setSinks([&](const Block &, QueryPipeline::StreamType type) -> ProcessorPtr
{
if (type != QueryPipeline::StreamType::Main)
return nullptr;
auto stream = std::move(out_streams.back());
out_streams.pop_back();
return std::make_shared<SinkToOutputStream>(std::move(stream));
});
if (!allow_materialized)
{
for (const auto & column : metadata_snapshot->getColumns())
if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name))
throw Exception("Cannot insert column " + column.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN);
}
}
else if (query.hasInlinedData())
{
auto pipe = getSourceFromASTInsertQuery(query_ptr, true, sample_block, getContext(), nullptr);
res.pipeline.init(std::move(pipe));
res.pipeline.resize(1);
res.pipeline.setSinks([&](const Block &, Pipe::StreamType)
{
return std::make_shared<SinkToOutputStream>(out_streams.at(0));
});
}
else
res.out = std::move(out_streams.at(0));
res.pipeline.addStorageHolder(table);
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
@ -385,31 +379,63 @@ BlockIO InterpreterInsertQuery::execute()
res.pipeline.addStorageHolder(inner_table);
}
/// What type of query: INSERT or INSERT SELECT or INSERT WATCH?
if (is_distributed_insert_select)
{
res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline));
}
else if (query.select || query.watch)
{
const auto & header = out_chains.at(0).getInputHeader();
auto actions_dag = ActionsDAG::makeConvertingActions(
pipeline.getHeader().getColumnsWithTypeAndName(),
header.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Position);
auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes));
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
return std::make_shared<ExpressionTransform>(in_header, actions);
});
auto num_select_threads = pipeline.getNumThreads();
pipeline.addChains(std::move(out_chains));
pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr
{
return std::make_shared<EmptySink>(cur_header);
});
/// Don't use more threads for insert than for select to reduce memory consumption.
if (!settings.parallel_view_processing && pipeline.getNumThreads() > num_select_threads)
pipeline.setMaxThreads(num_select_threads);
if (!allow_materialized)
{
for (const auto & column : metadata_snapshot->getColumns())
if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name))
throw Exception("Cannot insert column " + column.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN);
}
res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline));
}
else
{
res.pipeline = QueryPipeline(std::move(out_chains.at(0)));
res.pipeline.setNumThreads(std::min<size_t>(res.pipeline.getNumThreads(), settings.max_threads));
if (query.hasInlinedData() && !async_insert)
{
/// can execute without additional data
auto pipe = getSourceFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr);
res.pipeline.complete(std::move(pipe));
}
}
return res;
}
Processors InterpreterInsertQuery::getSinks()
{
const auto & settings = getContext()->getSettingsRef();
auto & query = query_ptr->as<ASTInsertQuery &>();
auto table = getTable(query);
auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout);
auto metadata_snapshot = table->getInMemoryMetadataPtr();
auto sample_block = getSampleBlock(query, table, metadata_snapshot);
if (!query.table_function)
getContext()->checkAccess(AccessType::INSERT, query.table_id, sample_block.getNames());
auto out_streams = executeImpl(table, metadata_snapshot, sample_block).second;
Processors sinks;
sinks.reserve(out_streams.size());
for (const auto & out : out_streams)
sinks.emplace_back(std::make_shared<SinkToOutputStream>(out));
return sinks;
}
StorageID InterpreterInsertQuery::getDatabaseTable() const
{

View File

@ -5,11 +5,12 @@
#include <Interpreters/IInterpreter.h>
#include <Parsers/ASTInsertQuery.h>
#include <Storages/StorageInMemoryMetadata.h>
#include <IO/ReadBuffer.h>
namespace DB
{
class Chain;
class ThreadStatus;
/** Interprets the INSERT query.
*/
@ -21,7 +22,8 @@ public:
ContextPtr context_,
bool allow_materialized_ = false,
bool no_squash_ = false,
bool no_destination_ = false);
bool no_destination_ = false,
bool async_insert_ = false);
/** Prepare a request for execution. Return block streams
* - the stream into which you can write data to execute the query, if INSERT;
@ -30,23 +32,35 @@ public:
*/
BlockIO execute() override;
/// Returns only sinks, without input sources.
Processors getSinks();
StorageID getDatabaseTable() const;
Chain buildChain(
const StoragePtr & table,
const StorageMetadataPtr & metadata_snapshot,
const Names & columns,
ThreadStatus * thread_status = nullptr,
std::atomic_uint64_t * elapsed_counter_ms = nullptr);
void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr context_) const override;
Block getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const;
StoragePtr getTable(ASTInsertQuery & query);
Block getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const;
private:
std::pair<BlockIO, BlockOutputStreams> executeImpl(
const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, Block & sample_block);
Block getSampleBlock(const Names & names, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const;
ASTPtr query_ptr;
const bool allow_materialized;
const bool no_squash;
const bool no_destination;
const bool async_insert;
Chain buildChainImpl(
const StoragePtr & table,
const StorageMetadataPtr & metadata_snapshot,
const Block & query_sample_block,
ThreadStatus * thread_status,
std::atomic_uint64_t * elapsed_counter_ms);
};

View File

@ -16,7 +16,9 @@
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Sources/SourceWithProgress.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Storages/IStorage.h>
#include <Common/quoteString.h>
#include <thread>
@ -121,15 +123,16 @@ static QueryDescriptors extractQueriesExceptMeAndCheckAccess(const Block & proce
}
class SyncKillQueryInputStream : public IBlockInputStream
class SyncKillQuerySource : public SourceWithProgress
{
public:
SyncKillQueryInputStream(ProcessList & process_list_, QueryDescriptors && processes_to_stop_, Block && processes_block_,
SyncKillQuerySource(ProcessList & process_list_, QueryDescriptors && processes_to_stop_, Block && processes_block_,
const Block & res_sample_block_)
: process_list(process_list_),
processes_to_stop(std::move(processes_to_stop_)),
processes_block(std::move(processes_block_)),
res_sample_block(res_sample_block_)
: SourceWithProgress(res_sample_block_)
, process_list(process_list_)
, processes_to_stop(std::move(processes_to_stop_))
, processes_block(std::move(processes_block_))
, res_sample_block(std::move(res_sample_block_))
{
addTotalRowsApprox(processes_to_stop.size());
}
@ -139,14 +142,12 @@ public:
return "SynchronousQueryKiller";
}
Block getHeader() const override { return res_sample_block; }
Block readImpl() override
Chunk generate() override
{
size_t num_result_queries = processes_to_stop.size();
if (num_processed_queries >= num_result_queries)
return Block();
return {};
MutableColumns columns = res_sample_block.cloneEmptyColumns();
@ -179,7 +180,8 @@ public:
/// Don't produce empty block
} while (columns.empty() || columns[0]->empty());
return res_sample_block.cloneWithColumns(std::move(columns));
size_t num_rows = columns.empty() ? 0 : columns.front()->size();
return Chunk(std::move(columns), num_rows);
}
ProcessList & process_list;
@ -221,12 +223,12 @@ BlockIO InterpreterKillQueryQuery::execute()
insertResultRow(query_desc.source_num, code, processes_block, header, res_columns);
}
res_io.in = std::make_shared<OneBlockInputStream>(header.cloneWithColumns(std::move(res_columns)));
res_io.pipeline = QueryPipeline(std::make_shared<SourceFromSingleChunk>(header.cloneWithColumns(std::move(res_columns))));
}
else
{
res_io.in = std::make_shared<SyncKillQueryInputStream>(
process_list, std::move(queries_to_stop), std::move(processes_block), header);
res_io.pipeline = QueryPipeline(std::make_shared<SyncKillQuerySource>(
process_list, std::move(queries_to_stop), std::move(processes_block), header));
}
break;
@ -286,7 +288,7 @@ BlockIO InterpreterKillQueryQuery::execute()
"Not allowed to kill mutation. To execute this query it's necessary to have the grant " + required_access_rights.toString(),
ErrorCodes::ACCESS_DENIED);
res_io.in = std::make_shared<OneBlockInputStream>(header.cloneWithColumns(std::move(res_columns)));
res_io.pipeline = QueryPipeline(Pipe(std::make_shared<SourceFromSingleChunk>(header.cloneWithColumns(std::move(res_columns)))));
break;
}
@ -302,10 +304,15 @@ Block InterpreterKillQueryQuery::getSelectResult(const String & columns, const S
if (where_expression)
select_query += " WHERE " + queryToString(where_expression);
auto stream = executeQuery(select_query, getContext(), true).getInputStream();
Block res = stream->read();
auto io = executeQuery(select_query, getContext(), true);
PullingPipelineExecutor executor(io.pipeline);
Block res;
while (!res && executor.pull(res));
if (res && stream->read())
Block tmp_block;
while (executor.pull(tmp_block));
if (tmp_block)
throw Exception("Expected one block from input stream", ErrorCodes::LOGICAL_ERROR);
return res;

View File

@ -133,8 +133,10 @@ BlockIO InterpreterSelectIntersectExceptQuery::execute()
QueryPlanOptimizationSettings::fromContext(context),
BuildQueryPipelineSettings::fromContext(context));
res.pipeline = std::move(*pipeline);
res.pipeline.addInterpreterContext(context);
pipeline->addInterpreterContext(context);
res.pipeline = QueryPipelineBuilder::getPipeline(std::move(*query_plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context))));
return res;
}

View File

@ -603,8 +603,8 @@ BlockIO InterpreterSelectQuery::execute()
buildQueryPlan(query_plan);
res.pipeline = std::move(*query_plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)));
res.pipeline = QueryPipelineBuilder::getPipeline(std::move(*query_plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context))));
return res;
}

View File

@ -320,13 +320,13 @@ BlockIO InterpreterSelectWithUnionQuery::execute()
QueryPlan query_plan;
buildQueryPlan(query_plan);
auto pipeline = query_plan.buildQueryPipeline(
auto pipeline_builder = query_plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(context),
BuildQueryPipelineSettings::fromContext(context));
res.pipeline = std::move(*pipeline);
res.pipeline.addInterpreterContext(context);
pipeline_builder->addInterpreterContext(context);
res.pipeline = QueryPipelineBuilder::getPipeline(std::move(*pipeline_builder));
return res;
}

View File

@ -5,7 +5,7 @@
#include <Interpreters/InterpreterShowCreateAccessEntityQuery.h>
#include <Interpreters/InterpreterShowGrantsQuery.h>
#include <Columns/ColumnString.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <DataTypes/DataTypeString.h>
#include <Access/AccessFlags.h>
#include <Access/AccessControlManager.h>
@ -22,12 +22,12 @@ using EntityType = IAccessEntity::Type;
BlockIO InterpreterShowAccessQuery::execute()
{
BlockIO res;
res.in = executeImpl();
res.pipeline = executeImpl();
return res;
}
BlockInputStreamPtr InterpreterShowAccessQuery::executeImpl() const
QueryPipeline InterpreterShowAccessQuery::executeImpl() const
{
/// Build a create query.
ASTs queries = getCreateAndGrantQueries();
@ -43,7 +43,7 @@ BlockInputStreamPtr InterpreterShowAccessQuery::executeImpl() const
}
String desc = "ACCESS";
return std::make_shared<OneBlockInputStream>(Block{{std::move(column), std::make_shared<DataTypeString>(), desc}});
return QueryPipeline(std::make_shared<SourceFromSingleChunk>(Block{{std::move(column), std::make_shared<DataTypeString>(), desc}}));
}

View File

@ -23,7 +23,7 @@ public:
bool ignoreLimits() const override { return true; }
private:
BlockInputStreamPtr executeImpl() const;
QueryPipeline executeImpl() const;
ASTs getCreateAndGrantQueries() const;
std::vector<AccessEntityPtr> getEntities() const;

View File

@ -20,7 +20,7 @@
#include <Access/Role.h>
#include <Access/SettingsProfile.h>
#include <Columns/ColumnString.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <DataTypes/DataTypeString.h>
#include <Common/StringUtils/StringUtils.h>
#include <Core/Defines.h>
@ -241,12 +241,12 @@ InterpreterShowCreateAccessEntityQuery::InterpreterShowCreateAccessEntityQuery(c
BlockIO InterpreterShowCreateAccessEntityQuery::execute()
{
BlockIO res;
res.in = executeImpl();
res.pipeline = executeImpl();
return res;
}
BlockInputStreamPtr InterpreterShowCreateAccessEntityQuery::executeImpl()
QueryPipeline InterpreterShowCreateAccessEntityQuery::executeImpl()
{
/// Build a create queries.
ASTs create_queries = getCreateQueries();
@ -270,7 +270,7 @@ BlockInputStreamPtr InterpreterShowCreateAccessEntityQuery::executeImpl()
if (startsWith(desc, prefix))
desc = desc.substr(prefix.length()); /// `desc` always starts with "SHOW ", so we can trim this prefix.
return std::make_shared<OneBlockInputStream>(Block{{std::move(column), std::make_shared<DataTypeString>(), desc}});
return QueryPipeline(std::make_shared<SourceFromSingleChunk>(Block{{std::move(column), std::make_shared<DataTypeString>(), desc}}));
}

View File

@ -30,7 +30,7 @@ public:
static ASTPtr getAttachQuery(const IAccessEntity & entity);
private:
BlockInputStreamPtr executeImpl();
QueryPipeline executeImpl();
std::vector<AccessEntityPtr> getEntities() const;
ASTs getCreateQueries() const;
AccessRightsElements getRequiredAccess() const;

View File

@ -1,7 +1,7 @@
#include <Storages/IStorage.h>
#include <Parsers/TablePropertiesQueriesASTs.h>
#include <Parsers/formatAST.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <DataStreams/BlockIO.h>
#include <DataStreams/copyData.h>
#include <DataTypes/DataTypesNumber.h>
@ -26,7 +26,7 @@ namespace ErrorCodes
BlockIO InterpreterShowCreateQuery::execute()
{
BlockIO res;
res.in = executeImpl();
res.pipeline = executeImpl();
return res;
}
@ -40,7 +40,7 @@ Block InterpreterShowCreateQuery::getSampleBlock()
}
BlockInputStreamPtr InterpreterShowCreateQuery::executeImpl()
QueryPipeline InterpreterShowCreateQuery::executeImpl()
{
ASTPtr create_query;
ASTQueryWithTableAndOutput * show_query;
@ -100,10 +100,10 @@ BlockInputStreamPtr InterpreterShowCreateQuery::executeImpl()
MutableColumnPtr column = ColumnString::create();
column->insert(res);
return std::make_shared<OneBlockInputStream>(Block{{
return QueryPipeline(std::make_shared<SourceFromSingleChunk>(Block{{
std::move(column),
std::make_shared<DataTypeString>(),
"statement"}});
"statement"}}));
}
}

View File

@ -21,7 +21,7 @@ public:
private:
ASTPtr query_ptr;
BlockInputStreamPtr executeImpl();
QueryPipeline executeImpl();
};

View File

@ -5,7 +5,7 @@
#include <Parsers/formatAST.h>
#include <Interpreters/Context.h>
#include <Columns/ColumnString.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <DataTypes/DataTypeString.h>
#include <Access/AccessControlManager.h>
#include <Access/User.h>
@ -100,12 +100,12 @@ namespace
BlockIO InterpreterShowGrantsQuery::execute()
{
BlockIO res;
res.in = executeImpl();
res.pipeline = executeImpl();
return res;
}
BlockInputStreamPtr InterpreterShowGrantsQuery::executeImpl()
QueryPipeline InterpreterShowGrantsQuery::executeImpl()
{
/// Build a create query.
ASTs grant_queries = getGrantQueries();
@ -129,7 +129,7 @@ BlockInputStreamPtr InterpreterShowGrantsQuery::executeImpl()
if (desc.starts_with(prefix))
desc = desc.substr(prefix.length()); /// `desc` always starts with "SHOW ", so we can trim this prefix.
return std::make_shared<OneBlockInputStream>(Block{{std::move(column), std::make_shared<DataTypeString>(), desc}});
return QueryPipeline(std::make_shared<SourceFromSingleChunk>(Block{{std::move(column), std::make_shared<DataTypeString>(), desc}}));
}

View File

@ -27,7 +27,7 @@ public:
/// Always returns true. NOTE(review): presumably this exempts the query from limit checks - confirm against the base interface.
bool ignoreLimits() const override { return true; }
private:
BlockInputStreamPtr executeImpl();
QueryPipeline executeImpl();
ASTs getGrantQueries() const;
std::vector<AccessEntityPtr> getEntities() const;

View File

@ -31,11 +31,17 @@ namespace ErrorCodes
/// Builds the pipeline with buildQueryPipeline(), converts the resulting builder
/// into a QueryPipeline and returns it inside a BlockIO.
BlockIO InterpreterWatchQuery::execute()
{
    BlockIO io;

    auto builder = buildQueryPipeline();
    io.pipeline = QueryPipelineBuilder::getPipeline(std::move(builder));

    return io;
}
QueryPipelineBuilder InterpreterWatchQuery::buildQueryPipeline()
{
if (!getContext()->getSettingsRef().allow_experimental_live_view)
throw Exception("Experimental LIVE VIEW feature is not enabled (the setting 'allow_experimental_live_view')", ErrorCodes::SUPPORT_IS_DISABLED);
BlockIO res;
const ASTWatchQuery & query = typeid_cast<const ASTWatchQuery &>(*query_ptr);
auto table_id = getContext()->resolveStorageID(query, Context::ResolveOrdinary);
@ -85,10 +91,9 @@ BlockIO InterpreterWatchQuery::execute()
pipe.setQuota(getContext()->getQuota());
}
res.pipeline.init(std::move(pipe));
return res;
QueryPipelineBuilder pipeline;
pipeline.init(std::move(pipe));
return pipeline;
}
}

View File

@ -31,6 +31,7 @@ public:
/// Stores the query AST in `query_ptr` and forwards the context to the WithContext base.
InterpreterWatchQuery(const ASTPtr & query_ptr_, ContextPtr context_) : WithContext(context_), query_ptr(query_ptr_) {}
BlockIO execute() override;
/// Returns the pipeline for this query as a QueryPipelineBuilder (not yet converted to a QueryPipeline).
QueryPipelineBuilder buildQueryPipeline();
private:
ASTPtr query_ptr;

View File

@ -13,7 +13,7 @@
#include <Interpreters/join_common.h>
#include <Interpreters/sortBlock.h>
#include <Processors/Sources/BlocksListSource.h>
#include <Processors/QueryPipeline.h>
#include <Processors/QueryPipelineBuilder.h>
#include <Processors/Transforms/MergeSortingTransform.h>
#include <Processors/Executors/PipelineExecutingBlockInputStream.h>
@ -580,13 +580,14 @@ void MergeJoin::mergeInMemoryRightBlocks()
Pipe source(std::make_shared<BlocksListSource>(std::move(right_blocks.blocks)));
right_blocks.clear();
QueryPipeline pipeline;
pipeline.init(std::move(source));
QueryPipelineBuilder builder;
builder.init(std::move(source));
/// TODO: there should be no split keys by blocks for RIGHT|FULL JOIN
pipeline.addTransform(std::make_shared<MergeSortingTransform>(
pipeline.getHeader(), right_sort_description, max_rows_in_right_block, 0, 0, 0, 0, nullptr, 0));
builder.addTransform(std::make_shared<MergeSortingTransform>(
builder.getHeader(), right_sort_description, max_rows_in_right_block, 0, 0, 0, 0, nullptr, 0));
auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder));
auto sorted_input = PipelineExecutingBlockInputStream(std::move(pipeline));
while (Block block = sorted_input.read())

Some files were not shown because too many files have changed in this diff Show More