diff --git a/contrib/rocksdb b/contrib/rocksdb index 5ea892c8673..296c1b8b95f 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 5ea892c8673e6c5a052887653673b967d44cc59b +Subproject commit 296c1b8b95fd448b8097a1b2cc9f704ff4a73a2c diff --git a/docs/en/getting-started/example-datasets/index.md b/docs/en/getting-started/example-datasets/index.md index 78eb96a3f39..6dae6c20073 100644 --- a/docs/en/getting-started/example-datasets/index.md +++ b/docs/en/getting-started/example-datasets/index.md @@ -13,16 +13,16 @@ The list of documented datasets: - [GitHub Events](../../getting-started/example-datasets/github-events.md) - [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md) - [Recipes](../../getting-started/example-datasets/recipes.md) -- [OnTime](../../getting-started/example-datasets/ontime.md) -- [OpenSky](../../getting-started/example-datasets/opensky.md) -- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md) -- [UK Property Price Paid](../../getting-started/example-datasets/uk-price-paid.md) -- [What's on the Menu?](../../getting-started/example-datasets/menus.md) - [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md) - [WikiStat](../../getting-started/example-datasets/wikistat.md) - [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md) - [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md) - [Brown University Benchmark](../../getting-started/example-datasets/brown-benchmark.md) +- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md) +- [OpenSky](../../getting-started/example-datasets/opensky.md) +- [UK Property Price Paid](../../getting-started/example-datasets/uk-price-paid.md) - [Cell Towers](../../getting-started/example-datasets/cell-towers.md) +- [What's on the Menu?](../../getting-started/example-datasets/menus.md) +- [OnTime](../../getting-started/example-datasets/ontime.md) [Original article](https://clickhouse.com/docs/en/getting_started/example_datasets) diff --git a/docs/en/getting-started/example-datasets/opensky.md b/docs/en/getting-started/example-datasets/opensky.md index c9f6729cdd7..05e10ee50bd 100644 --- a/docs/en/getting-started/example-datasets/opensky.md +++ b/docs/en/getting-started/example-datasets/opensky.md @@ -3,7 +3,7 @@ toc_priority: 20 toc_title: OpenSky --- -# Crowdsourced air traffic data from The OpenSky Network 2020 +# Crowdsourced air traffic data from The OpenSky Network 2020 {#opensky} "The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic". @@ -14,17 +14,19 @@ Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent L Earth System Science Data 13(2), 2021 https://doi.org/10.5194/essd-13-357-2021 -## Download the Dataset +## Download the Dataset {#download-dataset} -``` +Run the command: + +```bash wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget ``` Download will take about 2 minutes with good internet connection. There are 30 files with total size of 4.3 GB. 
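To sanity-check the download before importing (a quick sketch; it assumes the files were saved to the current directory under the names matched by the `wget` pattern above):

```bash
# There should be 30 downloaded archives.
ls -1 flightlist_*.csv.gz | wc -l

# Their total size on disk should be roughly 4.3 GB.
du -ch flightlist_*.csv.gz | tail -n 1
```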
-## Create the Table +## Create the Table {#create-table} -``` +```sql CREATE TABLE opensky ( callsign String, @@ -46,69 +48,101 @@ CREATE TABLE opensky ) ENGINE = MergeTree ORDER BY (origin, destination, callsign); ``` -## Import Data +## Import Data {#import-data} Upload data into ClickHouse in parallel: -``` -ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c ' - gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"' +```bash +ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"' ``` -Here we pass the list of files (`ls -1 flightlist_*.csv.gz`) to `xargs` for parallel processing. +- Here we pass the list of files (`ls -1 flightlist_*.csv.gz`) to `xargs` for parallel processing. `xargs -P100` allows up to 100 parallel workers, but since there are only 30 files, only 30 workers will be used. +- For every file, `xargs` runs a script with `bash -c`. The script uses `{}` as a placeholder, and `xargs` substitutes each filename for it (we requested this with `-I{}`). +- The script decompresses the file (`gzip -c -d "{}"`) to standard output (the `-c` parameter), and the output is redirected to `clickhouse-client`. +- We also ask to parse [DateTime](../../sql-reference/data-types/datetime.md) fields with the extended parser ([--date_time_input_format best_effort](../../operations/settings/settings.md#settings-date_time_input_format)) so that the ISO-8601 format with timezone offsets is recognized. -For every file, `xargs` will run a script with `bash -c`. The script has substitution in form of `{}` and the `xargs` command will substitute the filename to it (we have asked it for xargs with `-I{}`). - -The script will decompress the file (`gzip -c -d "{}"`) to standard output (`-c` parameter) and the output is redirected to `clickhouse-client`. - -Finally, `clickhouse-client` will do insertion. It will read input data in `CSVWithNames` format. We also asked to parse DateTime fields with extended parser (`--date_time_input_format best_effort`) to recognize ISO-8601 format with timezone offsets. +Finally, `clickhouse-client` performs the insertion. It reads the input data in [CSVWithNames](../../interfaces/formats.md#csvwithnames) format. Parallel upload takes 24 seconds. If you don't like parallel upload, here is a sequential variant: + +```bash for file in flightlist_*.csv.gz; do gzip -c -d "$file" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"; done ``` -## Validate the Data +## Validate the Data {#validate-data} -``` -SELECT count() FROM opensky -66010819 +Query: + +```sql +SELECT count() FROM opensky; ``` -The size of dataset in ClickHouse is just 2.64 GiB: +Result: -``` -SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'opensky' -2.64 GiB +```text +┌──count()─┐ +│ 66010819 │ +└──────────┘ ``` -## Run Some Queries +The size of the dataset in ClickHouse is just 2.66 GiB; you can check it as follows.
-Total distance travelled is 68 billion kilometers: +Query: +```sql +SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'opensky'; ``` -SELECT formatReadableQuantity(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) / 1000) FROM opensky +Result: + +```text +┌─formatReadableSize(total_bytes)─┐ +│ 2.66 GiB │ +└─────────────────────────────────┘ +``` + +## Run Some Queries {#run-queries} + +Total distance travelled is 68 billion kilometers. + +Query: + +```sql +SELECT formatReadableQuantity(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) / 1000) FROM opensky; +``` + +Result: + +```text ┌─formatReadableQuantity(divide(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 1000))─┐ │ 68.72 billion │ └──────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` Average flight distance is around 1000 km. -``` -SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky +Query: + +```sql +SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky; +``` + +Result: + +```text ┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐ │ 1041090.6465708319 │ └────────────────────────────────────────────────────────────────────┘ ``` -### Most busy origin airports and the average distance seen: +### Most busy origin airports and the average distance seen {#busy-airports-average-distance} -``` +Query: + +```sql SELECT origin, count(), @@ -118,10 +152,12 @@ FROM opensky WHERE origin != '' GROUP BY origin ORDER BY count() DESC -LIMIT 100 +LIMIT 100; +``` -Query id: f9010ea5-97d0-45a3-a5bd-9657906cd105 +Result: +```text ┌─origin─┬─count()─┬─distance─┬─bar────────────────────────────────────┐ 1. │ KORD │ 745007 │ 1546108 │ ███████████████▍ │ 2. │ KDFW │ 696702 │ 1358721 │ █████████████▌ │ @@ -224,13 +260,13 @@ Query id: f9010ea5-97d0-45a3-a5bd-9657906cd105 99. │ EDDT │ 115122 │ 941740 │ █████████▍ │ 100. │ EFHK │ 114860 │ 1629143 │ ████████████████▎ │ └────────┴─────────┴──────────┴────────────────────────────────────────┘ - -100 rows in set. Elapsed: 0.186 sec. Processed 48.31 million rows, 2.17 GB (259.27 million rows/s., 11.67 GB/s.) ``` -### Number of flights from three major Moscow airports, weekly: +### Number of flights from three major Moscow airports, weekly {#flights-from-moscow} -``` +Query: + +```sql SELECT toMonday(day) AS k, count() AS c, @@ -238,10 +274,12 @@ SELECT FROM opensky WHERE origin IN ('UUEE', 'UUDD', 'UUWW') GROUP BY k -ORDER BY k ASC +ORDER BY k ASC; +``` -Query id: 1b446157-9519-4cc4-a1cb-178dfcc15a8e +Result: +```text ┌──────────k─┬────c─┬─bar──────────────────────────────────────────────────────────────────────────┐ 1. │ 2018-12-31 │ 5248 │ ████████████████████████████████████████████████████▍ │ 2. │ 2019-01-07 │ 6302 │ ███████████████████████████████████████████████████████████████ │ @@ -375,10 +413,8 @@ Query id: 1b446157-9519-4cc4-a1cb-178dfcc15a8e 130. │ 2021-06-21 │ 6061 │ ████████████████████████████████████████████████████████████▌ │ 131. │ 2021-06-28 │ 2554 │ █████████████████████████▌ │ └────────────┴──────┴──────────────────────────────────────────────────────────────────────────────┘ - -131 rows in set. Elapsed: 0.014 sec. Processed 655.36 thousand rows, 11.14 MB (47.56 million rows/s., 808.48 MB/s.) 
``` -### Test it in Playground +### Online Playground {#playground} -The data is uploaded to ClickHouse Playground, [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). +You can test other queries to this data set using the interactive resource [Online Playground](https://gh-api.clickhouse.tech/play?user=play). For example, [like this](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). However, please note that you cannot create temporary tables here. diff --git a/docs/en/getting-started/example-datasets/uk-price-paid.md b/docs/en/getting-started/example-datasets/uk-price-paid.md index 948ebd35b21..72b6e4b3718 100644 --- a/docs/en/getting-started/example-datasets/uk-price-paid.md +++ b/docs/en/getting-started/example-datasets/uk-price-paid.md @@ -3,27 +3,29 @@ toc_priority: 20 toc_title: UK Property Price Paid --- -# UK Property Price Paid +# UK Property Price Paid {#uk-property-price-paid} The dataset contains data about prices paid for real-estate property in England and Wales. The data is available since year 1995. -The size of the dataset in uncompressed form is about 4 GiB and it will take about 226 MiB in ClickHouse. +The size of the dataset in uncompressed form is about 4 GiB and it will take about 278 MiB in ClickHouse. Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0. -## Download the Dataset +## Download the Dataset {#download-dataset} -``` +Run the command: + +```bash wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv ``` Download will take about 2 minutes with good internet connection. -## Create the Table +## Create the Table {#create-table} -``` +```sql CREATE TABLE uk_price_paid ( price UInt32, @@ -44,7 +46,7 @@ CREATE TABLE uk_price_paid ) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2); ``` -## Preprocess and Import Data +## Preprocess and Import Data {#preprocess-import-data} We will use `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it. @@ -53,13 +55,13 @@ In this example, we define the structure of source data from the CSV file and sp The preprocessing is: - splitting the postcode to two different columns `postcode1` and `postcode2` that is better for storage and queries; - coverting the `time` field to date as it only contains 00:00 time; -- ignoring the `uuid` field because we don't need it for analysis; -- transforming `type` and `duration` to more readable Enum fields with function `transform`; -- transforming `is_new` and `category` fields from single-character string (`Y`/`N` and `A`/`B`) to UInt8 field with 0 and 1. 
+- ignoring the [UUid](../../sql-reference/data-types/uuid.md) field because we don't need it for analysis; +- transforming `type` and `duration` to more readable Enum fields with function [transform](../../sql-reference/functions/other-functions.md#transform); +- transforming `is_new` and `category` fields from single-character string (`Y`/`N` and `A`/`B`) to [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with 0 and 1. Preprocessed data is piped directly to `clickhouse-client` to be inserted into ClickHouse table in streaming fashion. -``` +```bash clickhouse-local --input-format CSV --structure ' uuid String, price UInt32, @@ -100,103 +102,131 @@ clickhouse-local --input-format CSV --structure ' It will take about 40 seconds. -## Validate the Data +## Validate the Data {#validate-data} -``` -SELECT count() FROM uk_price_paid -26248711 +Query: + +```sql +SELECT count() FROM uk_price_paid; ``` -The size of dataset in ClickHouse is just 226 MiB: +Result: -``` -SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid' -226.40 MiB +```text +┌──count()─┐ +│ 26321785 │ +└──────────┘ ``` -## Run Some Queries +The size of dataset in ClickHouse is just 278 MiB, check it. -### Average price per year: +Query: +```sql +SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid'; ``` -SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year +Result: + +```text +┌─formatReadableSize(total_bytes)─┐ +│ 278.80 MiB │ +└─────────────────────────────────┘ +``` + +## Run Some Queries {#run-queries} + +### Query 1. Average Price Per Year {#average-price} + +Query: + +```sql +SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year; +``` + +Result: + +```text ┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐ │ 1995 │ 67932 │ █████▍ │ │ 1996 │ 71505 │ █████▋ │ │ 1997 │ 78532 │ ██████▎ │ -│ 1998 │ 85435 │ ██████▋ │ -│ 1999 │ 96036 │ ███████▋ │ -│ 2000 │ 107478 │ ████████▌ │ -│ 2001 │ 118886 │ █████████▌ │ -│ 2002 │ 137940 │ ███████████ │ -│ 2003 │ 155888 │ ████████████▍ │ +│ 1998 │ 85436 │ ██████▋ │ +│ 1999 │ 96037 │ ███████▋ │ +│ 2000 │ 107479 │ ████████▌ │ +│ 2001 │ 118885 │ █████████▌ │ +│ 2002 │ 137941 │ ███████████ │ +│ 2003 │ 155889 │ ████████████▍ │ │ 2004 │ 178885 │ ██████████████▎ │ -│ 2005 │ 189350 │ ███████████████▏ │ +│ 2005 │ 189351 │ ███████████████▏ │ │ 2006 │ 203528 │ ████████████████▎ │ -│ 2007 │ 219377 │ █████████████████▌ │ +│ 2007 │ 219378 │ █████████████████▌ │ │ 2008 │ 217056 │ █████████████████▎ │ │ 2009 │ 213419 │ █████████████████ │ -│ 2010 │ 236110 │ ██████████████████▊ │ -│ 2011 │ 232804 │ ██████████████████▌ │ -│ 2012 │ 238366 │ ███████████████████ │ +│ 2010 │ 236109 │ ██████████████████▊ │ +│ 2011 │ 232805 │ ██████████████████▌ │ +│ 2012 │ 238367 │ ███████████████████ │ │ 2013 │ 256931 │ ████████████████████▌ │ -│ 2014 │ 279917 │ ██████████████████████▍ │ -│ 2015 │ 297264 │ ███████████████████████▋ │ -│ 2016 │ 313197 │ █████████████████████████ │ -│ 2017 │ 346070 │ ███████████████████████████▋ │ -│ 2018 │ 350117 │ ████████████████████████████ │ -│ 2019 │ 351010 │ ████████████████████████████ │ -│ 2020 │ 368974 │ █████████████████████████████▌ │ -│ 2021 │ 384351 │ ██████████████████████████████▋ │ +│ 2014 │ 279915 │ ██████████████████████▍ │ +│ 2015 │ 297266 │ ███████████████████████▋ │ +│ 2016 
│ 313201 │ █████████████████████████ │ +│ 2017 │ 346097 │ ███████████████████████████▋ │ +│ 2018 │ 350116 │ ████████████████████████████ │ +│ 2019 │ 351013 │ ████████████████████████████ │ +│ 2020 │ 369420 │ █████████████████████████████▌ │ +│ 2021 │ 386903 │ ██████████████████████████████▊ │ └──────┴────────┴────────────────────────────────────────┘ - -27 rows in set. Elapsed: 0.027 sec. Processed 26.25 million rows, 157.49 MB (955.96 million rows/s., 5.74 GB/s.) ``` -### Average price per year in London: +### Query 2. Average Price per Year in London {#average-price-london} +Query: + +```sql +SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year; ``` -SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year +Result: + +```text ┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐ -│ 1995 │ 109112 │ █████▍ │ +│ 1995 │ 109116 │ █████▍ │ │ 1996 │ 118667 │ █████▊ │ │ 1997 │ 136518 │ ██████▋ │ │ 1998 │ 152983 │ ███████▋ │ -│ 1999 │ 180633 │ █████████ │ -│ 2000 │ 215830 │ ██████████▋ │ -│ 2001 │ 232996 │ ███████████▋ │ -│ 2002 │ 263672 │ █████████████▏ │ +│ 1999 │ 180637 │ █████████ │ +│ 2000 │ 215838 │ ██████████▋ │ +│ 2001 │ 232994 │ ███████████▋ │ +│ 2002 │ 263670 │ █████████████▏ │ │ 2003 │ 278394 │ █████████████▊ │ -│ 2004 │ 304665 │ ███████████████▏ │ +│ 2004 │ 304666 │ ███████████████▏ │ │ 2005 │ 322875 │ ████████████████▏ │ -│ 2006 │ 356192 │ █████████████████▋ │ -│ 2007 │ 404055 │ ████████████████████▏ │ +│ 2006 │ 356191 │ █████████████████▋ │ +│ 2007 │ 404054 │ ████████████████████▏ │ │ 2008 │ 420741 │ █████████████████████ │ -│ 2009 │ 427754 │ █████████████████████▍ │ +│ 2009 │ 427753 │ █████████████████████▍ │ │ 2010 │ 480306 │ ████████████████████████ │ │ 2011 │ 496274 │ ████████████████████████▋ │ -│ 2012 │ 519441 │ █████████████████████████▊ │ -│ 2013 │ 616209 │ ██████████████████████████████▋ │ -│ 2014 │ 724144 │ ████████████████████████████████████▏ │ -│ 2015 │ 792112 │ ███████████████████████████████████████▌ │ -│ 2016 │ 843568 │ ██████████████████████████████████████████▏ │ -│ 2017 │ 982566 │ █████████████████████████████████████████████████▏ │ -│ 2018 │ 1016845 │ ██████████████████████████████████████████████████▋ │ -│ 2019 │ 1043277 │ ████████████████████████████████████████████████████▏ │ -│ 2020 │ 1003963 │ ██████████████████████████████████████████████████▏ │ -│ 2021 │ 940794 │ ███████████████████████████████████████████████ │ +│ 2012 │ 519442 │ █████████████████████████▊ │ +│ 2013 │ 616212 │ ██████████████████████████████▋ │ +│ 2014 │ 724154 │ ████████████████████████████████████▏ │ +│ 2015 │ 792129 │ ███████████████████████████████████████▌ │ +│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │ +│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │ +│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │ +│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │ +│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │ +│ 2021 │ 960343 │ ████████████████████████████████████████████████ │ └──────┴─────────┴───────────────────────────────────────────────────────┘ - -27 rows in set. Elapsed: 0.024 sec. Processed 26.25 million rows, 76.88 MB (1.08 billion rows/s., 3.15 GB/s.) ``` Something happened in 2013. I don't have a clue. Maybe you have a clue what happened in 2020? 
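If you want to dig into the 2013 jump yourself, one possible follow-up (a sketch that is not part of the original walkthrough; it only uses the `date`, `price`, and `town` columns defined above) is to look at the London average month by month:

```sql
-- Monthly average price in London around the 2013 jump.
SELECT
    toStartOfMonth(date) AS month,
    round(avg(price)) AS price,
    bar(price, 0, 2000000, 100)
FROM uk_price_paid
WHERE town = 'LONDON' AND date >= '2012-01-01' AND date < '2015-01-01'
GROUP BY month
ORDER BY month;
```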
-### The most expensive neighborhoods: +### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods} -``` +Query: + +```sql SELECT town, district, @@ -210,127 +240,126 @@ GROUP BY district HAVING c >= 100 ORDER BY price DESC -LIMIT 100 +LIMIT 100; +``` + +Result: + +```text ┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐ -│ LONDON │ CITY OF WESTMINSTER │ 3372 │ 3305225 │ ██████████████████████████████████████████████████████████████████ │ -│ LONDON │ CITY OF LONDON │ 257 │ 3294478 │ █████████████████████████████████████████████████████████████████▊ │ -│ LONDON │ KENSINGTON AND CHELSEA │ 2367 │ 2342422 │ ██████████████████████████████████████████████▋ │ -│ LEATHERHEAD │ ELMBRIDGE │ 108 │ 1927143 │ ██████████████████████████████████████▌ │ -│ VIRGINIA WATER │ RUNNYMEDE │ 142 │ 1868819 │ █████████████████████████████████████▍ │ -│ LONDON │ CAMDEN │ 2815 │ 1736788 │ ██████████████████████████████████▋ │ -│ THORNTON HEATH │ CROYDON │ 521 │ 1733051 │ ██████████████████████████████████▋ │ -│ WINDLESHAM │ SURREY HEATH │ 103 │ 1717255 │ ██████████████████████████████████▎ │ -│ BARNET │ ENFIELD │ 115 │ 1503458 │ ██████████████████████████████ │ -│ OXFORD │ SOUTH OXFORDSHIRE │ 298 │ 1275200 │ █████████████████████████▌ │ -│ LONDON │ ISLINGTON │ 2458 │ 1274308 │ █████████████████████████▍ │ -│ COBHAM │ ELMBRIDGE │ 364 │ 1260005 │ █████████████████████████▏ │ -│ LONDON │ HOUNSLOW │ 618 │ 1215682 │ ████████████████████████▎ │ -│ ASCOT │ WINDSOR AND MAIDENHEAD │ 379 │ 1215146 │ ████████████████████████▎ │ -│ LONDON │ RICHMOND UPON THAMES │ 654 │ 1207551 │ ████████████████████████▏ │ -│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 307 │ 1186220 │ ███████████████████████▋ │ -│ RICHMOND │ RICHMOND UPON THAMES │ 805 │ 1100420 │ ██████████████████████ │ -│ LONDON │ HAMMERSMITH AND FULHAM │ 2888 │ 1062959 │ █████████████████████▎ │ -│ WEYBRIDGE │ ELMBRIDGE │ 607 │ 1027161 │ ████████████████████▌ │ -│ RADLETT │ HERTSMERE │ 265 │ 1015896 │ ████████████████████▎ │ -│ SALCOMBE │ SOUTH HAMS │ 124 │ 1014393 │ ████████████████████▎ │ -│ BURFORD │ WEST OXFORDSHIRE │ 102 │ 993100 │ ███████████████████▋ │ -│ ESHER │ ELMBRIDGE │ 454 │ 969770 │ ███████████████████▍ │ -│ HINDHEAD │ WAVERLEY │ 128 │ 967786 │ ███████████████████▎ │ -│ BROCKENHURST │ NEW FOREST │ 121 │ 967046 │ ███████████████████▎ │ -│ LEATHERHEAD │ GUILDFORD │ 191 │ 964489 │ ███████████████████▎ │ -│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 376 │ 958555 │ ███████████████████▏ │ -│ EAST MOLESEY │ ELMBRIDGE │ 181 │ 943457 │ ██████████████████▋ │ -│ OLNEY │ MILTON KEYNES │ 220 │ 942892 │ ██████████████████▋ │ -│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 135 │ 926950 │ ██████████████████▌ │ -│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 509 │ 905732 │ ██████████████████ │ -│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 889 │ 899689 │ █████████████████▊ │ -│ BELVEDERE │ BEXLEY │ 313 │ 895336 │ █████████████████▊ │ -│ CRANBROOK │ TUNBRIDGE WELLS │ 404 │ 888190 │ █████████████████▋ │ -│ LONDON │ EALING │ 2460 │ 865893 │ █████████████████▎ │ -│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 114 │ 863814 │ █████████████████▎ │ -│ LONDON │ MERTON │ 1958 │ 857192 │ █████████████████▏ │ -│ GUILDFORD │ WAVERLEY │ 131 │ 854447 │ █████████████████ │ -│ LONDON │ HACKNEY │ 3088 │ 846571 │ ████████████████▊ │ -│ LYMM │ WARRINGTON │ 285 │ 839920 │ ████████████████▋ │ -│ HARPENDEN │ ST ALBANS │ 606 │ 836994 │ ████████████████▋ │ -│ LONDON │ WANDSWORTH │ 6113 │ 832292 │ ████████████████▋ │ -│ LONDON │ SOUTHWARK │ 
3612 │ 831319 │ ████████████████▋ │ -│ BERKHAMSTED │ DACORUM │ 502 │ 830356 │ ████████████████▌ │ -│ KINGS LANGLEY │ DACORUM │ 137 │ 821358 │ ████████████████▍ │ -│ TONBRIDGE │ TUNBRIDGE WELLS │ 339 │ 806736 │ ████████████████▏ │ -│ EPSOM │ REIGATE AND BANSTEAD │ 157 │ 805903 │ ████████████████ │ -│ WOKING │ GUILDFORD │ 161 │ 803283 │ ████████████████ │ -│ STOCKBRIDGE │ TEST VALLEY │ 168 │ 801973 │ ████████████████ │ -│ TEDDINGTON │ RICHMOND UPON THAMES │ 539 │ 798591 │ ███████████████▊ │ -│ OXFORD │ VALE OF WHITE HORSE │ 329 │ 792907 │ ███████████████▋ │ -│ LONDON │ BARNET │ 3624 │ 789583 │ ███████████████▋ │ -│ TWICKENHAM │ RICHMOND UPON THAMES │ 1090 │ 787760 │ ███████████████▋ │ -│ LUTON │ CENTRAL BEDFORDSHIRE │ 196 │ 786051 │ ███████████████▋ │ -│ TONBRIDGE │ MAIDSTONE │ 277 │ 785746 │ ███████████████▋ │ -│ TOWCESTER │ WEST NORTHAMPTONSHIRE │ 186 │ 783532 │ ███████████████▋ │ -│ LONDON │ LAMBETH │ 4832 │ 783422 │ ███████████████▋ │ -│ LUTTERWORTH │ HARBOROUGH │ 515 │ 781775 │ ███████████████▋ │ -│ WOODSTOCK │ WEST OXFORDSHIRE │ 135 │ 777499 │ ███████████████▌ │ -│ ALRESFORD │ WINCHESTER │ 196 │ 775577 │ ███████████████▌ │ -│ LONDON │ NEWHAM │ 2942 │ 768551 │ ███████████████▎ │ -│ ALDERLEY EDGE │ CHESHIRE EAST │ 168 │ 768280 │ ███████████████▎ │ -│ MARLOW │ BUCKINGHAMSHIRE │ 301 │ 762784 │ ███████████████▎ │ -│ BILLINGSHURST │ CHICHESTER │ 134 │ 760920 │ ███████████████▏ │ -│ LONDON │ TOWER HAMLETS │ 4183 │ 759635 │ ███████████████▏ │ -│ MIDHURST │ CHICHESTER │ 245 │ 759101 │ ███████████████▏ │ -│ THAMES DITTON │ ELMBRIDGE │ 227 │ 753347 │ ███████████████ │ -│ POTTERS BAR │ WELWYN HATFIELD │ 163 │ 752926 │ ███████████████ │ -│ REIGATE │ REIGATE AND BANSTEAD │ 555 │ 740961 │ ██████████████▋ │ -│ TADWORTH │ REIGATE AND BANSTEAD │ 477 │ 738997 │ ██████████████▋ │ -│ SEVENOAKS │ SEVENOAKS │ 1074 │ 734658 │ ██████████████▋ │ -│ PETWORTH │ CHICHESTER │ 138 │ 732432 │ ██████████████▋ │ -│ BOURNE END │ BUCKINGHAMSHIRE │ 127 │ 730742 │ ██████████████▌ │ -│ PURLEY │ CROYDON │ 540 │ 727721 │ ██████████████▌ │ -│ OXTED │ TANDRIDGE │ 320 │ 726078 │ ██████████████▌ │ -│ LONDON │ HARINGEY │ 2988 │ 724573 │ ██████████████▍ │ -│ BANSTEAD │ REIGATE AND BANSTEAD │ 373 │ 713834 │ ██████████████▎ │ -│ PINNER │ HARROW │ 480 │ 712166 │ ██████████████▏ │ -│ MALMESBURY │ WILTSHIRE │ 293 │ 707747 │ ██████████████▏ │ -│ RICKMANSWORTH │ THREE RIVERS │ 732 │ 705400 │ ██████████████ │ -│ SLOUGH │ BUCKINGHAMSHIRE │ 359 │ 705002 │ ██████████████ │ -│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 214 │ 704904 │ ██████████████ │ -│ READING │ SOUTH OXFORDSHIRE │ 295 │ 701697 │ ██████████████ │ -│ HYTHE │ FOLKESTONE AND HYTHE │ 457 │ 700334 │ ██████████████ │ -│ WELWYN │ WELWYN HATFIELD │ 217 │ 699649 │ █████████████▊ │ -│ CHIGWELL │ EPPING FOREST │ 242 │ 697869 │ █████████████▊ │ -│ BARNET │ BARNET │ 906 │ 695680 │ █████████████▊ │ -│ HASLEMERE │ CHICHESTER │ 120 │ 694028 │ █████████████▊ │ -│ LEATHERHEAD │ MOLE VALLEY │ 748 │ 692026 │ █████████████▋ │ -│ LONDON │ BRENT │ 1945 │ 690799 │ █████████████▋ │ -│ HASLEMERE │ WAVERLEY │ 258 │ 690765 │ █████████████▋ │ -│ NORTHWOOD │ HILLINGDON │ 252 │ 690753 │ █████████████▋ │ -│ WALTON-ON-THAMES │ ELMBRIDGE │ 871 │ 689431 │ █████████████▋ │ -│ INGATESTONE │ BRENTWOOD │ 150 │ 688345 │ █████████████▋ │ -│ OXFORD │ OXFORD │ 1761 │ 686114 │ █████████████▋ │ -│ CHISLEHURST │ BROMLEY │ 410 │ 682892 │ █████████████▋ │ -│ KINGS LANGLEY │ THREE RIVERS │ 109 │ 682320 │ █████████████▋ │ -│ ASHTEAD │ MOLE VALLEY │ 280 │ 680483 │ █████████████▌ │ -│ WOKING │ SURREY HEATH │ 269 │ 679035 │ 
█████████████▌ │ -│ ASCOT │ BRACKNELL FOREST │ 160 │ 678632 │ █████████████▌ │ +│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │ +│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │ +│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │ +│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │ +│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │ +│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │ +│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │ +│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │ +│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │ +│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │ +│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │ +│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │ +│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │ +│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │ +│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │ +│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │ +│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │ +│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │ +│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │ +│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │ +│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │ +│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │ +│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │ +│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │ +│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │ +│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │ +│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │ +│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │ +│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │ +│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │ +│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │ +│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │ +│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │ +│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │ +│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │ +│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │ +│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │ +│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │ +│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │ +│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │ +│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │ +│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │ +│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │ +│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │ +│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │ +│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │ +│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │ +│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ 
███████████████▊ │ +│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │ +│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │ +│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │ +│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │ +│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │ +│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │ +│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │ +│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │ +│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │ +│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │ +│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │ +│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │ +│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │ +│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │ +│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │ +│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │ +│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │ +│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │ +│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │ +│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │ +│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │ +│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │ +│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │ +│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │ +│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │ +│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │ +│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │ +│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │ +│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │ +│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │ +│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │ +│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │ +│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │ +│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │ +│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │ +│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │ +│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │ +│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │ +│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │ +│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │ +│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │ +│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │ +│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │ +│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │ +│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │ +│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │ +│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │ +│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │ +│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │ +│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │ +│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │ +│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │ └──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘ - -100 rows in set. Elapsed: 0.039 sec. Processed 26.25 million rows, 278.03 MB (674.32 million rows/s., 7.14 GB/s.) 
``` -### Test it in Playground +## Let's Speed Up Queries Using Projections {#speedup-with-projections} -The data is uploaded to ClickHouse Playground, [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==). +[Projections](../../sql-reference/statements/alter/projection.md) allow to improve queries speed by storing pre-aggregated data. -## Let's speed up queries using projections +### Build a Projection {#build-projection} -[Projections](https://../../sql-reference/statements/alter/projection/) allow to improve queries speed by storing pre-aggregated data. - -### Build a projection - -``` --- create an aggregate projection by dimensions (toYear(date), district, town) +Create an aggregate projection by dimensions `toYear(date)`, `district`, `town`: +```sql ALTER TABLE uk_price_paid ADD PROJECTION projection_by_year_district_town ( @@ -346,25 +375,31 @@ ALTER TABLE uk_price_paid district, town ); +``` --- populate the projection for existing data (without it projection will be --- created for only newly inserted data) +Populate the projection for existing data (without it projection will be created for only newly inserted data): +```sql ALTER TABLE uk_price_paid MATERIALIZE PROJECTION projection_by_year_district_town SETTINGS mutations_sync = 1; ``` -## Test performance +## Test Performance {#test-performance} Let's run the same 3 queries. +[Enable](../../operations/settings/settings.md#allow-experimental-projection-optimization) projections for selects: + +```sql +SET allow_experimental_projection_optimization = 1; ``` --- enable projections for selects -set allow_experimental_projection_optimization=1; --- Q1) Average price per year: +### Query 1. 
Average Price Per Year {#average-price-projections} +Query: + +```sql SELECT toYear(date) AS year, round(avg(price)) AS price, @@ -372,41 +407,47 @@ SELECT FROM uk_price_paid GROUP BY year ORDER BY year ASC; +``` +Result: + +```text ┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐ │ 1995 │ 67932 │ █████▍ │ │ 1996 │ 71505 │ █████▋ │ │ 1997 │ 78532 │ ██████▎ │ -│ 1998 │ 85435 │ ██████▋ │ -│ 1999 │ 96036 │ ███████▋ │ -│ 2000 │ 107478 │ ████████▌ │ -│ 2001 │ 118886 │ █████████▌ │ -│ 2002 │ 137940 │ ███████████ │ -│ 2003 │ 155888 │ ████████████▍ │ +│ 1998 │ 85436 │ ██████▋ │ +│ 1999 │ 96037 │ ███████▋ │ +│ 2000 │ 107479 │ ████████▌ │ +│ 2001 │ 118885 │ █████████▌ │ +│ 2002 │ 137941 │ ███████████ │ +│ 2003 │ 155889 │ ████████████▍ │ │ 2004 │ 178885 │ ██████████████▎ │ -│ 2005 │ 189350 │ ███████████████▏ │ +│ 2005 │ 189351 │ ███████████████▏ │ │ 2006 │ 203528 │ ████████████████▎ │ -│ 2007 │ 219377 │ █████████████████▌ │ +│ 2007 │ 219378 │ █████████████████▌ │ │ 2008 │ 217056 │ █████████████████▎ │ │ 2009 │ 213419 │ █████████████████ │ -│ 2010 │ 236110 │ ██████████████████▊ │ -│ 2011 │ 232804 │ ██████████████████▌ │ -│ 2012 │ 238366 │ ███████████████████ │ +│ 2010 │ 236109 │ ██████████████████▊ │ +│ 2011 │ 232805 │ ██████████████████▌ │ +│ 2012 │ 238367 │ ███████████████████ │ │ 2013 │ 256931 │ ████████████████████▌ │ -│ 2014 │ 279917 │ ██████████████████████▍ │ -│ 2015 │ 297264 │ ███████████████████████▋ │ -│ 2016 │ 313197 │ █████████████████████████ │ -│ 2017 │ 346070 │ ███████████████████████████▋ │ -│ 2018 │ 350117 │ ████████████████████████████ │ -│ 2019 │ 351010 │ ████████████████████████████ │ -│ 2020 │ 368974 │ █████████████████████████████▌ │ -│ 2021 │ 384351 │ ██████████████████████████████▋ │ +│ 2014 │ 279915 │ ██████████████████████▍ │ +│ 2015 │ 297266 │ ███████████████████████▋ │ +│ 2016 │ 313201 │ █████████████████████████ │ +│ 2017 │ 346097 │ ███████████████████████████▋ │ +│ 2018 │ 350116 │ ████████████████████████████ │ +│ 2019 │ 351013 │ ████████████████████████████ │ +│ 2020 │ 369420 │ █████████████████████████████▌ │ +│ 2021 │ 386903 │ ██████████████████████████████▊ │ └──────┴────────┴────────────────────────────────────────┘ +``` -27 rows in set. Elapsed: 0.003 sec. Processed 106.87 thousand rows, 3.21 MB (31.92 million rows/s., 959.03 MB/s.) +### Query 2. 
Average Price Per Year in London {#average-price-london-projections} --- Q2) Average price per year in London: +Query: +```sql SELECT toYear(date) AS year, round(avg(price)) AS price, @@ -415,42 +456,49 @@ FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year ASC; +``` +Result: + +```text ┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐ -│ 1995 │ 109112 │ █████▍ │ +│ 1995 │ 109116 │ █████▍ │ │ 1996 │ 118667 │ █████▊ │ │ 1997 │ 136518 │ ██████▋ │ │ 1998 │ 152983 │ ███████▋ │ -│ 1999 │ 180633 │ █████████ │ -│ 2000 │ 215830 │ ██████████▋ │ -│ 2001 │ 232996 │ ███████████▋ │ -│ 2002 │ 263672 │ █████████████▏ │ +│ 1999 │ 180637 │ █████████ │ +│ 2000 │ 215838 │ ██████████▋ │ +│ 2001 │ 232994 │ ███████████▋ │ +│ 2002 │ 263670 │ █████████████▏ │ │ 2003 │ 278394 │ █████████████▊ │ -│ 2004 │ 304665 │ ███████████████▏ │ +│ 2004 │ 304666 │ ███████████████▏ │ │ 2005 │ 322875 │ ████████████████▏ │ -│ 2006 │ 356192 │ █████████████████▋ │ -│ 2007 │ 404055 │ ████████████████████▏ │ +│ 2006 │ 356191 │ █████████████████▋ │ +│ 2007 │ 404054 │ ████████████████████▏ │ │ 2008 │ 420741 │ █████████████████████ │ -│ 2009 │ 427754 │ █████████████████████▍ │ +│ 2009 │ 427753 │ █████████████████████▍ │ │ 2010 │ 480306 │ ████████████████████████ │ │ 2011 │ 496274 │ ████████████████████████▋ │ -│ 2012 │ 519441 │ █████████████████████████▊ │ -│ 2013 │ 616209 │ ██████████████████████████████▋ │ -│ 2014 │ 724144 │ ████████████████████████████████████▏ │ -│ 2015 │ 792112 │ ███████████████████████████████████████▌ │ -│ 2016 │ 843568 │ ██████████████████████████████████████████▏ │ -│ 2017 │ 982566 │ █████████████████████████████████████████████████▏ │ -│ 2018 │ 1016845 │ ██████████████████████████████████████████████████▋ │ -│ 2019 │ 1043277 │ ████████████████████████████████████████████████████▏ │ -│ 2020 │ 1003963 │ ██████████████████████████████████████████████████▏ │ -│ 2021 │ 940794 │ ███████████████████████████████████████████████ │ +│ 2012 │ 519442 │ █████████████████████████▊ │ +│ 2013 │ 616212 │ ██████████████████████████████▋ │ +│ 2014 │ 724154 │ ████████████████████████████████████▏ │ +│ 2015 │ 792129 │ ███████████████████████████████████████▌ │ +│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │ +│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │ +│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │ +│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │ +│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │ +│ 2021 │ 960343 │ ████████████████████████████████████████████████ │ └──────┴─────────┴───────────────────────────────────────────────────────┘ +``` -27 rows in set. Elapsed: 0.005 sec. Processed 106.87 thousand rows, 3.53 MB (23.49 million rows/s., 775.95 MB/s.) +### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods-projections} --- Q3) The most expensive neighborhoods: --- the condition (date >= '2020-01-01') needs to be modified to match projection dimension (toYear(date) >= 2020) +The condition (date >= '2020-01-01') needs to be modified to match projection dimension (toYear(date) >= 2020). 
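The change amounts to rewriting only the filter; shown here as a before/after sketch of just the `WHERE` clause (the full rewritten query follows):

```sql
-- Before: filters on the raw column, which is not a projection dimension.
WHERE date >= '2020-01-01'

-- After: filters on the projection dimension toYear(date), so the pre-aggregated data can be used.
WHERE toYear(date) >= 2020
```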
+Query: + +```sql SELECT town, district, @@ -464,118 +512,138 @@ GROUP BY district HAVING c >= 100 ORDER BY price DESC -LIMIT 100 - -┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐ -│ LONDON │ CITY OF WESTMINSTER │ 3372 │ 3305225 │ ██████████████████████████████████████████████████████████████████ │ -│ LONDON │ CITY OF LONDON │ 257 │ 3294478 │ █████████████████████████████████████████████████████████████████▊ │ -│ LONDON │ KENSINGTON AND CHELSEA │ 2367 │ 2342422 │ ██████████████████████████████████████████████▋ │ -│ LEATHERHEAD │ ELMBRIDGE │ 108 │ 1927143 │ ██████████████████████████████████████▌ │ -│ VIRGINIA WATER │ RUNNYMEDE │ 142 │ 1868819 │ █████████████████████████████████████▍ │ -│ LONDON │ CAMDEN │ 2815 │ 1736788 │ ██████████████████████████████████▋ │ -│ THORNTON HEATH │ CROYDON │ 521 │ 1733051 │ ██████████████████████████████████▋ │ -│ WINDLESHAM │ SURREY HEATH │ 103 │ 1717255 │ ██████████████████████████████████▎ │ -│ BARNET │ ENFIELD │ 115 │ 1503458 │ ██████████████████████████████ │ -│ OXFORD │ SOUTH OXFORDSHIRE │ 298 │ 1275200 │ █████████████████████████▌ │ -│ LONDON │ ISLINGTON │ 2458 │ 1274308 │ █████████████████████████▍ │ -│ COBHAM │ ELMBRIDGE │ 364 │ 1260005 │ █████████████████████████▏ │ -│ LONDON │ HOUNSLOW │ 618 │ 1215682 │ ████████████████████████▎ │ -│ ASCOT │ WINDSOR AND MAIDENHEAD │ 379 │ 1215146 │ ████████████████████████▎ │ -│ LONDON │ RICHMOND UPON THAMES │ 654 │ 1207551 │ ████████████████████████▏ │ -│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 307 │ 1186220 │ ███████████████████████▋ │ -│ RICHMOND │ RICHMOND UPON THAMES │ 805 │ 1100420 │ ██████████████████████ │ -│ LONDON │ HAMMERSMITH AND FULHAM │ 2888 │ 1062959 │ █████████████████████▎ │ -│ WEYBRIDGE │ ELMBRIDGE │ 607 │ 1027161 │ ████████████████████▌ │ -│ RADLETT │ HERTSMERE │ 265 │ 1015896 │ ████████████████████▎ │ -│ SALCOMBE │ SOUTH HAMS │ 124 │ 1014393 │ ████████████████████▎ │ -│ BURFORD │ WEST OXFORDSHIRE │ 102 │ 993100 │ ███████████████████▋ │ -│ ESHER │ ELMBRIDGE │ 454 │ 969770 │ ███████████████████▍ │ -│ HINDHEAD │ WAVERLEY │ 128 │ 967786 │ ███████████████████▎ │ -│ BROCKENHURST │ NEW FOREST │ 121 │ 967046 │ ███████████████████▎ │ -│ LEATHERHEAD │ GUILDFORD │ 191 │ 964489 │ ███████████████████▎ │ -│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 376 │ 958555 │ ███████████████████▏ │ -│ EAST MOLESEY │ ELMBRIDGE │ 181 │ 943457 │ ██████████████████▋ │ -│ OLNEY │ MILTON KEYNES │ 220 │ 942892 │ ██████████████████▋ │ -│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 135 │ 926950 │ ██████████████████▌ │ -│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 509 │ 905732 │ ██████████████████ │ -│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 889 │ 899689 │ █████████████████▊ │ -│ BELVEDERE │ BEXLEY │ 313 │ 895336 │ █████████████████▊ │ -│ CRANBROOK │ TUNBRIDGE WELLS │ 404 │ 888190 │ █████████████████▋ │ -│ LONDON │ EALING │ 2460 │ 865893 │ █████████████████▎ │ -│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 114 │ 863814 │ █████████████████▎ │ -│ LONDON │ MERTON │ 1958 │ 857192 │ █████████████████▏ │ -│ GUILDFORD │ WAVERLEY │ 131 │ 854447 │ █████████████████ │ -│ LONDON │ HACKNEY │ 3088 │ 846571 │ ████████████████▊ │ -│ LYMM │ WARRINGTON │ 285 │ 839920 │ ████████████████▋ │ -│ HARPENDEN │ ST ALBANS │ 606 │ 836994 │ ████████████████▋ │ -│ LONDON │ WANDSWORTH │ 6113 │ 832292 │ ████████████████▋ │ -│ LONDON │ SOUTHWARK │ 3612 │ 831319 │ ████████████████▋ │ -│ BERKHAMSTED │ DACORUM │ 502 │ 830356 │ ████████████████▌ │ -│ KINGS LANGLEY │ DACORUM │ 137 │ 821358 │ ████████████████▍ │ 
-│ TONBRIDGE │ TUNBRIDGE WELLS │ 339 │ 806736 │ ████████████████▏ │ -│ EPSOM │ REIGATE AND BANSTEAD │ 157 │ 805903 │ ████████████████ │ -│ WOKING │ GUILDFORD │ 161 │ 803283 │ ████████████████ │ -│ STOCKBRIDGE │ TEST VALLEY │ 168 │ 801973 │ ████████████████ │ -│ TEDDINGTON │ RICHMOND UPON THAMES │ 539 │ 798591 │ ███████████████▊ │ -│ OXFORD │ VALE OF WHITE HORSE │ 329 │ 792907 │ ███████████████▋ │ -│ LONDON │ BARNET │ 3624 │ 789583 │ ███████████████▋ │ -│ TWICKENHAM │ RICHMOND UPON THAMES │ 1090 │ 787760 │ ███████████████▋ │ -│ LUTON │ CENTRAL BEDFORDSHIRE │ 196 │ 786051 │ ███████████████▋ │ -│ TONBRIDGE │ MAIDSTONE │ 277 │ 785746 │ ███████████████▋ │ -│ TOWCESTER │ WEST NORTHAMPTONSHIRE │ 186 │ 783532 │ ███████████████▋ │ -│ LONDON │ LAMBETH │ 4832 │ 783422 │ ███████████████▋ │ -│ LUTTERWORTH │ HARBOROUGH │ 515 │ 781775 │ ███████████████▋ │ -│ WOODSTOCK │ WEST OXFORDSHIRE │ 135 │ 777499 │ ███████████████▌ │ -│ ALRESFORD │ WINCHESTER │ 196 │ 775577 │ ███████████████▌ │ -│ LONDON │ NEWHAM │ 2942 │ 768551 │ ███████████████▎ │ -│ ALDERLEY EDGE │ CHESHIRE EAST │ 168 │ 768280 │ ███████████████▎ │ -│ MARLOW │ BUCKINGHAMSHIRE │ 301 │ 762784 │ ███████████████▎ │ -│ BILLINGSHURST │ CHICHESTER │ 134 │ 760920 │ ███████████████▏ │ -│ LONDON │ TOWER HAMLETS │ 4183 │ 759635 │ ███████████████▏ │ -│ MIDHURST │ CHICHESTER │ 245 │ 759101 │ ███████████████▏ │ -│ THAMES DITTON │ ELMBRIDGE │ 227 │ 753347 │ ███████████████ │ -│ POTTERS BAR │ WELWYN HATFIELD │ 163 │ 752926 │ ███████████████ │ -│ REIGATE │ REIGATE AND BANSTEAD │ 555 │ 740961 │ ██████████████▋ │ -│ TADWORTH │ REIGATE AND BANSTEAD │ 477 │ 738997 │ ██████████████▋ │ -│ SEVENOAKS │ SEVENOAKS │ 1074 │ 734658 │ ██████████████▋ │ -│ PETWORTH │ CHICHESTER │ 138 │ 732432 │ ██████████████▋ │ -│ BOURNE END │ BUCKINGHAMSHIRE │ 127 │ 730742 │ ██████████████▌ │ -│ PURLEY │ CROYDON │ 540 │ 727721 │ ██████████████▌ │ -│ OXTED │ TANDRIDGE │ 320 │ 726078 │ ██████████████▌ │ -│ LONDON │ HARINGEY │ 2988 │ 724573 │ ██████████████▍ │ -│ BANSTEAD │ REIGATE AND BANSTEAD │ 373 │ 713834 │ ██████████████▎ │ -│ PINNER │ HARROW │ 480 │ 712166 │ ██████████████▏ │ -│ MALMESBURY │ WILTSHIRE │ 293 │ 707747 │ ██████████████▏ │ -│ RICKMANSWORTH │ THREE RIVERS │ 732 │ 705400 │ ██████████████ │ -│ SLOUGH │ BUCKINGHAMSHIRE │ 359 │ 705002 │ ██████████████ │ -│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 214 │ 704904 │ ██████████████ │ -│ READING │ SOUTH OXFORDSHIRE │ 295 │ 701697 │ ██████████████ │ -│ HYTHE │ FOLKESTONE AND HYTHE │ 457 │ 700334 │ ██████████████ │ -│ WELWYN │ WELWYN HATFIELD │ 217 │ 699649 │ █████████████▊ │ -│ CHIGWELL │ EPPING FOREST │ 242 │ 697869 │ █████████████▊ │ -│ BARNET │ BARNET │ 906 │ 695680 │ █████████████▊ │ -│ HASLEMERE │ CHICHESTER │ 120 │ 694028 │ █████████████▊ │ -│ LEATHERHEAD │ MOLE VALLEY │ 748 │ 692026 │ █████████████▋ │ -│ LONDON │ BRENT │ 1945 │ 690799 │ █████████████▋ │ -│ HASLEMERE │ WAVERLEY │ 258 │ 690765 │ █████████████▋ │ -│ NORTHWOOD │ HILLINGDON │ 252 │ 690753 │ █████████████▋ │ -│ WALTON-ON-THAMES │ ELMBRIDGE │ 871 │ 689431 │ █████████████▋ │ -│ INGATESTONE │ BRENTWOOD │ 150 │ 688345 │ █████████████▋ │ -│ OXFORD │ OXFORD │ 1761 │ 686114 │ █████████████▋ │ -│ CHISLEHURST │ BROMLEY │ 410 │ 682892 │ █████████████▋ │ -│ KINGS LANGLEY │ THREE RIVERS │ 109 │ 682320 │ █████████████▋ │ -│ ASHTEAD │ MOLE VALLEY │ 280 │ 680483 │ █████████████▌ │ -│ WOKING │ SURREY HEATH │ 269 │ 679035 │ █████████████▌ │ -│ ASCOT │ BRACKNELL FOREST │ 160 │ 678632 │ █████████████▌ │ 
-└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘ - -100 rows in set. Elapsed: 0.005 sec. Processed 12.85 thousand rows, 813.40 KB (2.73 million rows/s., 172.95 MB/s.) +LIMIT 100; ``` +Result: + +```text +┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐ +│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │ +│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │ +│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │ +│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │ +│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │ +│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │ +│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │ +│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │ +│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │ +│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │ +│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │ +│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │ +│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │ +│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │ +│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │ +│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │ +│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │ +│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │ +│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │ +│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │ +│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │ +│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │ +│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │ +│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │ +│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │ +│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │ +│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │ +│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │ +│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │ +│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │ +│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │ +│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │ +│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │ +│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │ +│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │ +│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │ +│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │ +│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │ +│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │ +│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │ +│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │ +│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │ +│ LONDON │ WANDSWORTH │ 6566 │ 
832663 │ ████████████████▋ │ +│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │ +│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │ +│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │ +│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │ +│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │ +│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │ +│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │ +│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │ +│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │ +│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │ +│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │ +│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │ +│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │ +│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │ +│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │ +│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │ +│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │ +│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │ +│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │ +│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │ +│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │ +│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │ +│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │ +│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │ +│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │ +│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │ +│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │ +│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │ +│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │ +│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │ +│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │ +│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │ +│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │ +│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │ +│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │ +│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │ +│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │ +│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │ +│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │ +│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │ +│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │ +│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │ +│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │ +│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │ +│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │ +│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │ +│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │ +│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │ +│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │ +│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │ +│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │ +│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │ +│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │ +│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │ +│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │ +│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │ +│ ASCOT │ 
BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │ +└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘ +``` + +### Summary {#summary} + All 3 queries work much faster and read fewer rows. +```text +Query 1 + +no projection: 27 rows in set. Elapsed: 0.158 sec. Processed 26.32 million rows, 157.93 MB (166.57 million rows/s., 999.39 MB/s.) + projection: 27 rows in set. Elapsed: 0.007 sec. Processed 105.96 thousand rows, 3.33 MB (14.58 million rows/s., 458.13 MB/s.) + + +Query 2 + +no projection: 27 rows in set. Elapsed: 0.163 sec. Processed 26.32 million rows, 80.01 MB (161.75 million rows/s., 491.64 MB/s.) + projection: 27 rows in set. Elapsed: 0.008 sec. Processed 105.96 thousand rows, 3.67 MB (13.29 million rows/s., 459.89 MB/s.) + +Query 3 + +no projection: 100 rows in set. Elapsed: 0.069 sec. Processed 26.32 million rows, 62.47 MB (382.13 million rows/s., 906.93 MB/s.) + projection: 100 rows in set. Elapsed: 0.029 sec. Processed 8.08 thousand rows, 511.08 KB (276.06 thousand rows/s., 17.47 MB/s.) ``` -Q1) -no projection: 27 rows in set. Elapsed: 0.027 sec. Processed 26.25 million rows, 157.49 MB (955.96 million rows/s., 5.74 GB/s.) - projection: 27 rows in set. Elapsed: 0.003 sec. Processed 106.87 thousand rows, 3.21 MB (31.92 million rows/s., 959.03 MB/s.) -``` + +### Test It in Playground {#playground} + +The dataset is also available in the [Online Playground](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==). diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 9060cd37d6a..dcc0d812a03 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -56,6 +56,7 @@ toc_title: Adopters | Geniee | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | Genotek | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) | | Glaber | Monitoring | Main product | — | — | [Website](https://glaber.io/) | +| GraphCDN | CDN | Traffic Analytics | — | — | [Blog Post in English, August 2021](https://altinity.com/blog/delivering-insight-on-graphql-apis-with-clickhouse-at-graphcdn/) | | HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | | ICA | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) | | Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.com/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 17317a13908..0e4e9d3b489 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -93,6 +93,17 @@ Works with tables in the MergeTree family. 
If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition reduces the amount of data to read. For more information about data ranges in MergeTree tables, see [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). +## use_skip_indexes {#settings-use_skip_indexes} + +Use data skipping indexes during query execution. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + ## force_data_skipping_indices {#settings-force_data_skipping_indices} Disables query execution if passed data skipping indices wasn't used. @@ -3630,7 +3641,7 @@ Default value: `enable`. ## max_hyperscan_regexp_length {#max-hyperscan-regexp-length} -Defines the maximum length for each regular expression in the [hyperscan multi-match functions](../../sql-reference/functions/string-search-functions.md#multimatchanyhaystack-pattern1-pattern2-patternn). +Defines the maximum length for each regular expression in the [hyperscan multi-match functions](../../sql-reference/functions/string-search-functions.md#multimatchanyhaystack-pattern1-pattern2-patternn). Possible values: @@ -3673,7 +3684,7 @@ Exception: Regexp length too large. ## max_hyperscan_regexp_total_length {#max-hyperscan-regexp-total-length} -Sets the maximum length total of all regular expressions in each [hyperscan multi-match function](../../sql-reference/functions/string-search-functions.md#multimatchanyhaystack-pattern1-pattern2-patternn). +Sets the maximum length total of all regular expressions in each [hyperscan multi-match function](../../sql-reference/functions/string-search-functions.md#multimatchanyhaystack-pattern1-pattern2-patternn). Possible values: diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index faa86527c7d..3c3ed7b8932 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -594,14 +594,14 @@ Result: └─────┘ ``` -## h3ResIsClassIII {#h3resisclassIII} +## h3IsResClassIII {#h3isresclassIII} Returns whether [H3](#h3index) index has a resolution with Class III orientation. **Syntax** ``` sql -h3ResIsClassIII(index) +h3IsResClassIII(index) ``` **Parameter** @@ -620,7 +620,7 @@ Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Query: ``` sql -SELECT h3ResIsClassIII(617420388352917503) as res; +SELECT h3IsResClassIII(617420388352917503) as res; ``` Result: diff --git a/docs/ru/getting-started/example-datasets/index.md b/docs/ru/getting-started/example-datasets/index.md index 756b3a75dee..2049ddd5d86 100644 --- a/docs/ru/getting-started/example-datasets/index.md +++ b/docs/ru/getting-started/example-datasets/index.md @@ -9,12 +9,16 @@ toc_title: "Введение" Этот раздел описывает как получить тестовые массивы данных и загрузить их в ClickHouse. Для некоторых тестовых массивов данных также доступны тестовые запросы. 
-- [Анонимизированные данные Яндекс.Метрики](metrica.md) -- [Star Schema Benchmark](star-schema.md) -- [WikiStat](wikistat.md) -- [Терабайт логов кликов от Criteo](criteo.md) -- [AMPLab Big Data Benchmark](amplab-benchmark.md) -- [Данные о такси в Нью-Йорке](nyc-taxi.md) -- [OnTime](ontime.md) +- [Анонимизированные данные Яндекс.Метрики](../../getting-started/example-datasets/metrica.md) +- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md) +- [Набор данных кулинарных рецептов](../../getting-started/example-datasets/recipes.md) +- [WikiStat](../../getting-started/example-datasets/wikistat.md) +- [Терабайт логов кликов от Criteo](../../getting-started/example-datasets/criteo.md) +- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md) +- [Данные о такси в Нью-Йорке](../../getting-started/example-datasets/nyc-taxi.md) +- [Набор данных о воздушном движении OpenSky Network 2020](../../getting-started/example-datasets/opensky.md) +- [Данные о стоимости недвижимости в Великобритании](../../getting-started/example-datasets/uk-price-paid.md) +- [OnTime](../../getting-started/example-datasets/ontime.md) - [Вышки сотовой связи](../../getting-started/example-datasets/cell-towers.md) +[Оригинальная статья](https://clickhouse.tech/docs/ru/getting_started/example_datasets) diff --git a/docs/ru/getting-started/example-datasets/opensky.md b/docs/ru/getting-started/example-datasets/opensky.md deleted file mode 120000 index 4305c0cac3c..00000000000 --- a/docs/ru/getting-started/example-datasets/opensky.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/getting-started/example-datasets/opensky.md \ No newline at end of file diff --git a/docs/ru/getting-started/example-datasets/opensky.md b/docs/ru/getting-started/example-datasets/opensky.md new file mode 100644 index 00000000000..bda5dec3c47 --- /dev/null +++ b/docs/ru/getting-started/example-datasets/opensky.md @@ -0,0 +1,422 @@ +--- +toc_priority: 20 +toc_title: Набор данных о воздушном движении OpenSky Network 2020 +--- + +# Набор данных о воздушном движении OpenSky Network 2020 {#opensky} + +"Данные в этом наборе получены и отфильтрованы из полного набора данных OpenSky, чтобы проиллюстрировать развитие воздушного движения во время пандемии COVID-19. Набор включает в себя все рейсы, которые видели более 2500 участников сети с 1 января 2019 года. Дополнительные данные будут периодически включаться в набор данных до окончания пандемии COVID-19". + +Источник: https://zenodo.org/record/5092942#.YRBCyTpRXYd + +Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent Lenders +"Crowdsourced air traffic data from the OpenSky Network 2019–2020" +Earth System Science Data 13(2), 2021 +https://doi.org/10.5194/essd-13-357-2021 + +## Загрузите набор данных {#download-dataset} + +Выполните команду: + +```bash +wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget +``` + +Загрузка займет около 2 минут при хорошем подключении к интернету. Будет загружено 30 файлов общим размером 4,3 ГБ. 
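
Before moving on, you may want to confirm that the download completed. A minimal sketch, assuming the files follow the `flightlist_*.csv.gz` naming pattern produced by the `wget` command above:

```bash
# Count the downloaded archives (30 are expected) and report their combined size
ls -1 flightlist_*.csv.gz | wc -l
du -ch flightlist_*.csv.gz | tail -n 1
```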
+ +## Создайте таблицу {#create-table} + +```sql +CREATE TABLE opensky +( + callsign String, + number String, + icao24 String, + registration String, + typecode String, + origin String, + destination String, + firstseen DateTime, + lastseen DateTime, + day DateTime, + latitude_1 Float64, + longitude_1 Float64, + altitude_1 Float64, + latitude_2 Float64, + longitude_2 Float64, + altitude_2 Float64 +) ENGINE = MergeTree ORDER BY (origin, destination, callsign); +``` + +## Импортируйте данные в ClickHouse {#import-data} + +Загрузите данные в ClickHouse параллельными потоками: + +```bash +ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"' +``` + +- Список файлов передаётся (`ls -1 flightlist_*.csv.gz`) в `xargs` для параллельной обработки. +- `xargs -P100` указывает на возможность использования до 100 параллельных обработчиков, но поскольку у нас всего 30 файлов, то количество обработчиков будет всего 30. +- Для каждого файла `xargs` будет запускать скрипт с `bash -c`. Сценарий имеет подстановку в виде `{}`, а команда `xargs` заменяет имя файла на указанные в подстановке символы (мы указали это для `xargs` с помощью `-I{}`). +- Скрипт распакует файл (`gzip -c -d "{}"`) в стандартный вывод (параметр `-c`) и перенаправит его в `clickhouse-client`. +- Чтобы распознать формат ISO-8601 со смещениями часовых поясов в полях типа [DateTime](../../sql-reference/data-types/datetime.md), указывается параметр парсера [--date_time_input_format best_effort](../../operations/settings/settings.md#settings-date_time_input_format). + +В итоге: клиент clickhouse добавит данные в таблицу `opensky`. Входные данные импортируются в формате [CSVWithNames](../../interfaces/formats.md#csvwithnames). + + +Загрузка параллельными потоками займёт около 24 секунд. + +Также вы можете использовать вариант последовательной загрузки: + +```bash +for file in flightlist_*.csv.gz; do gzip -c -d "$file" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"; done +``` + +## Проверьте импортированные данные {#validate-data} + +Запрос: + +```sql +SELECT count() FROM opensky; +``` + +Результат: + +```text +┌──count()─┐ +│ 66010819 │ +└──────────┘ +``` + +Убедитесь, что размер набора данных в ClickHouse составляет всего 2,66 GiB. + +Запрос: + +```sql +SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'opensky'; +``` + +Результат: + +```text +┌─formatReadableSize(total_bytes)─┐ +│ 2.66 GiB │ +└─────────────────────────────────┘ +``` + +## Примеры {#run-queries} + +Общее пройденное расстояние составляет 68 миллиардов километров. + +Запрос: + +```sql +SELECT formatReadableQuantity(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) / 1000) FROM opensky; +``` + +Результат: + +```text +┌─formatReadableQuantity(divide(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 1000))─┐ +│ 68.72 billion │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +Средняя дальность полета составляет около 1000 км. 
+ +Запрос: + +```sql +SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky; +``` + +Результат: + +```text +┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐ +│ 1041090.6465708319 │ +└────────────────────────────────────────────────────────────────────┘ +``` + +### Наиболее загруженные аэропорты в указанных координатах и среднее пройденное расстояние {#busy-airports-average-distance} + +Запрос: + +```sql +SELECT + origin, + count(), + round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))) AS distance, + bar(distance, 0, 10000000, 100) AS bar +FROM opensky +WHERE origin != '' +GROUP BY origin +ORDER BY count() DESC +LIMIT 100; +``` + +Результат: + +```text + ┌─origin─┬─count()─┬─distance─┬─bar────────────────────────────────────┐ + 1. │ KORD │ 745007 │ 1546108 │ ███████████████▍ │ + 2. │ KDFW │ 696702 │ 1358721 │ █████████████▌ │ + 3. │ KATL │ 667286 │ 1169661 │ ███████████▋ │ + 4. │ KDEN │ 582709 │ 1287742 │ ████████████▊ │ + 5. │ KLAX │ 581952 │ 2628393 │ ██████████████████████████▎ │ + 6. │ KLAS │ 447789 │ 1336967 │ █████████████▎ │ + 7. │ KPHX │ 428558 │ 1345635 │ █████████████▍ │ + 8. │ KSEA │ 412592 │ 1757317 │ █████████████████▌ │ + 9. │ KCLT │ 404612 │ 880355 │ ████████▋ │ + 10. │ VIDP │ 363074 │ 1445052 │ ██████████████▍ │ + 11. │ EDDF │ 362643 │ 2263960 │ ██████████████████████▋ │ + 12. │ KSFO │ 361869 │ 2445732 │ ████████████████████████▍ │ + 13. │ KJFK │ 349232 │ 2996550 │ █████████████████████████████▊ │ + 14. │ KMSP │ 346010 │ 1287328 │ ████████████▋ │ + 15. │ LFPG │ 344748 │ 2206203 │ ██████████████████████ │ + 16. │ EGLL │ 341370 │ 3216593 │ ████████████████████████████████▏ │ + 17. │ EHAM │ 340272 │ 2116425 │ █████████████████████▏ │ + 18. │ KEWR │ 337696 │ 1826545 │ ██████████████████▎ │ + 19. │ KPHL │ 320762 │ 1291761 │ ████████████▊ │ + 20. │ OMDB │ 308855 │ 2855706 │ ████████████████████████████▌ │ + 21. │ UUEE │ 307098 │ 1555122 │ ███████████████▌ │ + 22. │ KBOS │ 304416 │ 1621675 │ ████████████████▏ │ + 23. │ LEMD │ 291787 │ 1695097 │ ████████████████▊ │ + 24. │ YSSY │ 272979 │ 1875298 │ ██████████████████▋ │ + 25. │ KMIA │ 265121 │ 1923542 │ ███████████████████▏ │ + 26. │ ZGSZ │ 263497 │ 745086 │ ███████▍ │ + 27. │ EDDM │ 256691 │ 1361453 │ █████████████▌ │ + 28. │ WMKK │ 254264 │ 1626688 │ ████████████████▎ │ + 29. │ CYYZ │ 251192 │ 2175026 │ █████████████████████▋ │ + 30. │ KLGA │ 248699 │ 1106935 │ ███████████ │ + 31. │ VHHH │ 248473 │ 3457658 │ ██████████████████████████████████▌ │ + 32. │ RJTT │ 243477 │ 1272744 │ ████████████▋ │ + 33. │ KBWI │ 241440 │ 1187060 │ ███████████▋ │ + 34. │ KIAD │ 239558 │ 1683485 │ ████████████████▋ │ + 35. │ KIAH │ 234202 │ 1538335 │ ███████████████▍ │ + 36. │ KFLL │ 223447 │ 1464410 │ ██████████████▋ │ + 37. │ KDAL │ 212055 │ 1082339 │ ██████████▋ │ + 38. │ KDCA │ 207883 │ 1013359 │ ██████████▏ │ + 39. │ LIRF │ 207047 │ 1427965 │ ██████████████▎ │ + 40. │ PANC │ 206007 │ 2525359 │ █████████████████████████▎ │ + 41. │ LTFJ │ 205415 │ 860470 │ ████████▌ │ + 42. │ KDTW │ 204020 │ 1106716 │ ███████████ │ + 43. │ VABB │ 201679 │ 1300865 │ █████████████ │ + 44. │ OTHH │ 200797 │ 3759544 │ █████████████████████████████████████▌ │ + 45. │ KMDW │ 200796 │ 1232551 │ ████████████▎ │ + 46. │ KSAN │ 198003 │ 1495195 │ ██████████████▊ │ + 47. │ KPDX │ 197760 │ 1269230 │ ████████████▋ │ + 48. │ SBGR │ 197624 │ 2041697 │ ████████████████████▍ │ + 49. │ VOBL │ 189011 │ 1040180 │ ██████████▍ │ + 50. │ LEBL │ 188956 │ 1283190 │ ████████████▋ │ + 51. 
│ YBBN │ 188011 │ 1253405 │ ████████████▌ │ + 52. │ LSZH │ 187934 │ 1572029 │ ███████████████▋ │ + 53. │ YMML │ 187643 │ 1870076 │ ██████████████████▋ │ + 54. │ RCTP │ 184466 │ 2773976 │ ███████████████████████████▋ │ + 55. │ KSNA │ 180045 │ 778484 │ ███████▋ │ + 56. │ EGKK │ 176420 │ 1694770 │ ████████████████▊ │ + 57. │ LOWW │ 176191 │ 1274833 │ ████████████▋ │ + 58. │ UUDD │ 176099 │ 1368226 │ █████████████▋ │ + 59. │ RKSI │ 173466 │ 3079026 │ ██████████████████████████████▋ │ + 60. │ EKCH │ 172128 │ 1229895 │ ████████████▎ │ + 61. │ KOAK │ 171119 │ 1114447 │ ███████████▏ │ + 62. │ RPLL │ 170122 │ 1440735 │ ██████████████▍ │ + 63. │ KRDU │ 167001 │ 830521 │ ████████▎ │ + 64. │ KAUS │ 164524 │ 1256198 │ ████████████▌ │ + 65. │ KBNA │ 163242 │ 1022726 │ ██████████▏ │ + 66. │ KSDF │ 162655 │ 1380867 │ █████████████▋ │ + 67. │ ENGM │ 160732 │ 910108 │ █████████ │ + 68. │ LIMC │ 160696 │ 1564620 │ ███████████████▋ │ + 69. │ KSJC │ 159278 │ 1081125 │ ██████████▋ │ + 70. │ KSTL │ 157984 │ 1026699 │ ██████████▎ │ + 71. │ UUWW │ 156811 │ 1261155 │ ████████████▌ │ + 72. │ KIND │ 153929 │ 987944 │ █████████▊ │ + 73. │ ESSA │ 153390 │ 1203439 │ ████████████ │ + 74. │ KMCO │ 153351 │ 1508657 │ ███████████████ │ + 75. │ KDVT │ 152895 │ 74048 │ ▋ │ + 76. │ VTBS │ 152645 │ 2255591 │ ██████████████████████▌ │ + 77. │ CYVR │ 149574 │ 2027413 │ ████████████████████▎ │ + 78. │ EIDW │ 148723 │ 1503985 │ ███████████████ │ + 79. │ LFPO │ 143277 │ 1152964 │ ███████████▌ │ + 80. │ EGSS │ 140830 │ 1348183 │ █████████████▍ │ + 81. │ KAPA │ 140776 │ 420441 │ ████▏ │ + 82. │ KHOU │ 138985 │ 1068806 │ ██████████▋ │ + 83. │ KTPA │ 138033 │ 1338223 │ █████████████▍ │ + 84. │ KFFZ │ 137333 │ 55397 │ ▌ │ + 85. │ NZAA │ 136092 │ 1581264 │ ███████████████▋ │ + 86. │ YPPH │ 133916 │ 1271550 │ ████████████▋ │ + 87. │ RJBB │ 133522 │ 1805623 │ ██████████████████ │ + 88. │ EDDL │ 133018 │ 1265919 │ ████████████▋ │ + 89. │ ULLI │ 130501 │ 1197108 │ ███████████▊ │ + 90. │ KIWA │ 127195 │ 250876 │ ██▌ │ + 91. │ KTEB │ 126969 │ 1189414 │ ███████████▊ │ + 92. │ VOMM │ 125616 │ 1127757 │ ███████████▎ │ + 93. │ LSGG │ 123998 │ 1049101 │ ██████████▍ │ + 94. │ LPPT │ 122733 │ 1779187 │ █████████████████▋ │ + 95. │ WSSS │ 120493 │ 3264122 │ ████████████████████████████████▋ │ + 96. │ EBBR │ 118539 │ 1579939 │ ███████████████▋ │ + 97. │ VTBD │ 118107 │ 661627 │ ██████▌ │ + 98. │ KVNY │ 116326 │ 692960 │ ██████▊ │ + 99. │ EDDT │ 115122 │ 941740 │ █████████▍ │ +100. │ EFHK │ 114860 │ 1629143 │ ████████████████▎ │ + └────────┴─────────┴──────────┴────────────────────────────────────────┘ +``` + +### Номера рейсов из трех крупных аэропортов Москвы, еженедельно {#flights-from-moscow} + +Запрос: + +```sql +SELECT + toMonday(day) AS k, + count() AS c, + bar(c, 0, 10000, 100) AS bar +FROM opensky +WHERE origin IN ('UUEE', 'UUDD', 'UUWW') +GROUP BY k +ORDER BY k ASC; +``` + +Результат: + +```text + ┌──────────k─┬────c─┬─bar──────────────────────────────────────────────────────────────────────────┐ + 1. │ 2018-12-31 │ 5248 │ ████████████████████████████████████████████████████▍ │ + 2. │ 2019-01-07 │ 6302 │ ███████████████████████████████████████████████████████████████ │ + 3. │ 2019-01-14 │ 5701 │ █████████████████████████████████████████████████████████ │ + 4. │ 2019-01-21 │ 5638 │ ████████████████████████████████████████████████████████▍ │ + 5. │ 2019-01-28 │ 5731 │ █████████████████████████████████████████████████████████▎ │ + 6. │ 2019-02-04 │ 5683 │ ████████████████████████████████████████████████████████▋ │ + 7. 
│ 2019-02-11 │ 5759 │ █████████████████████████████████████████████████████████▌ │ + 8. │ 2019-02-18 │ 5736 │ █████████████████████████████████████████████████████████▎ │ + 9. │ 2019-02-25 │ 5873 │ ██████████████████████████████████████████████████████████▋ │ + 10. │ 2019-03-04 │ 5965 │ ███████████████████████████████████████████████████████████▋ │ + 11. │ 2019-03-11 │ 5900 │ ███████████████████████████████████████████████████████████ │ + 12. │ 2019-03-18 │ 5823 │ ██████████████████████████████████████████████████████████▏ │ + 13. │ 2019-03-25 │ 5899 │ ██████████████████████████████████████████████████████████▊ │ + 14. │ 2019-04-01 │ 6043 │ ████████████████████████████████████████████████████████████▍ │ + 15. │ 2019-04-08 │ 6098 │ ████████████████████████████████████████████████████████████▊ │ + 16. │ 2019-04-15 │ 6196 │ █████████████████████████████████████████████████████████████▊ │ + 17. │ 2019-04-22 │ 6486 │ ████████████████████████████████████████████████████████████████▋ │ + 18. │ 2019-04-29 │ 6682 │ ██████████████████████████████████████████████████████████████████▋ │ + 19. │ 2019-05-06 │ 6739 │ ███████████████████████████████████████████████████████████████████▍ │ + 20. │ 2019-05-13 │ 6600 │ ██████████████████████████████████████████████████████████████████ │ + 21. │ 2019-05-20 │ 6575 │ █████████████████████████████████████████████████████████████████▋ │ + 22. │ 2019-05-27 │ 6786 │ ███████████████████████████████████████████████████████████████████▋ │ + 23. │ 2019-06-03 │ 6872 │ ████████████████████████████████████████████████████████████████████▋ │ + 24. │ 2019-06-10 │ 7045 │ ██████████████████████████████████████████████████████████████████████▍ │ + 25. │ 2019-06-17 │ 7045 │ ██████████████████████████████████████████████████████████████████████▍ │ + 26. │ 2019-06-24 │ 6852 │ ████████████████████████████████████████████████████████████████████▌ │ + 27. │ 2019-07-01 │ 7248 │ ████████████████████████████████████████████████████████████████████████▍ │ + 28. │ 2019-07-08 │ 7284 │ ████████████████████████████████████████████████████████████████████████▋ │ + 29. │ 2019-07-15 │ 7142 │ ███████████████████████████████████████████████████████████████████████▍ │ + 30. │ 2019-07-22 │ 7108 │ ███████████████████████████████████████████████████████████████████████ │ + 31. │ 2019-07-29 │ 7251 │ ████████████████████████████████████████████████████████████████████████▌ │ + 32. │ 2019-08-05 │ 7403 │ ██████████████████████████████████████████████████████████████████████████ │ + 33. │ 2019-08-12 │ 7457 │ ██████████████████████████████████████████████████████████████████████████▌ │ + 34. │ 2019-08-19 │ 7502 │ ███████████████████████████████████████████████████████████████████████████ │ + 35. │ 2019-08-26 │ 7540 │ ███████████████████████████████████████████████████████████████████████████▍ │ + 36. │ 2019-09-02 │ 7237 │ ████████████████████████████████████████████████████████████████████████▎ │ + 37. │ 2019-09-09 │ 7328 │ █████████████████████████████████████████████████████████████████████████▎ │ + 38. │ 2019-09-16 │ 5566 │ ███████████████████████████████████████████████████████▋ │ + 39. │ 2019-09-23 │ 7049 │ ██████████████████████████████████████████████████████████████████████▍ │ + 40. │ 2019-09-30 │ 6880 │ ████████████████████████████████████████████████████████████████████▋ │ + 41. │ 2019-10-07 │ 6518 │ █████████████████████████████████████████████████████████████████▏ │ + 42. │ 2019-10-14 │ 6688 │ ██████████████████████████████████████████████████████████████████▊ │ + 43. 
│ 2019-10-21 │ 6667 │ ██████████████████████████████████████████████████████████████████▋ │ + 44. │ 2019-10-28 │ 6303 │ ███████████████████████████████████████████████████████████████ │ + 45. │ 2019-11-04 │ 6298 │ ██████████████████████████████████████████████████████████████▊ │ + 46. │ 2019-11-11 │ 6137 │ █████████████████████████████████████████████████████████████▎ │ + 47. │ 2019-11-18 │ 6051 │ ████████████████████████████████████████████████████████████▌ │ + 48. │ 2019-11-25 │ 5820 │ ██████████████████████████████████████████████████████████▏ │ + 49. │ 2019-12-02 │ 5942 │ ███████████████████████████████████████████████████████████▍ │ + 50. │ 2019-12-09 │ 4891 │ ████████████████████████████████████████████████▊ │ + 51. │ 2019-12-16 │ 5682 │ ████████████████████████████████████████████████████████▋ │ + 52. │ 2019-12-23 │ 6111 │ █████████████████████████████████████████████████████████████ │ + 53. │ 2019-12-30 │ 5870 │ ██████████████████████████████████████████████████████████▋ │ + 54. │ 2020-01-06 │ 5953 │ ███████████████████████████████████████████████████████████▌ │ + 55. │ 2020-01-13 │ 5698 │ ████████████████████████████████████████████████████████▊ │ + 56. │ 2020-01-20 │ 5339 │ █████████████████████████████████████████████████████▍ │ + 57. │ 2020-01-27 │ 5566 │ ███████████████████████████████████████████████████████▋ │ + 58. │ 2020-02-03 │ 5801 │ ██████████████████████████████████████████████████████████ │ + 59. │ 2020-02-10 │ 5692 │ ████████████████████████████████████████████████████████▊ │ + 60. │ 2020-02-17 │ 5912 │ ███████████████████████████████████████████████████████████ │ + 61. │ 2020-02-24 │ 6031 │ ████████████████████████████████████████████████████████████▎ │ + 62. │ 2020-03-02 │ 6105 │ █████████████████████████████████████████████████████████████ │ + 63. │ 2020-03-09 │ 5823 │ ██████████████████████████████████████████████████████████▏ │ + 64. │ 2020-03-16 │ 4659 │ ██████████████████████████████████████████████▌ │ + 65. │ 2020-03-23 │ 3720 │ █████████████████████████████████████▏ │ + 66. │ 2020-03-30 │ 1720 │ █████████████████▏ │ + 67. │ 2020-04-06 │ 849 │ ████████▍ │ + 68. │ 2020-04-13 │ 710 │ ███████ │ + 69. │ 2020-04-20 │ 725 │ ███████▏ │ + 70. │ 2020-04-27 │ 920 │ █████████▏ │ + 71. │ 2020-05-04 │ 859 │ ████████▌ │ + 72. │ 2020-05-11 │ 1047 │ ██████████▍ │ + 73. │ 2020-05-18 │ 1135 │ ███████████▎ │ + 74. │ 2020-05-25 │ 1266 │ ████████████▋ │ + 75. │ 2020-06-01 │ 1793 │ █████████████████▊ │ + 76. │ 2020-06-08 │ 1979 │ ███████████████████▋ │ + 77. │ 2020-06-15 │ 2297 │ ██████████████████████▊ │ + 78. │ 2020-06-22 │ 2788 │ ███████████████████████████▊ │ + 79. │ 2020-06-29 │ 3389 │ █████████████████████████████████▊ │ + 80. │ 2020-07-06 │ 3545 │ ███████████████████████████████████▍ │ + 81. │ 2020-07-13 │ 3569 │ ███████████████████████████████████▋ │ + 82. │ 2020-07-20 │ 3784 │ █████████████████████████████████████▋ │ + 83. │ 2020-07-27 │ 3960 │ ███████████████████████████████████████▌ │ + 84. │ 2020-08-03 │ 4323 │ ███████████████████████████████████████████▏ │ + 85. │ 2020-08-10 │ 4581 │ █████████████████████████████████████████████▋ │ + 86. │ 2020-08-17 │ 4791 │ ███████████████████████████████████████████████▊ │ + 87. │ 2020-08-24 │ 4928 │ █████████████████████████████████████████████████▎ │ + 88. │ 2020-08-31 │ 4687 │ ██████████████████████████████████████████████▋ │ + 89. │ 2020-09-07 │ 4643 │ ██████████████████████████████████████████████▍ │ + 90. │ 2020-09-14 │ 4594 │ █████████████████████████████████████████████▊ │ + 91. 
│ 2020-09-21 │ 4478 │ ████████████████████████████████████████████▋ │ + 92. │ 2020-09-28 │ 4382 │ ███████████████████████████████████████████▋ │ + 93. │ 2020-10-05 │ 4261 │ ██████████████████████████████████████████▌ │ + 94. │ 2020-10-12 │ 4243 │ ██████████████████████████████████████████▍ │ + 95. │ 2020-10-19 │ 3941 │ ███████████████████████████████████████▍ │ + 96. │ 2020-10-26 │ 3616 │ ████████████████████████████████████▏ │ + 97. │ 2020-11-02 │ 3586 │ ███████████████████████████████████▋ │ + 98. │ 2020-11-09 │ 3403 │ ██████████████████████████████████ │ + 99. │ 2020-11-16 │ 3336 │ █████████████████████████████████▎ │ +100. │ 2020-11-23 │ 3230 │ ████████████████████████████████▎ │ +101. │ 2020-11-30 │ 3183 │ ███████████████████████████████▋ │ +102. │ 2020-12-07 │ 3285 │ ████████████████████████████████▋ │ +103. │ 2020-12-14 │ 3367 │ █████████████████████████████████▋ │ +104. │ 2020-12-21 │ 3748 │ █████████████████████████████████████▍ │ +105. │ 2020-12-28 │ 3986 │ ███████████████████████████████████████▋ │ +106. │ 2021-01-04 │ 3906 │ ███████████████████████████████████████ │ +107. │ 2021-01-11 │ 3425 │ ██████████████████████████████████▎ │ +108. │ 2021-01-18 │ 3144 │ ███████████████████████████████▍ │ +109. │ 2021-01-25 │ 3115 │ ███████████████████████████████▏ │ +110. │ 2021-02-01 │ 3285 │ ████████████████████████████████▋ │ +111. │ 2021-02-08 │ 3321 │ █████████████████████████████████▏ │ +112. │ 2021-02-15 │ 3475 │ ██████████████████████████████████▋ │ +113. │ 2021-02-22 │ 3549 │ ███████████████████████████████████▍ │ +114. │ 2021-03-01 │ 3755 │ █████████████████████████████████████▌ │ +115. │ 2021-03-08 │ 3080 │ ██████████████████████████████▋ │ +116. │ 2021-03-15 │ 3789 │ █████████████████████████████████████▊ │ +117. │ 2021-03-22 │ 3804 │ ██████████████████████████████████████ │ +118. │ 2021-03-29 │ 4238 │ ██████████████████████████████████████████▍ │ +119. │ 2021-04-05 │ 4307 │ ███████████████████████████████████████████ │ +120. │ 2021-04-12 │ 4225 │ ██████████████████████████████████████████▎ │ +121. │ 2021-04-19 │ 4391 │ ███████████████████████████████████████████▊ │ +122. │ 2021-04-26 │ 4868 │ ████████████████████████████████████████████████▋ │ +123. │ 2021-05-03 │ 4977 │ █████████████████████████████████████████████████▋ │ +124. │ 2021-05-10 │ 5164 │ ███████████████████████████████████████████████████▋ │ +125. │ 2021-05-17 │ 4986 │ █████████████████████████████████████████████████▋ │ +126. │ 2021-05-24 │ 5024 │ ██████████████████████████████████████████████████▏ │ +127. │ 2021-05-31 │ 4824 │ ████████████████████████████████████████████████▏ │ +128. │ 2021-06-07 │ 5652 │ ████████████████████████████████████████████████████████▌ │ +129. │ 2021-06-14 │ 5613 │ ████████████████████████████████████████████████████████▏ │ +130. │ 2021-06-21 │ 6061 │ ████████████████████████████████████████████████████████████▌ │ +131. │ 2021-06-28 │ 2554 │ █████████████████████████▌ │ + └────────────┴──────┴──────────────────────────────────────────────────────────────────────────────┘ +``` + +### Online Playground {#playground} + +Вы можете протестировать другие запросы к этому набору данным с помощью интерактивного ресурса [Online Playground](https://gh-api.clickhouse.tech/play?user=play). 
Например, [вот так](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). Однако обратите внимание, что здесь нельзя создавать временные таблицы. + diff --git a/docs/ru/getting-started/example-datasets/uk-price-paid.md b/docs/ru/getting-started/example-datasets/uk-price-paid.md deleted file mode 120000 index e48bcf21d7b..00000000000 --- a/docs/ru/getting-started/example-datasets/uk-price-paid.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/getting-started/example-datasets/uk-price-paid.md \ No newline at end of file diff --git a/docs/ru/getting-started/example-datasets/uk-price-paid.md b/docs/ru/getting-started/example-datasets/uk-price-paid.md new file mode 100644 index 00000000000..1a0991015bd --- /dev/null +++ b/docs/ru/getting-started/example-datasets/uk-price-paid.md @@ -0,0 +1,650 @@ +--- +toc_priority: 20 +toc_title: Набор данных о стоимости недвижимости в Великобритании +--- + +# Набор данных о стоимости недвижимости в Великобритании {#uk-property-price-paid} + +Набор содержит данные о стоимости недвижимости в Англии и Уэльсе. Данные доступны с 1995 года. +Размер набора данных в несжатом виде составляет около 4 GiB, а в ClickHouse он займет около 278 MiB. + +Источник: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads +Описание полей таблицы: https://www.gov.uk/guidance/about-the-price-paid-data + +Набор содержит данные HM Land Registry data © Crown copyright and database right 2021. Эти данные лицензированы в соответствии с Open Government Licence v3.0. + +## Загрузите набор данных {#download-dataset} + +Выполните команду: + +```bash +wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv +``` + +Загрузка займет около 2 минут при хорошем подключении к интернету. + +## Создайте таблицу {#create-table} + +```sql +CREATE TABLE uk_price_paid +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0), + is_new UInt8, + duration Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String), + category UInt8 +) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2); +``` + +## Обработайте и импортируйте данные {#preprocess-import-data} + +В этом примере используется `clickhouse-local` для предварительной обработки данных и `clickhouse-client` для импорта данных. + +Указывается структура исходных данных CSV-файла и запрос для предварительной обработки данных с помощью `clickhouse-local`. 
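
Before writing the full preprocessing query, it can help to inspect a couple of raw rows and confirm that the column order matches the structure declared in the command below. A minimal sketch, assuming `pp-complete.csv` is the file downloaded earlier (the file has no header row):

```bash
# Print the first two raw rows of the source CSV to check the column order
head -n 2 pp-complete.csv
```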
+ +Предварительная обработка включает: +- разделение почтового индекса на два разных столбца `postcode1` и `postcode2`, что лучше подходит для хранения данных и выполнения запросов к ним; +- преобразование поля `time` в дату, поскольку оно содержит только время 00:00; +- поле [UUid](../../sql-reference/data-types/uuid.md) игнорируется, потому что оно не будет использовано для анализа; +- преобразование полей `type` и `duration` в более читаемые поля типа `Enum` с помощью функции [transform](../../sql-reference/functions/other-functions.md#transform); +- преобразование полей `is_new` и `category` из односимвольной строки (`Y`/`N` и `A`/`B`) в поле [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) со значениями 0 и 1 соответственно. + +Обработанные данные передаются в `clickhouse-client` и импортируются в таблицу ClickHouse потоковым способом. + +```bash +clickhouse-local --input-format CSV --structure ' + uuid String, + price UInt32, + time DateTime, + postcode String, + a String, + b String, + c String, + addr1 String, + addr2 String, + street String, + locality String, + town String, + district String, + county String, + d String, + e String +' --query " + WITH splitByChar(' ', postcode) AS p + SELECT + price, + toDate(time) AS date, + p[1] AS postcode1, + p[2] AS postcode2, + transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type, + b = 'Y' AS is_new, + transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration, + addr1, + addr2, + street, + locality, + town, + district, + county, + d = 'B' AS category + FROM table" --date_time_input_format best_effort < pp-complete.csv | clickhouse-client --query "INSERT INTO uk_price_paid FORMAT TSV" +``` + +Выполнение запроса займет около 40 секунд. + +## Проверьте импортированные данные {#validate-data} + +Запрос: + +```sql +SELECT count() FROM uk_price_paid; +``` + +Результат: + +```text +┌──count()─┐ +│ 26321785 │ +└──────────┘ +``` + +Размер набора данных в ClickHouse составляет всего 278 MiB, проверьте это. + +Запрос: + +```sql +SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid'; +``` + +Результат: + +```text +┌─formatReadableSize(total_bytes)─┐ +│ 278.80 MiB │ +└─────────────────────────────────┘ +``` + +## Примеры запросов {#run-queries} + +### Запрос 1. 
Средняя цена за год {#average-price} + +Запрос: + +```sql +SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year; +``` + +Результат: + +```text +┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐ +│ 1995 │ 67932 │ █████▍ │ +│ 1996 │ 71505 │ █████▋ │ +│ 1997 │ 78532 │ ██████▎ │ +│ 1998 │ 85436 │ ██████▋ │ +│ 1999 │ 96037 │ ███████▋ │ +│ 2000 │ 107479 │ ████████▌ │ +│ 2001 │ 118885 │ █████████▌ │ +│ 2002 │ 137941 │ ███████████ │ +│ 2003 │ 155889 │ ████████████▍ │ +│ 2004 │ 178885 │ ██████████████▎ │ +│ 2005 │ 189351 │ ███████████████▏ │ +│ 2006 │ 203528 │ ████████████████▎ │ +│ 2007 │ 219378 │ █████████████████▌ │ +│ 2008 │ 217056 │ █████████████████▎ │ +│ 2009 │ 213419 │ █████████████████ │ +│ 2010 │ 236109 │ ██████████████████▊ │ +│ 2011 │ 232805 │ ██████████████████▌ │ +│ 2012 │ 238367 │ ███████████████████ │ +│ 2013 │ 256931 │ ████████████████████▌ │ +│ 2014 │ 279915 │ ██████████████████████▍ │ +│ 2015 │ 297266 │ ███████████████████████▋ │ +│ 2016 │ 313201 │ █████████████████████████ │ +│ 2017 │ 346097 │ ███████████████████████████▋ │ +│ 2018 │ 350116 │ ████████████████████████████ │ +│ 2019 │ 351013 │ ████████████████████████████ │ +│ 2020 │ 369420 │ █████████████████████████████▌ │ +│ 2021 │ 386903 │ ██████████████████████████████▊ │ +└──────┴────────┴────────────────────────────────────────┘ +``` + +### Запрос 2. Средняя цена за год в Лондоне {#average-price-london} + +Запрос: + +```sql +SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year; +``` + +Результат: + +```text +┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐ +│ 1995 │ 109116 │ █████▍ │ +│ 1996 │ 118667 │ █████▊ │ +│ 1997 │ 136518 │ ██████▋ │ +│ 1998 │ 152983 │ ███████▋ │ +│ 1999 │ 180637 │ █████████ │ +│ 2000 │ 215838 │ ██████████▋ │ +│ 2001 │ 232994 │ ███████████▋ │ +│ 2002 │ 263670 │ █████████████▏ │ +│ 2003 │ 278394 │ █████████████▊ │ +│ 2004 │ 304666 │ ███████████████▏ │ +│ 2005 │ 322875 │ ████████████████▏ │ +│ 2006 │ 356191 │ █████████████████▋ │ +│ 2007 │ 404054 │ ████████████████████▏ │ +│ 2008 │ 420741 │ █████████████████████ │ +│ 2009 │ 427753 │ █████████████████████▍ │ +│ 2010 │ 480306 │ ████████████████████████ │ +│ 2011 │ 496274 │ ████████████████████████▋ │ +│ 2012 │ 519442 │ █████████████████████████▊ │ +│ 2013 │ 616212 │ ██████████████████████████████▋ │ +│ 2014 │ 724154 │ ████████████████████████████████████▏ │ +│ 2015 │ 792129 │ ███████████████████████████████████████▌ │ +│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │ +│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │ +│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │ +│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │ +│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │ +│ 2021 │ 960343 │ ████████████████████████████████████████████████ │ +└──────┴─────────┴───────────────────────────────────────────────────────┘ +``` + +Что-то случилось в 2013 году. Я понятия не имею. Может быть, вы имеете представление о том, что произошло в 2020 году? + +### Запрос 3. 
Самые дорогие районы {#most-expensive-neighborhoods} + +Запрос: + +```sql +SELECT + town, + district, + count() AS c, + round(avg(price)) AS price, + bar(price, 0, 5000000, 100) +FROM uk_price_paid +WHERE date >= '2020-01-01' +GROUP BY + town, + district +HAVING c >= 100 +ORDER BY price DESC +LIMIT 100; +``` + +Результат: + +```text + +┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐ +│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │ +│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │ +│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │ +│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │ +│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │ +│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │ +│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │ +│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │ +│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │ +│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │ +│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │ +│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │ +│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │ +│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │ +│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │ +│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │ +│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │ +│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │ +│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │ +│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │ +│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │ +│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │ +│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │ +│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │ +│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │ +│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │ +│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │ +│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │ +│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │ +│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │ +│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │ +│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │ +│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │ +│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │ +│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │ +│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │ +│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │ +│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │ +│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │ +│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │ +│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │ +│ LONDON │ HACKNEY │ 3307 │ 837090 │ 
████████████████▋ │ +│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │ +│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │ +│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │ +│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │ +│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │ +│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │ +│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │ +│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │ +│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │ +│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │ +│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │ +│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │ +│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │ +│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │ +│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │ +│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │ +│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │ +│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │ +│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │ +│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │ +│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │ +│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │ +│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │ +│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │ +│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │ +│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │ +│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │ +│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │ +│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │ +│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │ +│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │ +│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │ +│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │ +│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │ +│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │ +│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │ +│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │ +│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │ +│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │ +│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │ +│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │ +│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │ +│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │ +│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │ +│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │ +│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │ +│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │ +│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │ +│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │ +│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │ +│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │ +│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │ +│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │ +│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │ +│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │ +│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │ +│ MAYFIELD │ 
WEALDEN │ 101 │ 676210 │ █████████████▌ │ +│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │ +└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘ +``` + +## Ускорьте запросы с помощью проекций {#speedup-with-projections} + +[Проекции](../../sql-reference/statements/alter/projection.md) позволяют повысить скорость запросов за счет хранения предварительно агрегированных данных. + +### Создайте проекцию {#build-projection} + +Создайте агрегирующую проекцию по параметрам `toYear(date)`, `district`, `town`: + +```sql +ALTER TABLE uk_price_paid + ADD PROJECTION projection_by_year_district_town + ( + SELECT + toYear(date), + district, + town, + avg(price), + sum(price), + count() + GROUP BY + toYear(date), + district, + town + ); +``` + +Заполните проекцию для текущих данных (иначе проекция будет создана только для добавляемых данных): + +```sql +ALTER TABLE uk_price_paid + MATERIALIZE PROJECTION projection_by_year_district_town +SETTINGS mutations_sync = 1; +``` + +## Проверьте производительность {#test-performance} + +Давайте выполним те же 3 запроса. + +[Включите](../../operations/settings/settings.md#allow-experimental-projection-optimization) поддержку проекций: + +```sql +SET allow_experimental_projection_optimization = 1; +``` + +### Запрос 1. Средняя цена за год {#average-price-projections} + +Запрос: + +```sql +SELECT + toYear(date) AS year, + round(avg(price)) AS price, + bar(price, 0, 1000000, 80) +FROM uk_price_paid +GROUP BY year +ORDER BY year ASC; +``` + +Результат: + +```text +┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐ +│ 1995 │ 67932 │ █████▍ │ +│ 1996 │ 71505 │ █████▋ │ +│ 1997 │ 78532 │ ██████▎ │ +│ 1998 │ 85436 │ ██████▋ │ +│ 1999 │ 96037 │ ███████▋ │ +│ 2000 │ 107479 │ ████████▌ │ +│ 2001 │ 118885 │ █████████▌ │ +│ 2002 │ 137941 │ ███████████ │ +│ 2003 │ 155889 │ ████████████▍ │ +│ 2004 │ 178885 │ ██████████████▎ │ +│ 2005 │ 189351 │ ███████████████▏ │ +│ 2006 │ 203528 │ ████████████████▎ │ +│ 2007 │ 219378 │ █████████████████▌ │ +│ 2008 │ 217056 │ █████████████████▎ │ +│ 2009 │ 213419 │ █████████████████ │ +│ 2010 │ 236109 │ ██████████████████▊ │ +│ 2011 │ 232805 │ ██████████████████▌ │ +│ 2012 │ 238367 │ ███████████████████ │ +│ 2013 │ 256931 │ ████████████████████▌ │ +│ 2014 │ 279915 │ ██████████████████████▍ │ +│ 2015 │ 297266 │ ███████████████████████▋ │ +│ 2016 │ 313201 │ █████████████████████████ │ +│ 2017 │ 346097 │ ███████████████████████████▋ │ +│ 2018 │ 350116 │ ████████████████████████████ │ +│ 2019 │ 351013 │ ████████████████████████████ │ +│ 2020 │ 369420 │ █████████████████████████████▌ │ +│ 2021 │ 386903 │ ██████████████████████████████▊ │ +└──────┴────────┴────────────────────────────────────────┘ +``` + +### Запрос 2. 
Средняя цена за год в Лондоне {#average-price-london-projections} + +Запрос: + +```sql +SELECT + toYear(date) AS year, + round(avg(price)) AS price, + bar(price, 0, 2000000, 100) +FROM uk_price_paid +WHERE town = 'LONDON' +GROUP BY year +ORDER BY year ASC; +``` + +Результат: + +```text +┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐ +│ 1995 │ 109116 │ █████▍ │ +│ 1996 │ 118667 │ █████▊ │ +│ 1997 │ 136518 │ ██████▋ │ +│ 1998 │ 152983 │ ███████▋ │ +│ 1999 │ 180637 │ █████████ │ +│ 2000 │ 215838 │ ██████████▋ │ +│ 2001 │ 232994 │ ███████████▋ │ +│ 2002 │ 263670 │ █████████████▏ │ +│ 2003 │ 278394 │ █████████████▊ │ +│ 2004 │ 304666 │ ███████████████▏ │ +│ 2005 │ 322875 │ ████████████████▏ │ +│ 2006 │ 356191 │ █████████████████▋ │ +│ 2007 │ 404054 │ ████████████████████▏ │ +│ 2008 │ 420741 │ █████████████████████ │ +│ 2009 │ 427753 │ █████████████████████▍ │ +│ 2010 │ 480306 │ ████████████████████████ │ +│ 2011 │ 496274 │ ████████████████████████▋ │ +│ 2012 │ 519442 │ █████████████████████████▊ │ +│ 2013 │ 616212 │ ██████████████████████████████▋ │ +│ 2014 │ 724154 │ ████████████████████████████████████▏ │ +│ 2015 │ 792129 │ ███████████████████████████████████████▌ │ +│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │ +│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │ +│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │ +│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │ +│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │ +│ 2021 │ 960343 │ ████████████████████████████████████████████████ │ +└──────┴─────────┴───────────────────────────────────────────────────────┘ +``` + +### Запрос 3. Самые дорогие районы {#most-expensive-neighborhoods-projections} + +Условие (date >= '2020-01-01') необходимо изменить, чтобы оно соответствовало проекции (toYear(date) >= 2020). 
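
To see the effect of the projection on this query, you can time it with the optimization switched off and on. A minimal sketch, assuming a local server and the projection materialized as in the previous section; the `--time` option of `clickhouse-client` prints the elapsed time to stderr, and settings are passed as command-line options the same way `--date_time_input_format` was used earlier:

```bash
# Compare timings for the same query without and with the projection optimization
QUERY="SELECT town, district, count() AS c, round(avg(price)) AS price
       FROM uk_price_paid
       WHERE toYear(date) >= 2020
       GROUP BY town, district
       HAVING c >= 100
       ORDER BY price DESC
       LIMIT 100"
clickhouse-client --time --allow_experimental_projection_optimization 0 --query "$QUERY" > /dev/null
clickhouse-client --time --allow_experimental_projection_optimization 1 --query "$QUERY" > /dev/null
```

The second run should read far fewer rows, in line with the summary at the end of this section.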
+ +Запрос: + +```sql +SELECT + town, + district, + count() AS c, + round(avg(price)) AS price, + bar(price, 0, 5000000, 100) +FROM uk_price_paid +WHERE toYear(date) >= 2020 +GROUP BY + town, + district +HAVING c >= 100 +ORDER BY price DESC +LIMIT 100; +``` + +Результат: + +```text +┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐ +│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │ +│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │ +│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │ +│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │ +│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │ +│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │ +│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │ +│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │ +│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │ +│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │ +│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │ +│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │ +│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │ +│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │ +│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │ +│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │ +│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │ +│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │ +│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │ +│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │ +│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │ +│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │ +│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │ +│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │ +│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │ +│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │ +│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │ +│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │ +│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │ +│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │ +│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │ +│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │ +│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │ +│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │ +│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │ +│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │ +│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │ +│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │ +│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │ +│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │ +│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │ +│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │ +│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ 
████████████████▋ │ +│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │ +│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │ +│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │ +│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │ +│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │ +│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │ +│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │ +│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │ +│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │ +│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │ +│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │ +│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │ +│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │ +│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │ +│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │ +│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │ +│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │ +│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │ +│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │ +│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │ +│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │ +│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │ +│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │ +│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │ +│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │ +│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │ +│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │ +│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │ +│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │ +│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │ +│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │ +│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │ +│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │ +│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │ +│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │ +│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │ +│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │ +│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │ +│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │ +│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │ +│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │ +│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │ +│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │ +│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │ +│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │ +│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │ +│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │ +│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │ +│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │ +│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │ +│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │ +│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │ +│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │ +│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │ +│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │ +│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │ +│ ASCOT │ BRACKNELL 
FOREST │ 168 │ 676004 │ █████████████▌ │ +└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘ +``` + +### Резюме {#summary} + +Все три запроса работают намного быстрее и читают меньшее количество строк. + +```text +Query 1 + +no projection: 27 rows in set. Elapsed: 0.158 sec. Processed 26.32 million rows, 157.93 MB (166.57 million rows/s., 999.39 MB/s.) + projection: 27 rows in set. Elapsed: 0.007 sec. Processed 105.96 thousand rows, 3.33 MB (14.58 million rows/s., 458.13 MB/s.) + + +Query 2 + +no projection: 27 rows in set. Elapsed: 0.163 sec. Processed 26.32 million rows, 80.01 MB (161.75 million rows/s., 491.64 MB/s.) + projection: 27 rows in set. Elapsed: 0.008 sec. Processed 105.96 thousand rows, 3.67 MB (13.29 million rows/s., 459.89 MB/s.) + +Query 3 + +no projection: 100 rows in set. Elapsed: 0.069 sec. Processed 26.32 million rows, 62.47 MB (382.13 million rows/s., 906.93 MB/s.) + projection: 100 rows in set. Elapsed: 0.029 sec. Processed 8.08 thousand rows, 511.08 KB (276.06 thousand rows/s., 17.47 MB/s.) +``` + +### Online Playground {#playground} + +Этот набор данных доступен в [Online Playground](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==). + diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 89e244f14b4..732f5e302c0 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include @@ -2050,8 +2050,7 @@ private: }); } - QueryPipeline pipeline; - pipeline.init(std::move(pipe)); + QueryPipeline pipeline(std::move(pipe)); PullingAsyncPipelineExecutor executor(pipeline); Block block; diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index 1e8222f8769..30b99b69351 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -9,6 +9,11 @@ #include #include #include +#include +#include +#include +#include +#include #include namespace DB @@ -1446,7 +1451,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( local_context->setSettings(task_cluster->settings_pull); local_context->setSetting("skip_unavailable_shards", true); - Block block = getBlockWithAllStreamData(InterpreterFactory::get(query_select_ast, local_context)->execute().getInputStream()); + Block block = getBlockWithAllStreamData(InterpreterFactory::get(query_select_ast, local_context)->execute().pipeline); count = (block) ? 
block.safeGetByPosition(0).column->getUInt(0) : 0; } @@ -1524,25 +1529,30 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( context_insert->setSettings(task_cluster->settings_push); /// Custom INSERT SELECT implementation - BlockInputStreamPtr input; - BlockOutputStreamPtr output; + QueryPipeline input; + QueryPipeline output; { BlockIO io_select = InterpreterFactory::get(query_select_ast, context_select)->execute(); BlockIO io_insert = InterpreterFactory::get(query_insert_ast, context_insert)->execute(); - auto pure_input = io_select.getInputStream(); - output = io_insert.out; + output = std::move(io_insert.pipeline); /// Add converting actions to make it possible to copy blocks with slightly different schema - const auto & select_block = pure_input->getHeader(); - const auto & insert_block = output->getHeader(); + const auto & select_block = io_select.pipeline.getHeader(); + const auto & insert_block = output.getHeader(); auto actions_dag = ActionsDAG::makeConvertingActions( select_block.getColumnsWithTypeAndName(), insert_block.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Position); auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext())); - input = std::make_shared(pure_input, actions); + QueryPipelineBuilder builder; + builder.init(std::move(io_select.pipeline)); + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, actions); + }); + input = QueryPipelineBuilder::getPipeline(std::move(builder)); } /// Fail-fast optimization to abort copying when the current clean state expires @@ -1588,7 +1598,26 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( }; /// Main work is here - copyData(*input, *output, cancel_check, update_stats); + PullingPipelineExecutor pulling_executor(input); + PushingPipelineExecutor pushing_executor(output); + + Block data; + bool is_cancelled = false; + while (pulling_executor.pull(data)) + { + if (cancel_check()) + { + is_cancelled = true; + pushing_executor.cancel(); + pushing_executor.cancel(); + break; + } + pushing_executor.push(data); + update_stats(data); + } + + if (!is_cancelled) + pushing_executor.finish(); // Just in case if (future_is_dirty_checker.valid()) @@ -1711,7 +1740,8 @@ String ClusterCopier::getRemoteCreateTable( String query = "SHOW CREATE TABLE " + getQuotedTable(table); Block block = getBlockWithAllStreamData( - std::make_shared(connection, query, InterpreterShowCreateQuery::getSampleBlock(), remote_context)); + QueryPipeline(std::make_shared( + std::make_shared(connection, query, InterpreterShowCreateQuery::getSampleBlock(), remote_context), false, false))); return typeid_cast(*block.safeGetByPosition(0).column).getDataAt(0).toString(); } @@ -1824,7 +1854,7 @@ std::set ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti auto local_context = Context::createCopy(context); local_context->setSettings(task_cluster->settings_pull); - Block block = getBlockWithAllStreamData(InterpreterFactory::get(query_ast, local_context)->execute().getInputStream()); + Block block = getBlockWithAllStreamData(InterpreterFactory::get(query_ast, local_context)->execute().pipeline); if (block) { @@ -1869,7 +1899,11 @@ bool ClusterCopier::checkShardHasPartition(const ConnectionTimeouts & timeouts, auto local_context = Context::createCopy(context); local_context->setSettings(task_cluster->settings_pull); - return InterpreterFactory::get(query_ast, local_context)->execute().getInputStream()->read().rows() != 0; + auto pipeline = 
InterpreterFactory::get(query_ast, local_context)->execute().pipeline; + PullingPipelineExecutor executor(pipeline); + Block block; + executor.pull(block); + return block.rows() != 0; } bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTimeouts & timeouts, @@ -1910,12 +1944,15 @@ bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTi auto local_context = Context::createCopy(context); local_context->setSettings(task_cluster->settings_pull); - auto result = InterpreterFactory::get(query_ast, local_context)->execute().getInputStream()->read().rows(); - if (result != 0) + auto pipeline = InterpreterFactory::get(query_ast, local_context)->execute().pipeline; + PullingPipelineExecutor executor(pipeline); + Block result; + executor.pull(result); + if (result.rows() != 0) LOG_INFO(log, "Partition {} piece number {} is PRESENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription()); else LOG_INFO(log, "Partition {} piece number {} is ABSENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription()); - return result != 0; + return result.rows() != 0; } diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index bec612a8226..c5e702cd1dc 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -1,6 +1,8 @@ #include "Internals.h" #include #include +#include +#include namespace DB { @@ -63,9 +65,21 @@ BlockInputStreamPtr squashStreamIntoOneBlock(const BlockInputStreamPtr & stream) std::numeric_limits::max()); } -Block getBlockWithAllStreamData(const BlockInputStreamPtr & stream) +Block getBlockWithAllStreamData(QueryPipeline pipeline) { - return squashStreamIntoOneBlock(stream)->read(); + QueryPipelineBuilder builder; + builder.init(std::move(pipeline)); + builder.addTransform(std::make_shared( + builder.getHeader(), + std::numeric_limits::max(), + std::numeric_limits::max())); + + auto cur_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); + Block block; + PullingPipelineExecutor executor(cur_pipeline); + executor.pull(block); + + return block; } diff --git a/programs/copier/Internals.h b/programs/copier/Internals.h index 9e40d7ebd7b..8a3e676baf5 100644 --- a/programs/copier/Internals.h +++ b/programs/copier/Internals.h @@ -165,10 +165,7 @@ std::shared_ptr createASTStorageDistributed( const String & cluster_name, const String & database, const String & table, const ASTPtr & sharding_key_ast = nullptr); - -BlockInputStreamPtr squashStreamIntoOneBlock(const BlockInputStreamPtr & stream); - -Block getBlockWithAllStreamData(const BlockInputStreamPtr & stream); +Block getBlockWithAllStreamData(QueryPipeline pipeline); bool isExtendedDefinitionStorage(const ASTPtr & storage_ast); diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index b1acc34ef93..748c26675fd 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -1162,8 +1162,7 @@ try Pipe pipe(FormatFactory::instance().getInput(input_format, file_in, header, context, max_block_size)); - QueryPipeline pipeline; - pipeline.init(std::move(pipe)); + QueryPipeline pipeline(std::move(pipe)); PullingPipelineExecutor executor(pipeline); Block block; @@ -1200,8 +1199,7 @@ try }); } - QueryPipeline pipeline; - pipeline.init(std::move(pipe)); + QueryPipeline pipeline(std::move(pipe)); 
BlockOutputStreamPtr output = context->getOutputStreamParallelIfPossible(output_format, file_out, header); diff --git a/src/Access/AccessType.h b/src/Access/AccessType.h index d031606616d..3e1597dc890 100644 --- a/src/Access/AccessType.h +++ b/src/Access/AccessType.h @@ -45,6 +45,7 @@ enum class AccessType M(ALTER_RENAME_COLUMN, "RENAME COLUMN", COLUMN, ALTER_COLUMN) \ M(ALTER_MATERIALIZE_COLUMN, "MATERIALIZE COLUMN", COLUMN, ALTER_COLUMN) \ M(ALTER_COLUMN, "", GROUP, ALTER_TABLE) /* allow to execute ALTER {ADD|DROP|MODIFY...} COLUMN */\ + M(ALTER_MODIFY_COMMENT, "MODIFY COMMENT", TABLE, ALTER_TABLE) /* modify table comment */\ \ M(ALTER_ORDER_BY, "ALTER MODIFY ORDER BY, MODIFY ORDER BY", TABLE, ALTER_INDEX) \ M(ALTER_SAMPLE_BY, "ALTER MODIFY SAMPLE BY, MODIFY SAMPLE BY", TABLE, ALTER_INDEX) \ diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 5fb6f48e1ee..6c995f52f2b 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Access/ReplicatedAccessStorage.cpp b/src/Access/ReplicatedAccessStorage.cpp index f91b7c8fb06..7c71e264cd0 100644 --- a/src/Access/ReplicatedAccessStorage.cpp +++ b/src/Access/ReplicatedAccessStorage.cpp @@ -63,9 +63,12 @@ void ReplicatedAccessStorage::shutdown() bool prev_stop_flag = stop_flag.exchange(true); if (!prev_stop_flag) { - /// Notify the worker thread to stop waiting for new queue items - refresh_queue.push(UUIDHelpers::Nil); - worker_thread.join(); + if (worker_thread.joinable()) + { + /// Notify the worker thread to stop waiting for new queue items + refresh_queue.push(UUIDHelpers::Nil); + worker_thread.join(); + } } } diff --git a/src/Access/tests/gtest_replicated_access_storage.cpp b/src/Access/tests/gtest_replicated_access_storage.cpp new file mode 100644 index 00000000000..f2052e91749 --- /dev/null +++ b/src/Access/tests/gtest_replicated_access_storage.cpp @@ -0,0 +1,46 @@ +#include +#include + +using namespace DB; + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NO_ZOOKEEPER; +} +} + + +TEST(ReplicatedAccessStorage, ShutdownWithoutStartup) +{ + auto get_zk = []() + { + return std::shared_ptr(); + }; + + auto storage = ReplicatedAccessStorage("replicated", "/clickhouse/access", get_zk); + storage.shutdown(); +} + + +TEST(ReplicatedAccessStorage, ShutdownWithFailedStartup) +{ + auto get_zk = []() + { + return std::shared_ptr(); + }; + + auto storage = ReplicatedAccessStorage("replicated", "/clickhouse/access", get_zk); + try + { + storage.startup(); + } + catch (Exception & e) + { + if (e.code() != ErrorCodes::NO_ZOOKEEPER) + throw; + } + storage.shutdown(); +} + diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index c9a44dba6f2..4c2c64ef5e1 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -155,7 +155,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( } /// Combinators of aggregate functions. - /// For every aggregate function 'agg' and combiner '-Comb' there is combined aggregate function with name 'aggComb', + /// For every aggregate function 'agg' and combiner '-Comb' there is a combined aggregate function with the name 'aggComb', /// that can have different number and/or types of arguments, different result type and different behaviour. 
if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name)) @@ -172,13 +172,12 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( String nested_name = name.substr(0, name.size() - combinator_name.size()); /// Nested identical combinators (i.e. uniqCombinedIfIf) is not - /// supported (since they even don't work -- silently). + /// supported (since they don't work -- silently). /// - /// But non-identical does supported and works, for example - /// uniqCombinedIfMergeIf, it is useful in case when the underlying + /// But non-identical is supported and works. For example, + /// uniqCombinedIfMergeIf is useful in cases when the underlying /// storage stores AggregateFunction(uniqCombinedIf) and in SELECT you - /// need to filter aggregation result based on another column for - /// example. + /// need to filter aggregation result based on another column. if (!combinator->supportsNesting() && nested_name.ends_with(combinator_name)) { throw Exception(ErrorCodes::ILLEGAL_AGGREGATION, @@ -234,7 +233,7 @@ std::optional AggregateFunctionFactory::tryGetPrope return found.properties; /// Combinators of aggregate functions. - /// For every aggregate function 'agg' and combiner '-Comb' there is combined aggregate function with name 'aggComb', + /// For every aggregate function 'agg' and combiner '-Comb' there is a combined aggregate function with the name 'aggComb', /// that can have different number and/or types of arguments, different result type and different behaviour. if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name)) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bb409e17697..6362abd03d9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -216,6 +216,7 @@ add_object_library(clickhouse_processors_formats Processors/Formats) add_object_library(clickhouse_processors_formats_impl Processors/Formats/Impl) add_object_library(clickhouse_processors_transforms Processors/Transforms) add_object_library(clickhouse_processors_sources Processors/Sources) +add_object_library(clickhouse_processors_sinks Processors/Sinks) add_object_library(clickhouse_processors_merges Processors/Merges) add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms) add_object_library(clickhouse_processors_queryplan Processors/QueryPlan) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index dffac97d8ce..e65ca26740a 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -700,14 +700,14 @@ void Connection::sendExternalTablesData(ExternalTablesData & data) if (!elem->pipe) elem->pipe = elem->creating_pipe_callback(); - QueryPipeline pipeline; + QueryPipelineBuilder pipeline; pipeline.init(std::move(*elem->pipe)); elem->pipe.reset(); pipeline.resize(1); auto sink = std::make_shared(pipeline.getHeader(), *this, *elem, std::move(on_cancel)); - pipeline.setSinks([&](const Block &, QueryPipeline::StreamType type) -> ProcessorPtr + pipeline.setSinks([&](const Block &, QueryPipelineBuilder::StreamType type) -> ProcessorPtr { - if (type != QueryPipeline::StreamType::Main) + if (type != QueryPipelineBuilder::StreamType::Main) return nullptr; return sink; }); diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 6a2abeeef76..7fe9247106f 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -145,7 +145,6 
@@ protected: Poco::Logger * log = nullptr; friend class CurrentThread; - friend class PushingToViewsBlockOutputStream; /// Use ptr not to add extra dependencies in the header std::unique_ptr last_rusage; @@ -188,6 +187,11 @@ public: return query_context.lock(); } + void disableProfiling() + { + query_profiled_enabled = false; + } + /// Starts new query and create new thread group for it, current thread becomes master thread of the query void initializeQuery(); @@ -222,6 +226,7 @@ public: /// Detaches thread from the thread group and the query, dumps performance counters if they have not been dumped void detachQuery(bool exit_if_already_detached = false, bool thread_exits = false); + void logToQueryViewsLog(const ViewRuntimeData & vinfo); protected: void applyQuerySettings(); @@ -234,7 +239,6 @@ protected: void logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database, std::chrono::time_point now); - void logToQueryViewsLog(const ViewRuntimeData & vinfo); void assertState(const std::initializer_list & permitted_states, const char * description = nullptr) const; diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 22ea5e4f60b..0820846b982 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -160,14 +161,17 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header, auto storage = temporary_table.getTable(); getContext()->addExternalTable(data->table_name, std::move(temporary_table)); auto sink = storage->write(ASTPtr(), storage->getInMemoryMetadataPtr(), getContext()); + auto exception_handling = std::make_shared(sink->getOutputPort().getHeader()); /// Write data data->pipe->resize(1); - connect(*data->pipe->getOutputPort(0), sink->getPort()); + connect(*data->pipe->getOutputPort(0), sink->getInputPort()); + connect(sink->getOutputPort(), exception_handling->getPort()); auto processors = Pipe::detachProcessors(std::move(*data->pipe)); processors.push_back(std::move(sink)); + processors.push_back(std::move(exception_handling)); auto executor = std::make_shared(processors); executor->execute(/*num_threads = */ 1); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a728ba636ad..9dfa4d911b5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -161,6 +161,7 @@ class IColumn; \ M(Bool, force_index_by_date, false, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \ M(Bool, force_primary_key, false, "Throw an exception if there is primary key in a table, and it is not used.", 0) \ + M(Bool, use_skip_indexes, true, "Use data skipping indexes during query execution.", 0) \ M(String, force_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be used during query execution, otherwise an exception will be thrown.", 0) \ \ M(Float, max_streams_to_max_threads_ratio, 1, "Allows you to use more sources than the number of threads - to more evenly distribute work across threads. 
It is assumed that this is a temporary solution, since it will be possible in the future to make the number of sources equal to the number of threads, but for each source to dynamically select available work for itself.", 0) \ diff --git a/src/DataStreams/AddingDefaultBlockOutputStream.cpp b/src/DataStreams/AddingDefaultBlockOutputStream.cpp deleted file mode 100644 index 6f7975d492d..00000000000 --- a/src/DataStreams/AddingDefaultBlockOutputStream.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include -#include -#include - - -namespace DB -{ - -AddingDefaultBlockOutputStream::AddingDefaultBlockOutputStream( - const BlockOutputStreamPtr & output_, - const Block & header_, - const ColumnsDescription & columns_, - ContextPtr context_, - bool null_as_default_) - : output(output_), header(header_) -{ - auto dag = addMissingDefaults(header_, output->getHeader().getNamesAndTypesList(), columns_, context_, null_as_default_); - adding_defaults_actions = std::make_shared(std::move(dag), ExpressionActionsSettings::fromContext(context_, CompileExpressions::yes)); -} - -void AddingDefaultBlockOutputStream::write(const Block & block) -{ - auto copy = block; - adding_defaults_actions->execute(copy); - output->write(copy); -} - -void AddingDefaultBlockOutputStream::flush() -{ - output->flush(); -} - -void AddingDefaultBlockOutputStream::writePrefix() -{ - output->writePrefix(); -} - -void AddingDefaultBlockOutputStream::writeSuffix() -{ - output->writeSuffix(); -} - -} diff --git a/src/DataStreams/AddingDefaultBlockOutputStream.h b/src/DataStreams/AddingDefaultBlockOutputStream.h deleted file mode 100644 index 45ff30a3daa..00000000000 --- a/src/DataStreams/AddingDefaultBlockOutputStream.h +++ /dev/null @@ -1,48 +0,0 @@ -#pragma once - -#include -#include -#include - - -namespace DB -{ - -class ExpressionActions; -using ExpressionActionsPtr = std::shared_ptr; - -class Context; - -/** This stream adds three types of columns into block - * 1. Columns, that are missed inside request, but present in table without defaults (missed columns) - * 2. Columns, that are missed inside request, but present in table with defaults (columns with default values) - * 3. Columns that materialized from other columns (materialized columns) - * Also the stream can substitute NULL into DEFAULT value in case of INSERT SELECT query (null_as_default) if according setting is 1. - * All three types of columns are materialized (not constants). 
- */ -class AddingDefaultBlockOutputStream : public IBlockOutputStream -{ -public: - AddingDefaultBlockOutputStream( - const BlockOutputStreamPtr & output_, - const Block & header_, - const ColumnsDescription & columns_, - ContextPtr context_, - bool null_as_default_ = false); - - Block getHeader() const override { return header; } - void write(const Block & block) override; - - void flush() override; - - void writePrefix() override; - void writeSuffix() override; - -private: - BlockOutputStreamPtr output; - const Block header; - ExpressionActionsPtr adding_defaults_actions; -}; - - -} diff --git a/src/DataStreams/BlockIO.cpp b/src/DataStreams/BlockIO.cpp index 3bbef4471db..5f1abdaf806 100644 --- a/src/DataStreams/BlockIO.cpp +++ b/src/DataStreams/BlockIO.cpp @@ -5,26 +5,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -BlockInputStreamPtr BlockIO::getInputStream() -{ - if (out) - throw Exception("Cannot get input stream from BlockIO because output stream is not empty", - ErrorCodes::LOGICAL_ERROR); - - if (in) - return in; - - if (pipeline.initialized()) - return std::make_shared(std::move(pipeline)); - - throw Exception("Cannot get input stream from BlockIO because query pipeline was not initialized", - ErrorCodes::LOGICAL_ERROR); -} void BlockIO::reset() { @@ -38,10 +18,6 @@ void BlockIO::reset() */ /// TODO simplify it all - out.reset(); - in.reset(); - if (process_list_entry) - process_list_entry->get().releaseQueryStreams(); pipeline.reset(); process_list_entry.reset(); @@ -57,8 +33,6 @@ BlockIO & BlockIO::operator= (BlockIO && rhs) reset(); process_list_entry = std::move(rhs.process_list_entry); - in = std::move(rhs.in); - out = std::move(rhs.out); pipeline = std::move(rhs.pipeline); finish_callback = std::move(rhs.finish_callback); diff --git a/src/DataStreams/BlockIO.h b/src/DataStreams/BlockIO.h index 31a0e1020d2..d699d525f2f 100644 --- a/src/DataStreams/BlockIO.h +++ b/src/DataStreams/BlockIO.h @@ -1,9 +1,6 @@ #pragma once -#include - #include - #include @@ -25,14 +22,11 @@ struct BlockIO std::shared_ptr process_list_entry; - BlockOutputStreamPtr out; - BlockInputStreamPtr in; - QueryPipeline pipeline; /// Callbacks for query logging could be set here. - std::function finish_callback; - std::function exception_callback; + std::function finish_callback; + std::function exception_callback; /// When it is true, don't bother sending any non-empty blocks to the out stream bool null_format = false; @@ -42,11 +36,7 @@ struct BlockIO { if (finish_callback) { - QueryPipeline * pipeline_ptr = nullptr; - if (pipeline.initialized()) - pipeline_ptr = &pipeline; - - finish_callback(in.get(), out.get(), pipeline_ptr); + finish_callback(pipeline); } } @@ -56,9 +46,6 @@ struct BlockIO exception_callback(); } - /// Returns in or converts pipeline to stream. Throws if out is not empty. 
- BlockInputStreamPtr getInputStream(); - private: void reset(); }; diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index fbf4a777032..26275f7d3c7 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -22,26 +22,24 @@ namespace ErrorCodes } -CheckConstraintsBlockOutputStream::CheckConstraintsBlockOutputStream( +CheckConstraintsTransform::CheckConstraintsTransform( const StorageID & table_id_, - const BlockOutputStreamPtr & output_, - const Block & header_, + const Block & header, const ConstraintsDescription & constraints_, ContextPtr context_) - : table_id(table_id_), - output(output_), - header(header_), - constraints(constraints_), - expressions(constraints_.getExpressions(context_, header.getNamesAndTypesList())) + : ExceptionKeepingTransform(header, header) + , table_id(table_id_) + , constraints(constraints_) + , expressions(constraints_.getExpressions(context_, header.getNamesAndTypesList())) { } -void CheckConstraintsBlockOutputStream::write(const Block & block) +void CheckConstraintsTransform::transform(Chunk & chunk) { - if (block.rows() > 0) + if (chunk.getNumRows() > 0) { - Block block_to_calculate = block; + Block block_to_calculate = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); for (size_t i = 0; i < expressions.size(); ++i) { auto constraint_expr = expressions[i]; @@ -65,8 +63,8 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) /// Check if constraint value is nullable const auto & null_map = column_nullable->getNullMapColumn(); - const PaddedPODArray & data = null_map.getData(); - bool null_map_contains_null = !memoryIsZero(data.raw_data(), data.size() * sizeof(UInt8)); + const PaddedPODArray & null_map_data = null_map.getData(); + bool null_map_contains_null = !memoryIsZero(null_map_data.raw_data(), null_map_data.size() * sizeof(UInt8)); if (null_map_contains_null) throw Exception( @@ -82,15 +80,15 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) const ColumnUInt8 & res_column_uint8 = assert_cast(*result_column); - const UInt8 * data = res_column_uint8.getData().data(); + const UInt8 * res_data = res_column_uint8.getData().data(); size_t size = res_column_uint8.size(); /// Is violated. 
- if (!memoryIsByte(data, size, 1)) + if (!memoryIsByte(res_data, size, 1)) { size_t row_idx = 0; for (; row_idx < size; ++row_idx) - if (data[row_idx] != 1) + if (res_data[row_idx] != 1) break; Names related_columns = constraint_expr->getRequiredColumns(); @@ -101,7 +99,7 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) column_values_msg.reserve(approx_bytes_for_col * related_columns.size()); for (const auto & name : related_columns) { - const IColumn & column = *block.getByName(name).column; + const IColumn & column = *chunk.getColumns()[getInputPort().getHeader().getPositionByName(name)]; assert(row_idx < column.size()); if (!first) @@ -124,23 +122,7 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) } } - output->write(block); - rows_written += block.rows(); -} - -void CheckConstraintsBlockOutputStream::flush() -{ - output->flush(); -} - -void CheckConstraintsBlockOutputStream::writePrefix() -{ - output->writePrefix(); -} - -void CheckConstraintsBlockOutputStream::writeSuffix() -{ - output->writeSuffix(); + rows_written += chunk.getNumRows(); } } diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.h b/src/DataStreams/CheckConstraintsBlockOutputStream.h index 0f115550eb8..13569bac0de 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.h +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include @@ -12,28 +12,21 @@ namespace DB * Otherwise just pass block to output unchanged. */ -class CheckConstraintsBlockOutputStream : public IBlockOutputStream +class CheckConstraintsTransform final : public ExceptionKeepingTransform { public: - CheckConstraintsBlockOutputStream( + CheckConstraintsTransform( const StorageID & table_, - const BlockOutputStreamPtr & output_, - const Block & header_, + const Block & header, const ConstraintsDescription & constraints_, ContextPtr context_); - Block getHeader() const override { return header; } - void write(const Block & block) override; + String getName() const override { return "CheckConstraintsTransform"; } - void flush() override; - - void writePrefix() override; - void writeSuffix() override; + void transform(Chunk & chunk) override; private: StorageID table_id; - BlockOutputStreamPtr output; - Block header; const ConstraintsDescription constraints; const ConstraintsExpressions expressions; size_t rows_written = 0; diff --git a/src/DataStreams/CountingBlockOutputStream.cpp b/src/DataStreams/CountingBlockOutputStream.cpp index 6594b3b2ce1..398a9a31761 100644 --- a/src/DataStreams/CountingBlockOutputStream.cpp +++ b/src/DataStreams/CountingBlockOutputStream.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace ProfileEvents @@ -12,15 +13,23 @@ namespace ProfileEvents namespace DB { -void CountingBlockOutputStream::write(const Block & block) +void CountingTransform::transform(Chunk & chunk) { - stream->write(block); - - Progress local_progress(block.rows(), block.bytes(), 0); + Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); progress.incrementPiecewiseAtomically(local_progress); - ProfileEvents::increment(ProfileEvents::InsertedRows, local_progress.read_rows); - ProfileEvents::increment(ProfileEvents::InsertedBytes, local_progress.read_bytes); + //std::cerr << "============ counting adding progress for " << static_cast(thread_status) << ' ' << chunk.getNumRows() << " rows\n"; + + if (thread_status) + { + thread_status->performance_counters.increment(ProfileEvents::InsertedRows, local_progress.read_rows); + 
thread_status->performance_counters.increment(ProfileEvents::InsertedBytes, local_progress.read_bytes); + } + else + { + ProfileEvents::increment(ProfileEvents::InsertedRows, local_progress.read_rows); + ProfileEvents::increment(ProfileEvents::InsertedBytes, local_progress.read_bytes); + } if (process_elem) process_elem->updateProgressOut(local_progress); diff --git a/src/DataStreams/CountingBlockOutputStream.h b/src/DataStreams/CountingBlockOutputStream.h index a5a624ccdd2..9d0ccf11ace 100644 --- a/src/DataStreams/CountingBlockOutputStream.h +++ b/src/DataStreams/CountingBlockOutputStream.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include @@ -9,10 +9,13 @@ namespace DB /// Proxy class which counts number of written block, rows, bytes -class CountingBlockOutputStream final : public IBlockOutputStream +class CountingTransform final : public ExceptionKeepingTransform { public: - explicit CountingBlockOutputStream(const BlockOutputStreamPtr & stream_) : stream(stream_) {} + explicit CountingTransform(const Block & header, ThreadStatus * thread_status_ = nullptr) + : ExceptionKeepingTransform(header, header), thread_status(thread_status_) {} + + String getName() const override { return "CountingTransform"; } void setProgressCallback(const ProgressCallback & callback) { @@ -29,20 +32,13 @@ public: return progress; } - Block getHeader() const override { return stream->getHeader(); } - void write(const Block & block) override; - - void writePrefix() override { stream->writePrefix(); } - void writeSuffix() override { stream->writeSuffix(); } - void flush() override { stream->flush(); } - void onProgress(const Progress & current_progress) override { stream->onProgress(current_progress); } - String getContentType() const override { return stream->getContentType(); } + void transform(Chunk & chunk) override; protected: - BlockOutputStreamPtr stream; Progress progress; ProgressCallback progress_callback; QueryStatus * process_elem = nullptr; + ThreadStatus * thread_status = nullptr; }; } diff --git a/src/DataStreams/PushingToSinkBlockOutputStream.h b/src/DataStreams/PushingToSinkBlockOutputStream.h deleted file mode 100644 index eeca8506d8e..00000000000 --- a/src/DataStreams/PushingToSinkBlockOutputStream.h +++ /dev/null @@ -1,114 +0,0 @@ -#pragma once -#include -#include -#include -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -class PushingToSinkBlockOutputStream : public IBlockOutputStream -{ -public: - explicit PushingToSinkBlockOutputStream(SinkToStoragePtr sink_) - : sink(std::move(sink_)), port(sink->getPort().getHeader(), sink.get()) {} - - Block getHeader() const override { return sink->getPort().getHeader(); } - - void write(const Block & block) override - { - /// In case writePrefix was not called. 
- if (!port.isConnected()) - writePrefix(); - - if (!block) - return; - - size_t num_rows = block.rows(); - Chunk chunk(block.getColumns(), num_rows); - port.push(std::move(chunk)); - - while (true) - { - auto status = sink->prepare(); - switch (status) - { - case IProcessor::Status::Ready: - sink->work(); - continue; - case IProcessor::Status::NeedData: - return; - case IProcessor::Status::Async: [[fallthrough]]; - case IProcessor::Status::ExpandPipeline: [[fallthrough]]; - case IProcessor::Status::Finished: [[fallthrough]]; - case IProcessor::Status::PortFull: - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Status {} in not expected in PushingToSinkBlockOutputStream::writePrefix", - IProcessor::statusToName(status)); - } - } - } - - void writePrefix() override - { - connect(port, sink->getPort()); - - while (true) - { - auto status = sink->prepare(); - switch (status) - { - case IProcessor::Status::Ready: - sink->work(); - continue; - case IProcessor::Status::NeedData: - return; - case IProcessor::Status::Async: [[fallthrough]]; - case IProcessor::Status::ExpandPipeline: [[fallthrough]]; - case IProcessor::Status::Finished: [[fallthrough]]; - case IProcessor::Status::PortFull: - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Status {} in not expected in PushingToSinkBlockOutputStream::writePrefix", - IProcessor::statusToName(status)); - } - } - } - - void writeSuffix() override - { - port.finish(); - while (true) - { - auto status = sink->prepare(); - switch (status) - { - case IProcessor::Status::Ready: - sink->work(); - continue; - case IProcessor::Status::Finished: - - ///flush(); - return; - case IProcessor::Status::NeedData: - case IProcessor::Status::Async: - case IProcessor::Status::ExpandPipeline: - case IProcessor::Status::PortFull: - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Status {} in not expected in PushingToSinkBlockOutputStream::writeSuffix", - IProcessor::statusToName(status)); - } - } - } - -private: - SinkToStoragePtr sink; - OutputPort port; -}; - -} diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 4a7ef67f012..5a55c8f4b48 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -1,28 +1,23 @@ -#include -#include -#include -#include #include -#include -#include #include #include #include #include #include +#include +#include +#include #include #include #include #include #include #include -#include #include #include #include -#include -#include #include +#include #include #include @@ -30,37 +25,150 @@ namespace DB { -PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( - const StoragePtr & storage_, - const StorageMetadataPtr & metadata_snapshot_, - ContextPtr context_, - const ASTPtr & query_ptr_, - bool no_destination) - : WithContext(context_) - , storage(storage_) - , metadata_snapshot(metadata_snapshot_) - , log(&Poco::Logger::get("PushingToViewsBlockOutputStream")) - , query_ptr(query_ptr_) +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +struct ViewsData +{ + /// Separate information for every view. + std::list views; + /// Some common info about source storage. + ContextPtr context; + StorageID source_storage_id; + StorageMetadataPtr source_metadata_snapshot; + StoragePtr source_storage; + /// This value is actually only for logs. + size_t max_threads = 1; + + /// In case of exception happened while inserting into main table, it is pushed to pipeline. 
+ /// Remember the first one, we should keep them after view processing. + std::atomic_bool has_exception = false; + std::exception_ptr first_exception; + + ViewsData(ContextPtr context_, StorageID source_storage_id_, StorageMetadataPtr source_metadata_snapshot_ , StoragePtr source_storage_) + : context(std::move(context_)) + , source_storage_id(std::move(source_storage_id_)) + , source_metadata_snapshot(std::move(source_metadata_snapshot_)) + , source_storage(std::move(source_storage_)) + { + } +}; + +using ViewsDataPtr = std::shared_ptr; + +/// Copies data inserted into table for every dependent table. +class CopyingDataToViewsTransform final : public IProcessor +{ +public: + CopyingDataToViewsTransform(const Block & header, ViewsDataPtr data); + + String getName() const override { return "CopyingDataToViewsTransform"; } + Status prepare() override; + InputPort & getInputPort() { return input; } + +private: + InputPort & input; + ViewsDataPtr views_data; +}; + +/// For source chunk, execute view query over it. +class ExecutingInnerQueryFromViewTransform final : public ExceptionKeepingTransform +{ +public: + ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_); + + String getName() const override { return "ExecutingInnerQueryFromView"; } + +protected: + void transform(Chunk & chunk) override; + +private: + ViewsDataPtr views_data; + ViewRuntimeData & view; +}; + +/// Insert into LiveView. +class PushingToLiveViewSink final : public SinkToStorage +{ +public: + PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_); + String getName() const override { return "PushingToLiveViewSink"; } + void consume(Chunk chunk) override; + +private: + StorageLiveView & live_view; + StoragePtr storage_holder; + ContextPtr context; +}; + +/// For every view, collect exception. +/// Has single output with empty header. +/// If any exception happens before view processing, pass it. +/// Otherwise return any exception from any view. +class FinalizingViewsTransform final : public IProcessor +{ + struct ExceptionStatus + { + std::exception_ptr exception; + bool is_first = false; + }; + + static InputPorts initPorts(std::vector headers); + +public: + FinalizingViewsTransform(std::vector headers, ViewsDataPtr data); + + String getName() const override { return "FinalizingViewsTransform"; } + Status prepare() override; + void work() override; + +private: + OutputPort & output; + ViewsDataPtr views_data; + std::vector statuses; + std::exception_ptr any_exception; +}; + + +Chain buildPushingToViewsChain( + const StoragePtr & storage, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context, + const ASTPtr & query_ptr, + bool no_destination, + ThreadStatus * thread_status, + std::atomic_uint64_t * elapsed_counter_ms, + const Block & live_view_header) { checkStackSize(); + Chain result_chain; + + /// If we don't write directly to the destination + /// then expect that we're inserting with precalculated virtual columns + auto storage_header = no_destination ? metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtuals()) + : metadata_snapshot->getSampleBlock(); /** TODO This is a very important line. At any insertion into the table one of streams should own lock. * Although now any insertion into the table is done via PushingToViewsBlockOutputStream, * but it's clear that here is not the best place for this functionality.
*/ - addTableLock( - storage->lockForShare(getContext()->getInitialQueryId(), getContext()->getSettingsRef().lock_acquire_timeout)); + result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); /// If the "root" table deduplicates blocks, there are no need to make deduplication for children /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks bool disable_deduplication_for_children = false; - if (!getContext()->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) + if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); auto table_id = storage->getStorageID(); Dependencies dependencies = DatabaseCatalog::instance().getDependencies(table_id); /// We need special context for materialized views insertions + ContextMutablePtr select_context; + ContextMutablePtr insert_context; + ViewsDataPtr views_data; if (!dependencies.empty()) { select_context = Context::createCopy(context); @@ -70,79 +178,34 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( // Do not deduplicate insertions into MV if the main insertion is Ok if (disable_deduplication_for_children) - insert_context->setSetting("insert_deduplicate", false); + insert_context->setSetting("insert_deduplicate", Field{false}); // Separate min_insert_block_size_rows/min_insert_block_size_bytes for children if (insert_settings.min_insert_block_size_rows_for_materialized_views) insert_context->setSetting("min_insert_block_size_rows", insert_settings.min_insert_block_size_rows_for_materialized_views.value); if (insert_settings.min_insert_block_size_bytes_for_materialized_views) insert_context->setSetting("min_insert_block_size_bytes", insert_settings.min_insert_block_size_bytes_for_materialized_views.value); + + views_data = std::make_shared(select_context, table_id, metadata_snapshot, storage); } + std::vector chains; + for (const auto & database_table : dependencies) { - auto dependent_table = DatabaseCatalog::instance().getTable(database_table, getContext()); + auto dependent_table = DatabaseCatalog::instance().getTable(database_table, context); auto dependent_metadata_snapshot = dependent_table->getInMemoryMetadataPtr(); ASTPtr query; - BlockOutputStreamPtr out; - QueryViewsLogElement::ViewType type = QueryViewsLogElement::ViewType::DEFAULT; - String target_name = database_table.getFullTableName(); - - if (auto * materialized_view = dynamic_cast(dependent_table.get())) - { - type = QueryViewsLogElement::ViewType::MATERIALIZED; - addTableLock( - materialized_view->lockForShare(getContext()->getInitialQueryId(), getContext()->getSettingsRef().lock_acquire_timeout)); - - StoragePtr inner_table = materialized_view->getTargetTable(); - auto inner_table_id = inner_table->getStorageID(); - auto inner_metadata_snapshot = inner_table->getInMemoryMetadataPtr(); - query = dependent_metadata_snapshot->getSelectQuery().inner_query; - target_name = inner_table_id.getFullTableName(); - - std::unique_ptr insert = std::make_unique(); - insert->table_id = inner_table_id; - - /// Get list of columns we get from select query. - auto header = InterpreterSelectQuery(query, select_context, SelectQueryOptions().analyze()) - .getSampleBlock(); - - /// Insert only columns returned by select. 
- auto list = std::make_shared(); - const auto & inner_table_columns = inner_metadata_snapshot->getColumns(); - for (const auto & column : header) - { - /// But skip columns which storage doesn't have. - if (inner_table_columns.hasPhysical(column.name)) - list->children.emplace_back(std::make_shared(column.name)); - } - - insert->columns = std::move(list); - - ASTPtr insert_query_ptr(insert.release()); - InterpreterInsertQuery interpreter(insert_query_ptr, insert_context); - BlockIO io = interpreter.execute(); - out = io.out; - } - else if (const auto * live_view = dynamic_cast(dependent_table.get())) - { - type = QueryViewsLogElement::ViewType::LIVE; - query = live_view->getInnerQuery(); // Used only to log in system.query_views_log - out = std::make_shared( - dependent_table, dependent_metadata_snapshot, insert_context, ASTPtr(), true); - } - else - out = std::make_shared( - dependent_table, dependent_metadata_snapshot, insert_context, ASTPtr()); + Chain out; /// If the materialized view is executed outside of a query, for example as a result of SYSTEM FLUSH LOGS or /// SYSTEM FLUSH DISTRIBUTED ..., we can't attach to any thread group and we won't log, so there is no point on collecting metrics - std::unique_ptr thread_status = nullptr; + std::unique_ptr view_thread_status_ptr = nullptr; ThreadGroupStatusPtr running_group = current_thread && current_thread->getThreadGroup() ? current_thread->getThreadGroup() - : MainThreadStatus::getInstance().thread_group; + : MainThreadStatus::getInstance().getThreadGroup(); if (running_group) { /// We are creating a ThreadStatus per view to store its metrics individually @@ -152,252 +215,165 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( auto * original_thread = current_thread; SCOPE_EXIT({ current_thread = original_thread; }); - thread_status = std::make_unique(); + view_thread_status_ptr = std::make_unique(); /// Disable query profiler for this ThreadStatus since the running (main query) thread should already have one /// If we didn't disable it, then we could end up with N + 1 (N = number of dependencies) profilers which means /// N times more interruptions - thread_status->query_profiled_enabled = false; - thread_status->setupState(running_group); + view_thread_status_ptr->disableProfiling(); + view_thread_status_ptr->attachQuery(running_group); } - QueryViewsLogElement::ViewRuntimeStats runtime_stats{ - target_name, - type, - std::move(thread_status), - 0, - std::chrono::system_clock::now(), - QueryViewsLogElement::ViewStatus::EXCEPTION_BEFORE_START}; - views.emplace_back(ViewRuntimeData{std::move(query), database_table, std::move(out), nullptr, std::move(runtime_stats)}); + auto runtime_stats = std::make_unique(); + runtime_stats->target_name = database_table.getFullTableName(); + runtime_stats->thread_status = std::move(view_thread_status_ptr); + runtime_stats->event_time = std::chrono::system_clock::now(); + runtime_stats->event_status = QueryViewsLogElement::ViewStatus::EXCEPTION_BEFORE_START; + auto & type = runtime_stats->type; + auto & target_name = runtime_stats->target_name; + auto * view_thread_status = runtime_stats->thread_status.get(); + auto * view_counter_ms = &runtime_stats->elapsed_ms; + + if (auto * materialized_view = dynamic_cast(dependent_table.get())) + { + type = QueryViewsLogElement::ViewType::MATERIALIZED; + result_chain.addTableLock(materialized_view->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); + + StoragePtr inner_table = 
materialized_view->getTargetTable(); + auto inner_table_id = inner_table->getStorageID(); + auto inner_metadata_snapshot = inner_table->getInMemoryMetadataPtr(); + query = dependent_metadata_snapshot->getSelectQuery().inner_query; + target_name = inner_table_id.getFullTableName(); + + /// Get list of columns we get from select query. + auto header = InterpreterSelectQuery(query, select_context, SelectQueryOptions().analyze()) + .getSampleBlock(); + + /// Insert only columns returned by select. + Names insert_columns; + const auto & inner_table_columns = inner_metadata_snapshot->getColumns(); + for (const auto & column : header) + { + /// But skip columns which storage doesn't have. + if (inner_table_columns.hasPhysical(column.name)) + insert_columns.emplace_back(column.name); + } + + InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false); + out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, view_thread_status, view_counter_ms); + out.addStorageHolder(dependent_table); + out.addStorageHolder(inner_table); + } + else if (auto * live_view = dynamic_cast(dependent_table.get())) + { + runtime_stats->type = QueryViewsLogElement::ViewType::LIVE; + query = live_view->getInnerQuery(); // Used only to log in system.query_views_log + out = buildPushingToViewsChain( + dependent_table, dependent_metadata_snapshot, insert_context, ASTPtr(), true, view_thread_status, view_counter_ms, storage_header); + } + else + out = buildPushingToViewsChain( + dependent_table, dependent_metadata_snapshot, insert_context, ASTPtr(), false, view_thread_status, view_counter_ms); + + views_data->views.emplace_back(ViewRuntimeData{ //-V614 + std::move(query), + out.getInputHeader(), + database_table, + nullptr, + std::move(runtime_stats)}); + + if (type == QueryViewsLogElement::ViewType::MATERIALIZED) + { + auto executing_inner_query = std::make_shared( + storage_header, views_data->views.back(), views_data); + executing_inner_query->setRuntimeData(view_thread_status, elapsed_counter_ms); + + out.addSource(std::move(executing_inner_query)); + } + + chains.emplace_back(std::move(out)); /// Add the view to the query access info so it can appear in system.query_log if (!no_destination) { - getContext()->getQueryContext()->addQueryAccessInfo( - backQuoteIfNeed(database_table.getDatabaseName()), target_name, {}, "", database_table.getFullTableName()); + context->getQueryContext()->addQueryAccessInfo( + backQuoteIfNeed(database_table.getDatabaseName()), views_data->views.back().runtime_stats->target_name, {}, "", database_table.getFullTableName()); } } - /// Do not push to destination table if the flag is set - if (!no_destination) + if (views_data) { - auto sink = storage->write(query_ptr, storage->getInMemoryMetadataPtr(), getContext()); + size_t num_views = views_data->views.size(); + const Settings & settings = context->getSettingsRef(); + if (settings.parallel_view_processing) + views_data->max_threads = settings.max_threads ? 
std::min(static_cast(settings.max_threads), num_views) : num_views; - metadata_snapshot->check(sink->getPort().getHeader().getColumnsWithTypeAndName()); + std::vector headers; + headers.reserve(num_views); + for (const auto & chain : chains) + headers.push_back(chain.getOutputHeader()); - replicated_output = dynamic_cast(sink.get()); - output = std::make_shared(std::move(sink)); + auto copying_data = std::make_shared(storage_header, views_data); + auto finalizing_views = std::make_shared(std::move(headers), views_data); + auto out = copying_data->getOutputs().begin(); + auto in = finalizing_views->getInputs().begin(); + + size_t max_parallel_streams = 0; + + std::list processors; + + for (auto & chain : chains) + { + max_parallel_streams += std::max(chain.getNumThreads(), 1); + result_chain.attachResources(chain.detachResources()); + connect(*out, chain.getInputPort()); + connect(chain.getOutputPort(), *in); + ++in; + ++out; + processors.splice(processors.end(), Chain::getProcessors(std::move(chain))); + } + + processors.emplace_front(std::move(copying_data)); + processors.emplace_back(std::move(finalizing_views)); + result_chain = Chain(std::move(processors)); + result_chain.setNumThreads(max_parallel_streams); } -} - -Block PushingToViewsBlockOutputStream::getHeader() const -{ - /// If we don't write directly to the destination - /// then expect that we're inserting with precalculated virtual columns - if (output) - return metadata_snapshot->getSampleBlock(); - else - return metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtuals()); -} - -/// Auxiliary function to do the setup and teardown to run a view individually and collect its metrics inside the view ThreadStatus -void inline runViewStage(ViewRuntimeData & view, const std::string & action, std::function stage) -{ - Stopwatch watch; - - auto * original_thread = current_thread; - SCOPE_EXIT({ current_thread = original_thread; }); - - if (view.runtime_stats.thread_status) - { - /// Change thread context to store individual metrics per view. Once the work in done, go back to the original thread - view.runtime_stats.thread_status->resetPerformanceCountersLastUsage(); - current_thread = view.runtime_stats.thread_status.get(); - } - - try - { - stage(); - } - catch (Exception & ex) - { - ex.addMessage(action + " " + view.table_id.getNameForLogs()); - view.setException(std::current_exception()); - } - catch (...) - { - view.setException(std::current_exception()); - } - - if (view.runtime_stats.thread_status) - view.runtime_stats.thread_status->updatePerformanceCounters(); - view.runtime_stats.elapsed_ms += watch.elapsedMilliseconds(); -} - -void PushingToViewsBlockOutputStream::write(const Block & block) -{ - /** Throw an exception if the sizes of arrays - elements of nested data structures doesn't match. - * We have to make this assertion before writing to table, because storage engine may assume that they have equal sizes. - * NOTE It'd better to do this check in serialization of nested structures (in place when this assumption is required), - * but currently we don't have methods for serialization of nested structures "as a whole". 
- */ - Nested::validateArraySizes(block); if (auto * live_view = dynamic_cast(storage.get())) { - StorageLiveView::writeIntoLiveView(*live_view, block, getContext()); + auto sink = std::make_shared(live_view_header, *live_view, storage, context); + sink->setRuntimeData(thread_status, elapsed_counter_ms); + result_chain.addSource(std::move(sink)); } - else + /// Do not push to destination table if the flag is set + else if (!no_destination) { - if (output) - /// TODO: to support virtual and alias columns inside MVs, we should return here the inserted block extended - /// with additional columns directly from storage and pass it to MVs instead of raw block. - output->write(block); + auto sink = storage->write(query_ptr, storage->getInMemoryMetadataPtr(), context); + metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); + sink->setRuntimeData(thread_status, elapsed_counter_ms); + result_chain.addSource(std::move(sink)); } - if (views.empty()) - return; + /// TODO: add pushing to live view + if (result_chain.empty()) + result_chain.addSink(std::make_shared(storage_header)); - /// Don't process materialized views if this block is duplicate - const Settings & settings = getContext()->getSettingsRef(); - if (!settings.deduplicate_blocks_in_dependent_materialized_views && replicated_output && replicated_output->lastBlockIsDuplicate()) - return; - - size_t max_threads = 1; - if (settings.parallel_view_processing) - max_threads = settings.max_threads ? std::min(static_cast(settings.max_threads), views.size()) : views.size(); - if (max_threads > 1) - { - ThreadPool pool(max_threads); - for (auto & view : views) - { - pool.scheduleOrThrowOnError([&] { - setThreadName("PushingToViews"); - runViewStage(view, "while pushing to view", [&]() { process(block, view); }); - }); - } - pool.wait(); - } - else - { - for (auto & view : views) - { - runViewStage(view, "while pushing to view", [&]() { process(block, view); }); - } - } + return result_chain; } -void PushingToViewsBlockOutputStream::writePrefix() +static void process(Block & block, ViewRuntimeData & view, const ViewsData & views_data) { - if (output) - output->writePrefix(); + const auto & context = views_data.context; - for (auto & view : views) - { - runViewStage(view, "while writing prefix to view", [&] { view.out->writePrefix(); }); - if (view.exception) - { - logQueryViews(); - std::rethrow_exception(view.exception); - } - } -} - -void PushingToViewsBlockOutputStream::writeSuffix() -{ - if (output) - output->writeSuffix(); - - if (views.empty()) - return; - - auto process_suffix = [](ViewRuntimeData & view) - { - view.out->writeSuffix(); - view.runtime_stats.setStatus(QueryViewsLogElement::ViewStatus::QUERY_FINISH); - }; - static std::string stage_step = "while writing suffix to view"; - - /// Run writeSuffix() for views in separate thread pool. - /// In could have been done in PushingToViewsBlockOutputStream::process, however - /// it is not good if insert into main table fail but into view succeed. - const Settings & settings = getContext()->getSettingsRef(); - size_t max_threads = 1; - if (settings.parallel_view_processing) - max_threads = settings.max_threads ? 
std::min(static_cast(settings.max_threads), views.size()) : views.size(); - bool exception_happened = false; - if (max_threads > 1) - { - ThreadPool pool(max_threads); - std::atomic_uint8_t exception_count = 0; - for (auto & view : views) - { - if (view.exception) - { - exception_happened = true; - continue; - } - pool.scheduleOrThrowOnError([&] { - setThreadName("PushingToViews"); - - runViewStage(view, stage_step, [&] { process_suffix(view); }); - if (view.exception) - exception_count.fetch_add(1, std::memory_order_relaxed); - }); - } - pool.wait(); - exception_happened |= exception_count.load(std::memory_order_relaxed) != 0; - } - else - { - for (auto & view : views) - { - if (view.exception) - { - exception_happened = true; - continue; - } - runViewStage(view, stage_step, [&] { process_suffix(view); }); - if (view.exception) - exception_happened = true; - } - } - - for (auto & view : views) - { - if (!view.exception) - LOG_TRACE( - log, - "Pushing ({}) from {} to {} took {} ms.", - max_threads <= 1 ? "sequentially" : ("parallel " + std::to_string(max_threads)), - storage->getStorageID().getNameForLogs(), - view.table_id.getNameForLogs(), - view.runtime_stats.elapsed_ms); - } - - if (exception_happened) - checkExceptionsInViews(); - - if (views.size() > 1) - { - UInt64 milliseconds = main_watch.elapsedMilliseconds(); - LOG_DEBUG(log, "Pushing from {} to {} views took {} ms.", storage->getStorageID().getNameForLogs(), views.size(), milliseconds); - } - logQueryViews(); -} - -void PushingToViewsBlockOutputStream::flush() -{ - if (output) - output->flush(); - - for (auto & view : views) - view.out->flush(); -} - -void PushingToViewsBlockOutputStream::process(const Block & block, ViewRuntimeData & view) -{ - BlockInputStreamPtr in; + /// We create a table with the same name as original table and the same alias columns, + /// but it will contain single block (that is INSERT-ed into main table). + /// InterpreterSelectQuery will do processing of alias columns. + auto local_context = Context::createCopy(context); + local_context->addViewSource(StorageValues::create( + views_data.source_storage_id, + views_data.source_metadata_snapshot->getColumns(), + block, + views_data.source_storage->getVirtuals())); /// We need keep InterpreterSelectQuery, until the processing will be finished, since: /// @@ -410,62 +386,50 @@ void PushingToViewsBlockOutputStream::process(const Block & block, ViewRuntimeDa /// (the problem raises only when function uses context from the /// execute*() method, like FunctionDictGet do) /// - These objects live inside query pipeline (DataStreams) and the reference become dangling. - std::optional select; + InterpreterSelectQuery select(view.query, local_context, SelectQueryOptions()); - if (view.runtime_stats.type == QueryViewsLogElement::ViewType::MATERIALIZED) - { - /// We create a table with the same name as original table and the same alias columns, - /// but it will contain single block (that is INSERT-ed into main table). - /// InterpreterSelectQuery will do processing of alias columns. 
+ auto pipeline = select.buildQueryPipeline(); + pipeline.resize(1); - auto local_context = Context::createCopy(select_context); - local_context->addViewSource( - StorageValues::create(storage->getStorageID(), metadata_snapshot->getColumns(), block, storage->getVirtuals())); - select.emplace(view.query, local_context, SelectQueryOptions()); - in = std::make_shared(select->execute().getInputStream()); + /// Squashing is needed here because the materialized view query can generate a lot of blocks + /// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY + /// and two-level aggregation is triggered). + pipeline.addTransform(std::make_shared( + pipeline.getHeader(), + context->getSettingsRef().min_insert_block_size_rows, + context->getSettingsRef().min_insert_block_size_bytes)); - /// Squashing is needed here because the materialized view query can generate a lot of blocks - /// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY - /// and two-level aggregation is triggered). - in = std::make_shared( - in, getContext()->getSettingsRef().min_insert_block_size_rows, getContext()->getSettingsRef().min_insert_block_size_bytes); - in = std::make_shared(in, view.out->getHeader(), ConvertingBlockInputStream::MatchColumnsMode::Name); - } - else - in = std::make_shared(block); + auto converting = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + view.sample_block.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name); - in->setProgressCallback([this](const Progress & progress) + pipeline.addTransform(std::make_shared( + pipeline.getHeader(), + std::make_shared(std::move(converting)))); + + pipeline.setProgressCallback([context](const Progress & progress) { CurrentThread::updateProgressIn(progress); - this->onProgress(progress); + if (auto callback = context->getProgressCallback()) + callback(progress); }); - in->readPrefix(); - - while (Block result_block = in->read()) + auto query_pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); + PullingPipelineExecutor executor(query_pipeline); + if (!executor.pull(block)) { - Nested::validateArraySizes(result_block); - view.out->write(result_block); + block.clear(); + return; } - in->readSuffix(); + if (executor.pull(block)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Single chunk is expected from view inner query {}", view.query); } -void PushingToViewsBlockOutputStream::checkExceptionsInViews() +static void logQueryViews(std::list & views, ContextPtr context) { - for (auto & view : views) - { - if (view.exception) - { - logQueryViews(); - std::rethrow_exception(view.exception); - } - } -} - -void PushingToViewsBlockOutputStream::logQueryViews() -{ - const auto & settings = getContext()->getSettingsRef(); + const auto & settings = context->getSettingsRef(); const UInt64 min_query_duration = settings.log_queries_min_query_duration_ms.totalMilliseconds(); const QueryViewsLogElement::ViewStatus min_status = settings.log_queries_min_type; if (views.empty() || !settings.log_queries || !settings.log_query_views) @@ -473,13 +437,14 @@ void PushingToViewsBlockOutputStream::logQueryViews() for (auto & view : views) { - if ((min_query_duration && view.runtime_stats.elapsed_ms <= min_query_duration) || (view.runtime_stats.event_status < min_status)) + const auto & stats = *view.runtime_stats; + if ((min_query_duration && stats.elapsed_ms <= min_query_duration) || (stats.event_status < min_status)) continue; try { - if 
(view.runtime_stats.thread_status) - view.runtime_stats.thread_status->logToQueryViewsLog(view); + if (stats.thread_status) + stats.thread_status->logToQueryViewsLog(view); } catch (...) { @@ -489,9 +454,226 @@ void PushingToViewsBlockOutputStream::logQueryViews() } -void PushingToViewsBlockOutputStream::onProgress(const Progress & progress) +CopyingDataToViewsTransform::CopyingDataToViewsTransform(const Block & header, ViewsDataPtr data) + : IProcessor({header}, OutputPorts(data->views.size(), header)) + , input(inputs.front()) + , views_data(std::move(data)) { - if (getContext()->getProgressCallback()) - getContext()->getProgressCallback()(progress); + if (views_data->views.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "CopyingDataToViewsTransform cannot have zero outputs"); } + +IProcessor::Status CopyingDataToViewsTransform::prepare() +{ + bool all_can_push = true; + for (auto & output : outputs) + { + if (output.isFinished()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot push data to view because output port is finished"); + + if (!output.canPush()) + all_can_push = false; + } + + if (!all_can_push) + return Status::PortFull; + + if (input.isFinished()) + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + + auto data = input.pullData(); + if (data.exception) + { + if (!views_data->has_exception) + { + views_data->first_exception = data.exception; + views_data->has_exception = true; + } + + for (auto & output : outputs) + output.pushException(data.exception); + } + else + { + for (auto & output : outputs) + output.push(data.chunk.clone()); + } + + return Status::PortFull; +} + + +ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( + const Block & header, + ViewRuntimeData & view_, + std::shared_ptr views_data_) + : ExceptionKeepingTransform(header, view_.sample_block) + , views_data(std::move(views_data_)) + , view(view_) +{ +} + +void ExecutingInnerQueryFromViewTransform::transform(Chunk & chunk) +{ + auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); + process(block, view, *views_data); + chunk.setColumns(block.getColumns(), block.rows()); +} + + +PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_) + : SinkToStorage(header) + , live_view(live_view_) + , storage_holder(std::move(storage_holder_)) + , context(std::move(context_)) +{ +} + +void PushingToLiveViewSink::consume(Chunk chunk) +{ + Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); + StorageLiveView::writeIntoLiveView(live_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); + CurrentThread::updateProgressIn(local_progress); +} + + +FinalizingViewsTransform::FinalizingViewsTransform(std::vector headers, ViewsDataPtr data) + : IProcessor(initPorts(std::move(headers)), {Block()}) + , output(outputs.front()) + , views_data(std::move(data)) +{ + statuses.resize(views_data->views.size()); +} + +InputPorts FinalizingViewsTransform::initPorts(std::vector headers) +{ + InputPorts res; + for (auto & header : headers) + res.emplace_back(std::move(header)); + + return res; +} + +IProcessor::Status FinalizingViewsTransform::prepare() +{ + if (output.isFinished()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot finalize views because output port is finished"); + + if (!output.canPush()) + return Status::PortFull; + + size_t num_finished = 
0; + size_t pos = 0; + for (auto & input : inputs) + { + auto i = pos; + ++pos; + + if (input.isFinished()) + { + ++num_finished; + continue; + } + + input.setNeeded(); + if (input.hasData()) + { + auto data = input.pullData(); + //std::cerr << "********** FinalizingViewsTransform got input " << i << " has exc " << bool(data.exception) << std::endl; + if (data.exception) + { + if (views_data->has_exception && views_data->first_exception == data.exception) + statuses[i].is_first = true; + else + statuses[i].exception = data.exception; + + if (i == 0 && statuses[0].is_first) + { + output.pushData(std::move(data)); + return Status::PortFull; + } + } + + if (input.isFinished()) + ++num_finished; + } + } + + if (num_finished == inputs.size()) + { + if (!statuses.empty()) + return Status::Ready; + + if (any_exception) + output.pushException(std::move(any_exception)); + + output.finish(); + return Status::Finished; + } + + return Status::NeedData; +} + +static std::exception_ptr addStorageToException(std::exception_ptr ptr, const StorageID & storage) +{ + try + { + std::rethrow_exception(std::move(ptr)); + } + catch (DB::Exception & exception) + { + exception.addMessage("while pushing to view {}", storage.getNameForLogs()); + return std::current_exception(); + } + catch (...) + { + return std::current_exception(); + } + + __builtin_unreachable(); +} + +void FinalizingViewsTransform::work() +{ + size_t i = 0; + for (auto & view : views_data->views) + { + auto & status = statuses[i]; + ++i; + + if (status.exception) + { + if (!any_exception) + any_exception = status.exception; + + view.setException(addStorageToException(std::move(status.exception), view.table_id)); + } + else + { + view.runtime_stats->setStatus(QueryViewsLogElement::ViewStatus::QUERY_FINISH); + + LOG_TRACE( + &Poco::Logger::get("PushingToViews"), + "Pushing ({}) from {} to {} took {} ms.", + views_data->max_threads <= 1 ? "sequentially" : ("parallel " + std::to_string(views_data->max_threads)), + views_data->source_storage_id.getNameForLogs(), + view.table_id.getNameForLogs(), + view.runtime_stats->elapsed_ms); + } + } + + logQueryViews(views_data->views, views_data->context); + + statuses.clear(); +} + } diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.h b/src/DataStreams/PushingToViewsBlockOutputStream.h index ba125e28829..75d0528ff7b 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.h +++ b/src/DataStreams/PushingToViewsBlockOutputStream.h @@ -3,7 +3,10 @@ #include #include #include +#include +#include #include +#include #include namespace Poco @@ -14,61 +17,44 @@ class Logger; namespace DB { -class ReplicatedMergeTreeSink; - struct ViewRuntimeData { + /// A query we should run over inserted block befire pushing into inner storage. const ASTPtr query; + /// This structure is expected by inner storage. Will convert query result to it. + Block sample_block; + /// Inner storage id. StorageID table_id; - BlockOutputStreamPtr out; + + /// In case of exception at any step (e.g. query execution or insertion into inner table) + /// exception is stored here (will be stored in query views log). std::exception_ptr exception; - QueryViewsLogElement::ViewRuntimeStats runtime_stats; + /// Info which is needed for query views log. 
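    /// (The stats object is QueryViewsLogElement::ViewRuntimeStats: it holds the per-view thread_status,
    /// elapsed_ms and the current ViewStatus; its status is updated in setException() below and in
    /// FinalizingViewsTransform::work(), and logQueryViews() reads it when writing to the query views log.)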
+ std::unique_ptr runtime_stats; void setException(std::exception_ptr e) { exception = e; - runtime_stats.setStatus(QueryViewsLogElement::ViewStatus::EXCEPTION_WHILE_PROCESSING); + runtime_stats->setStatus(QueryViewsLogElement::ViewStatus::EXCEPTION_WHILE_PROCESSING); } }; /** Writes data to the specified table and to all dependent materialized views. */ -class PushingToViewsBlockOutputStream : public IBlockOutputStream, WithContext -{ -public: - PushingToViewsBlockOutputStream( - const StoragePtr & storage_, - const StorageMetadataPtr & metadata_snapshot_, - ContextPtr context_, - const ASTPtr & query_ptr_, - bool no_destination = false); - - Block getHeader() const override; - void write(const Block & block) override; - - void flush() override; - void writePrefix() override; - void writeSuffix() override; - void onProgress(const Progress & progress) override; - -private: - StoragePtr storage; - StorageMetadataPtr metadata_snapshot; - BlockOutputStreamPtr output; - ReplicatedMergeTreeSink * replicated_output = nullptr; - Poco::Logger * log; - - ASTPtr query_ptr; - Stopwatch main_watch; - - std::vector views; - ContextMutablePtr select_context; - ContextMutablePtr insert_context; - - void process(const Block & block, ViewRuntimeData & view); - void checkExceptionsInViews(); - void logQueryViews(); -}; - +Chain buildPushingToViewsChain( + const StoragePtr & storage, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context, + const ASTPtr & query_ptr, + /// It is true when we should not insert into table, but only to views. + /// Used e.g. for kafka. We should try to remove it somehow. + bool no_destination, + /// We could specify separate thread_status for each view. + /// Needed mainly to collect counters separately. Should be improved. + ThreadStatus * thread_status, + /// Counter to measure time spent separately per view. Should be improved. + std::atomic_uint64_t * elapsed_counter_ms, + /// LiveView executes query itself, it needs source block structure. + const Block & live_view_header = {}); } diff --git a/src/DataStreams/RemoteBlockOutputStream.cpp b/src/DataStreams/RemoteBlockOutputStream.cpp index 976c4671652..106add68c5c 100644 --- a/src/DataStreams/RemoteBlockOutputStream.cpp +++ b/src/DataStreams/RemoteBlockOutputStream.cpp @@ -18,11 +18,12 @@ namespace ErrorCodes } -RemoteBlockOutputStream::RemoteBlockOutputStream(Connection & connection_, - const ConnectionTimeouts & timeouts, - const String & query_, - const Settings & settings_, - const ClientInfo & client_info_) +RemoteInserter::RemoteInserter( + Connection & connection_, + const ConnectionTimeouts & timeouts, + const String & query_, + const Settings & settings_, + const ClientInfo & client_info_) : connection(connection_), query(query_) { ClientInfo modified_client_info = client_info_; @@ -70,11 +71,8 @@ RemoteBlockOutputStream::RemoteBlockOutputStream(Connection & connection_, } -void RemoteBlockOutputStream::write(const Block & block) +void RemoteInserter::write(Block block) { - if (header) - assertBlocksHaveEqualStructure(block, header, "RemoteBlockOutputStream"); - try { connection.sendData(block); @@ -94,14 +92,14 @@ void RemoteBlockOutputStream::write(const Block & block) } -void RemoteBlockOutputStream::writePrepared(ReadBuffer & input, size_t size) +void RemoteInserter::writePrepared(ReadBuffer & buf, size_t size) { /// We cannot use 'header'. Input must contain block with proper structure. 
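    /// (The buffer must already contain a serialized, possibly compressed block, as described for
    /// writePrepared() in the header; RemoteInserter only forwards it to the connection.)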
- connection.sendPreparedData(input, size); + connection.sendPreparedData(buf, size); } -void RemoteBlockOutputStream::writeSuffix() +void RemoteInserter::onFinish() { /// Empty block means end of data. connection.sendData(Block()); @@ -127,7 +125,7 @@ void RemoteBlockOutputStream::writeSuffix() finished = true; } -RemoteBlockOutputStream::~RemoteBlockOutputStream() +RemoteInserter::~RemoteInserter() { /// If interrupted in the middle of the loop of communication with the server, then interrupt the connection, /// to not leave the connection in unsynchronized state. diff --git a/src/DataStreams/RemoteBlockOutputStream.h b/src/DataStreams/RemoteBlockOutputStream.h index 2c89a7358ad..f1f49015c9d 100644 --- a/src/DataStreams/RemoteBlockOutputStream.h +++ b/src/DataStreams/RemoteBlockOutputStream.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include @@ -17,24 +17,25 @@ struct Settings; /** Allow to execute INSERT query on remote server and send data for it. */ -class RemoteBlockOutputStream : public IBlockOutputStream +class RemoteInserter { public: - RemoteBlockOutputStream(Connection & connection_, - const ConnectionTimeouts & timeouts, - const String & query_, - const Settings & settings_, - const ClientInfo & client_info_); + RemoteInserter( + Connection & connection_, + const ConnectionTimeouts & timeouts, + const String & query_, + const Settings & settings_, + const ClientInfo & client_info_); - Block getHeader() const override { return header; } - - void write(const Block & block) override; - void writeSuffix() override; + void write(Block block); + void onFinish(); /// Send pre-serialized and possibly pre-compressed block of data, that will be read from 'input'. - void writePrepared(ReadBuffer & input, size_t size = 0); + void writePrepared(ReadBuffer & buf, size_t size = 0); - ~RemoteBlockOutputStream() override; + ~RemoteInserter(); + + const Block & getHeader() const { return header; } private: Connection & connection; @@ -43,4 +44,23 @@ private: bool finished = false; }; +class RemoteSink final : public RemoteInserter, public SinkToStorage +{ +public: + explicit RemoteSink( + Connection & connection_, + const ConnectionTimeouts & timeouts, + const String & query_, + const Settings & settings_, + const ClientInfo & client_info_) + : RemoteInserter(connection_, timeouts, query_, settings_, client_info_) + , SinkToStorage(RemoteInserter::getHeader()) + { + } + + String getName() const override { return "RemoteSink"; } + void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } + void onFinish() override { RemoteInserter::onFinish(); } +}; + } diff --git a/src/DataStreams/ShellCommandSource.h b/src/DataStreams/ShellCommandSource.h index 21d0acaf81a..3f8f648f18a 100644 --- a/src/DataStreams/ShellCommandSource.h +++ b/src/DataStreams/ShellCommandSource.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include @@ -97,7 +97,7 @@ public: max_block_size = configuration.number_of_rows_to_read; } - pipeline.init(Pipe(FormatFactory::instance().getInput(format, command->out, sample_block, context, max_block_size))); + pipeline = QueryPipeline(Pipe(FormatFactory::instance().getInput(format, command->out, sample_block, context, max_block_size))); executor = std::make_unique(pipeline); } diff --git a/src/DataStreams/SquashingTransform.cpp b/src/DataStreams/SquashingTransform.cpp index 1f6ca8a7306..ea99dc49780 100644 --- a/src/DataStreams/SquashingTransform.cpp +++ 
b/src/DataStreams/SquashingTransform.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB @@ -67,7 +68,6 @@ Block SquashingTransform::addImpl(ReferenceType input_block) } append(std::move(input_block)); - if (isEnoughSize(accumulated_block)) { Block to_return; diff --git a/src/DataStreams/TemporaryFileStream.h b/src/DataStreams/TemporaryFileStream.h index ec38f6c1baa..ee7660db717 100644 --- a/src/DataStreams/TemporaryFileStream.h +++ b/src/DataStreams/TemporaryFileStream.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -35,12 +35,13 @@ struct TemporaryFileStream {} /// Flush data from input stream into file for future reading - static void write(const std::string & path, const Block & header, QueryPipeline pipeline, const std::string & codec) + static void write(const std::string & path, const Block & header, QueryPipelineBuilder builder, const std::string & codec) { WriteBufferFromFile file_buf(path); CompressedWriteBuffer compressed_buf(file_buf, CompressionCodecFactory::instance().get(codec, {})); NativeBlockOutputStream output(compressed_buf, 0, header); + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); PullingPipelineExecutor executor(pipeline); output.writePrefix(); diff --git a/src/DataStreams/tests/gtest_blocks_size_merging_streams.cpp b/src/DataStreams/tests/gtest_blocks_size_merging_streams.cpp index aa4c717a28b..71cf41fcbab 100644 --- a/src/DataStreams/tests/gtest_blocks_size_merging_streams.cpp +++ b/src/DataStreams/tests/gtest_blocks_size_merging_streams.cpp @@ -87,9 +87,8 @@ TEST(MergingSortedTest, SimpleBlockSizeTest) pipe.addTransform(std::move(transform)); - QueryPipeline pipeline; - pipeline.init(std::move(pipe)); - pipeline.setMaxThreads(1); + QueryPipeline pipeline(std::move(pipe)); + pipeline.setNumThreads(1); auto stream = std::make_shared(std::move(pipeline)); size_t total_rows = 0; @@ -132,9 +131,8 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes) pipe.addTransform(std::move(transform)); - QueryPipeline pipeline; - pipeline.init(std::move(pipe)); - pipeline.setMaxThreads(1); + QueryPipeline pipeline(std::move(pipe)); + pipeline.setNumThreads(1); auto stream = std::make_shared(std::move(pipeline)); auto block1 = stream->read(); diff --git a/src/DataStreams/tests/gtest_check_sorted_stream.cpp b/src/DataStreams/tests/gtest_check_sorted_stream.cpp index 2788c44389b..04ccc64fd7c 100644 --- a/src/DataStreams/tests/gtest_check_sorted_stream.cpp +++ b/src/DataStreams/tests/gtest_check_sorted_stream.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -97,8 +98,7 @@ TEST(CheckSortedBlockInputStream, CheckGoodCase) return std::make_shared(header, sort_description); }); - QueryPipeline pipeline; - pipeline.init(std::move(pipe)); + QueryPipeline pipeline(std::move(pipe)); PullingPipelineExecutor executor(pipeline); @@ -125,8 +125,7 @@ TEST(CheckSortedBlockInputStream, CheckBadLastRow) return std::make_shared(header, sort_description); }); - QueryPipeline pipeline; - pipeline.init(std::move(pipe)); + QueryPipeline pipeline(std::move(pipe)); PullingPipelineExecutor executor(pipeline); @@ -150,8 +149,7 @@ TEST(CheckSortedBlockInputStream, CheckUnsortedBlock1) return std::make_shared(header, sort_description); }); - QueryPipeline pipeline; - pipeline.init(std::move(pipe)); + QueryPipeline pipeline(std::move(pipe)); PullingPipelineExecutor executor(pipeline); @@ -172,8 +170,7 @@ TEST(CheckSortedBlockInputStream, CheckUnsortedBlock2) return std::make_shared(header, sort_description); }); - 
QueryPipeline pipeline; - pipeline.init(std::move(pipe)); + QueryPipeline pipeline(std::move(pipe)); PullingPipelineExecutor executor(pipeline); @@ -194,8 +191,7 @@ TEST(CheckSortedBlockInputStream, CheckUnsortedBlock3) return std::make_shared(header, sort_description); }); - QueryPipeline pipeline; - pipeline.init(std::move(pipe)); + QueryPipeline pipeline(std::move(pipe)); PullingPipelineExecutor executor(pipeline); @@ -218,8 +214,7 @@ TEST(CheckSortedBlockInputStream, CheckEqualBlock) return std::make_shared(header, sort_description); }); - QueryPipeline pipeline; - pipeline.init(std::move(pipe)); + QueryPipeline pipeline(std::move(pipe)); PullingPipelineExecutor executor(pipeline); diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index 0399ec59b16..73800e84256 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -18,9 +18,18 @@ void DDLDependencyVisitor::visit(const ASTPtr & ast, Data & data) visit(*dict_source, data); } -bool DDLDependencyVisitor::needChildVisit(const ASTPtr & node, const ASTPtr & /*child*/) +bool DDLDependencyVisitor::needChildVisit(const ASTPtr & node, const ASTPtr & child) { - return !node->as(); + if (node->as()) + return false; + + if (auto * create = node->as()) + { + if (child.get() == create->select) + return false; + } + + return true; } void DDLDependencyVisitor::visit(const ASTFunction & function, Data & data) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index e941e18625d..9d1bcb51dba 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -187,6 +187,11 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo if (metadata.settings_changes) storage_ast.set(storage_ast.settings, metadata.settings_changes); } + + if (metadata.comment.empty()) + storage_ast.reset(storage_ast.comment); + else + storage_ast.set(storage_ast.comment, std::make_shared(metadata.comment)); } } diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 793b4f15d60..72c505bde94 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -13,7 +13,7 @@ # include # include # include -# include +# include # include # include # include @@ -284,8 +284,7 @@ std::map DatabaseMySQL::fetchTablesWithModificationTime(ContextP std::map tables_with_modification_time; StreamSettings mysql_input_stream_settings(local_context->getSettingsRef()); auto result = std::make_unique(mysql_pool.get(), query.str(), tables_status_sample_block, mysql_input_stream_settings); - QueryPipeline pipeline; - pipeline.init(Pipe(std::move(result))); + QueryPipeline pipeline(std::move(result)); Block block; PullingPipelineExecutor executor(pipeline); diff --git a/src/Databases/MySQL/FetchTablesColumnsList.cpp b/src/Databases/MySQL/FetchTablesColumnsList.cpp index c67dcefb433..618f6bf6d34 100644 --- a/src/Databases/MySQL/FetchTablesColumnsList.cpp +++ b/src/Databases/MySQL/FetchTablesColumnsList.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include @@ -88,8 +88,7 @@ std::map fetchTablesColumnsList( StreamSettings mysql_input_stream_settings(settings); auto result = std::make_unique(pool.get(), query.str(), tables_columns_sample_block, mysql_input_stream_settings); - QueryPipeline pipeline; - pipeline.init(Pipe(std::move(result))); + QueryPipeline pipeline(std::move(result)); Block block; PullingPipelineExecutor 
executor(pipeline); diff --git a/src/Databases/MySQL/MaterializeMetadata.cpp b/src/Databases/MySQL/MaterializeMetadata.cpp index f684797c675..f55cddfab59 100644 --- a/src/Databases/MySQL/MaterializeMetadata.cpp +++ b/src/Databases/MySQL/MaterializeMetadata.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -44,8 +44,7 @@ static std::unordered_map fetchTablesCreateQuery( connection, "SHOW CREATE TABLE " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(fetch_table_name), show_create_table_header, mysql_input_stream_settings); - QueryPipeline pipeline; - pipeline.init(Pipe(std::move(show_create_table))); + QueryPipeline pipeline(std::move(show_create_table)); Block create_query_block; PullingPipelineExecutor executor(pipeline); @@ -69,8 +68,7 @@ static std::vector fetchTablesInDB(const mysqlxx::PoolWithFailover::Entr StreamSettings mysql_input_stream_settings(global_settings); auto input = std::make_unique(connection, query, header, mysql_input_stream_settings); - QueryPipeline pipeline; - pipeline.init(Pipe(std::move(input))); + QueryPipeline pipeline(std::move(input)); Block block; PullingPipelineExecutor executor(pipeline); @@ -97,8 +95,7 @@ void MaterializeMetadata::fetchMasterStatus(mysqlxx::PoolWithFailover::Entry & c StreamSettings mysql_input_stream_settings(settings, false, true); auto input = std::make_unique(connection, "SHOW MASTER STATUS;", header, mysql_input_stream_settings); - QueryPipeline pipeline; - pipeline.init(Pipe(std::move(input))); + QueryPipeline pipeline(std::move(input)); Block master_status; PullingPipelineExecutor executor(pipeline); @@ -125,8 +122,7 @@ void MaterializeMetadata::fetchMasterVariablesValue(const mysqlxx::PoolWithFailo const String & fetch_query = "SHOW VARIABLES WHERE Variable_name = 'binlog_checksum'"; StreamSettings mysql_input_stream_settings(settings, false, true); auto variables_input = std::make_unique(connection, fetch_query, variables_header, mysql_input_stream_settings); - QueryPipeline pipeline; - pipeline.init(Pipe(std::move(variables_input))); + QueryPipeline pipeline(std::move(variables_input)); Block variables_block; PullingPipelineExecutor executor(pipeline); @@ -153,8 +149,7 @@ static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & conne String grants_query, sub_privs; StreamSettings mysql_input_stream_settings(global_settings); auto input = std::make_unique(connection, "SHOW GRANTS FOR CURRENT_USER();", sync_user_privs_header, mysql_input_stream_settings); - QueryPipeline pipeline; - pipeline.init(Pipe(std::move(input))); + QueryPipeline pipeline(std::move(input)); Block block; PullingPipelineExecutor executor(pipeline); @@ -204,8 +199,7 @@ bool MaterializeMetadata::checkBinlogFileExists(const mysqlxx::PoolWithFailover: StreamSettings mysql_input_stream_settings(settings, false, true); auto input = std::make_unique(connection, "SHOW MASTER LOGS", logs_header, mysql_input_stream_settings); - QueryPipeline pipeline; - pipeline.init(Pipe(std::move(input))); + QueryPipeline pipeline(std::move(input)); Block block; PullingPipelineExecutor executor(pipeline); diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 5a51704e98b..cd667903113 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -9,8 +9,10 @@ # include # include # include -# include +# include # include +# include +# include # include # include # include @@ -112,8 
+114,7 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const S {"log_bin_use_v1_row_events", "OFF"} }; - QueryPipeline pipeline; - pipeline.init(Pipe(std::move(variables_input))); + QueryPipeline pipeline(std::move(variables_input)); PullingPipelineExecutor executor(pipeline); Block variables_block; @@ -288,7 +289,7 @@ static inline void cleanOutdatedTables(const String & database_name, ContextPtr } } -static inline BlockOutputStreamPtr +static inline QueryPipeline getTableOutput(const String & database_name, const String & table_name, ContextMutablePtr query_context, bool insert_materialized = false) { const StoragePtr & storage = DatabaseCatalog::instance().getTable(StorageID(database_name, table_name), query_context); @@ -312,10 +313,7 @@ getTableOutput(const String & database_name, const String & table_name, ContextM BlockIO res = tryToExecuteQuery("INSERT INTO " + backQuoteIfNeed(table_name) + "(" + insert_columns_str.str() + ")" + " VALUES", query_context, database_name, comment); - if (!res.out) - throw Exception("LOGICAL ERROR: It is a bug.", ErrorCodes::LOGICAL_ERROR); - - return res.out; + return std::move(res.pipeline); } static inline void dumpDataForTables( @@ -333,25 +331,21 @@ static inline void dumpDataForTables( String comment = "Materialize MySQL step 1: execute MySQL DDL for dump data"; tryToExecuteQuery(query_prefix + " " + iterator->second, query_context, database_name, comment); /// create table. - auto out = std::make_shared(getTableOutput(database_name, table_name, query_context)); + auto pipeline = getTableOutput(database_name, table_name, query_context); StreamSettings mysql_input_stream_settings(context->getSettingsRef()); auto input = std::make_unique( connection, "SELECT * FROM " + backQuoteIfNeed(mysql_database_name) + "." + backQuoteIfNeed(table_name), - out->getHeader(), mysql_input_stream_settings); - - QueryPipeline pipeline; - pipeline.init(Pipe(std::move(input))); - PullingPipelineExecutor executor(pipeline); + pipeline.getHeader(), mysql_input_stream_settings); + auto counting = std::make_shared(pipeline.getHeader()); + Pipe pipe(std::move(input)); + pipe.addTransform(counting); + pipeline.complete(std::move(pipe)); Stopwatch watch; + CompletedPipelineExecutor executor(pipeline); + executor.execute(); - out->writePrefix(); - Block block; - while (executor.pull(block)) - out->write(block); - out->writeSuffix(); - - const Progress & progress = out->getProgress(); + const Progress & progress = counting->getProgress(); LOG_INFO(&Poco::Logger::get("MaterializedMySQLSyncThread(" + database_name + ")"), "Materialize MySQL step 1: dump {}, {} rows, {} in {} sec., {} rows/sec., {}/sec." 
, table_name, formatReadableQuantity(progress.written_rows), formatReadableSizeWithBinarySuffix(progress.written_bytes) @@ -801,9 +795,12 @@ void MaterializedMySQLSyncThread::Buffers::commit(ContextPtr context) for (auto & table_name_and_buffer : data) { auto query_context = createQueryContext(context); - OneBlockInputStream input(table_name_and_buffer.second->first); - BlockOutputStreamPtr out = getTableOutput(database, table_name_and_buffer.first, query_context, true); - copyData(input, *out); + auto input = std::make_shared(table_name_and_buffer.second->first); + auto pipeline = getTableOutput(database, table_name_and_buffer.first, query_context, true); + pipeline.complete(Pipe(std::move(input))); + + CompletedPipelineExecutor executor(pipeline); + executor.execute(); } data.clear(); diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index 0b2044cfe2c..bbf596ba55a 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -14,7 +14,7 @@ #include #include -#include +#include namespace ProfileEvents { @@ -573,9 +573,9 @@ void CacheDictionary::update(CacheDictionaryUpdateUnitPtrloadIds(requested_keys_vector)); + pipeline = QueryPipeline(current_source_ptr->loadIds(requested_keys_vector)); else - pipeline.init(current_source_ptr->loadKeys(update_unit_ptr->key_columns, requested_complex_key_rows)); + pipeline = QueryPipeline(current_source_ptr->loadKeys(update_unit_ptr->key_columns, requested_complex_key_rows)); size_t skip_keys_size_offset = dict_struct.getKeysSize(); PaddedPODArray found_keys_in_source; diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 7348569442d..806511248a3 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -162,39 +163,39 @@ std::string ClickHouseDictionarySource::toString() const Pipe ClickHouseDictionarySource::createStreamForQuery(const String & query, std::atomic * result_size_hint) { - QueryPipeline pipeline; + QueryPipelineBuilder builder; /// Sample block should not contain first row default values auto empty_sample_block = sample_block.cloneEmpty(); if (configuration.is_local) { - pipeline = executeQuery(query, context, true).pipeline; + builder.init(executeQuery(query, context, true).pipeline); auto converting = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), + builder.getHeader().getColumnsWithTypeAndName(), empty_sample_block.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Position); - pipeline.addSimpleTransform([&](const Block & header) + builder.addSimpleTransform([&](const Block & header) { return std::make_shared(header, std::make_shared(converting)); }); } else { - pipeline.init(Pipe(std::make_shared( + builder.init(Pipe(std::make_shared( std::make_shared(pool, query, empty_sample_block, context), false, false))); } if (result_size_hint) { - pipeline.setProgressCallback([result_size_hint](const Progress & progress) + builder.setProgressCallback([result_size_hint](const Progress & progress) { *result_size_hint += progress.total_rows_to_read; }); } - return QueryPipeline::getPipe(std::move(pipeline)); + return QueryPipelineBuilder::getPipe(std::move(builder)); } std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & request) const @@ -203,16 +204,15 @@ std::string 
ClickHouseDictionarySource::doInvalidateQuery(const std::string & re if (configuration.is_local) { auto query_context = Context::createCopy(context); - auto pipe = QueryPipeline::getPipe(executeQuery(request, query_context, true).pipeline); - return readInvalidateQuery(std::move(pipe)); + return readInvalidateQuery(executeQuery(request, query_context, true).pipeline); } else { /// We pass empty block to RemoteBlockInputStream, because we don't know the structure of the result. Block invalidate_sample_block; - Pipe pipe(std::make_shared( + QueryPipeline pipeline(std::make_shared( std::make_shared(pool, request, invalidate_sample_block, context), false, false)); - return readInvalidateQuery(std::move(pipe)); + return readInvalidateQuery(std::move(pipeline)); } } diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 4ed3b74226e..6266bd2cf4f 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include namespace DB @@ -567,8 +567,7 @@ void mergeBlockWithPipe( auto result_fetched_columns = block_to_update.cloneEmptyColumns(); - QueryPipeline pipeline; - pipeline.init(std::move(pipe)); + QueryPipeline pipeline(std::move(pipe)); PullingPipelineExecutor executor(pipeline); Block block; diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index 10e7414b42f..03d3b579ec3 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include namespace DB @@ -68,8 +68,7 @@ Columns DirectDictionary::getColumns( size_t dictionary_keys_size = dict_struct.getKeysNames().size(); block_key_columns.reserve(dictionary_keys_size); - QueryPipeline pipeline; - pipeline.init(getSourceBlockInputStream(key_columns, requested_keys)); + QueryPipeline pipeline(getSourceBlockInputStream(key_columns, requested_keys)); PullingPipelineExecutor executor(pipeline); @@ -185,9 +184,7 @@ ColumnUInt8::Ptr DirectDictionary::hasKeys( size_t dictionary_keys_size = dict_struct.getKeysNames().size(); block_key_columns.reserve(dictionary_keys_size); - QueryPipeline pipeline; - pipeline.init(getSourceBlockInputStream(key_columns, requested_keys)); - + QueryPipeline pipeline(getSourceBlockInputStream(key_columns, requested_keys)); PullingPipelineExecutor executor(pipeline); size_t keys_found = 0; diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 5ecf3299ea6..e14ee5d30d1 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include @@ -322,8 +322,7 @@ void FlatDictionary::updateData() { if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) { - QueryPipeline pipeline; - pipeline.init(source_ptr->loadUpdatedAll()); + QueryPipeline pipeline(source_ptr->loadUpdatedAll()); PullingPipelineExecutor executor(pipeline); Block block; @@ -358,8 +357,7 @@ void FlatDictionary::loadData() { if (!source_ptr->hasUpdateField()) { - QueryPipeline pipeline; - pipeline.init(source_ptr->loadAll()); + QueryPipeline pipeline(source_ptr->loadAll()); PullingPipelineExecutor executor(pipeline); Block block; diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index fd5865e24c0..1f3821096da 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -367,8 +367,7 @@ 
void HashedDictionary::updateData() if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) { - QueryPipeline pipeline; - pipeline.init(source_ptr->loadUpdatedAll()); + QueryPipeline pipeline(source_ptr->loadUpdatedAll()); PullingPipelineExecutor executor(pipeline); Block block; @@ -563,9 +562,9 @@ void HashedDictionary::loadData() QueryPipeline pipeline; if (configuration.preallocate) - pipeline.init(source_ptr->loadAllWithSizeHint(&new_size)); + pipeline = QueryPipeline(source_ptr->loadAllWithSizeHint(&new_size)); else - pipeline.init(source_ptr->loadAll()); + pipeline = QueryPipeline(source_ptr->loadAll()); PullingPipelineExecutor executor(pipeline); Block block; diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index b688362d048..6514ecd9e3e 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -352,8 +352,7 @@ void IPAddressDictionary::createAttributes() void IPAddressDictionary::loadData() { - QueryPipeline pipeline; - pipeline.init(source_ptr->loadAll()); + QueryPipeline pipeline(source_ptr->loadAll()); std::vector ip_records; diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index c740cc7548c..5779ddc7c88 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -307,7 +308,7 @@ std::string MySQLDictionarySource::doInvalidateQuery(const std::string & request Block invalidate_sample_block; ColumnPtr column(ColumnString::create()); invalidate_sample_block.insert(ColumnWithTypeAndName(column, std::make_shared(), "Sample Block")); - return readInvalidateQuery(Pipe(std::make_unique(pool->get(), request, invalidate_sample_block, settings))); + return readInvalidateQuery(QueryPipeline(std::make_unique(pool->get(), request, invalidate_sample_block, settings))); } } diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index f10aa071442..06a705a351e 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -165,8 +165,7 @@ void IPolygonDictionary::blockToAttributes(const DB::Block & block) void IPolygonDictionary::loadData() { - QueryPipeline pipeline; - pipeline.init(source_ptr->loadAll()); + QueryPipeline pipeline(source_ptr->loadAll()); PullingPipelineExecutor executor(pipeline); Block block; diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index 56b75f024ad..484d16e1f03 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -11,6 +11,7 @@ #include #include "readInvalidateQuery.h" #include +#include #include #endif @@ -125,7 +126,7 @@ std::string PostgreSQLDictionarySource::doInvalidateQuery(const std::string & re Block invalidate_sample_block; ColumnPtr column(ColumnString::create()); invalidate_sample_block.insert(ColumnWithTypeAndName(column, std::make_shared(), "Sample Block")); - return readInvalidateQuery(Pipe(std::make_unique>(pool->get(), request, invalidate_sample_block, 1))); + return readInvalidateQuery(QueryPipeline(std::make_unique>(pool->get(), request, invalidate_sample_block, 1))); } diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index 390871661c7..86dfaa615ed 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ 
b/src/Dictionaries/RangeHashedDictionary.cpp @@ -303,8 +303,7 @@ void RangeHashedDictionary::createAttributes() template void RangeHashedDictionary::loadData() { - QueryPipeline pipeline; - pipeline.init(source_ptr->loadAll()); + QueryPipeline pipeline(source_ptr->loadAll()); PullingPipelineExecutor executor(pipeline); Block block; diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index bf7526580c0..3fffe82c9d5 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -18,6 +18,7 @@ #include "readInvalidateQuery.h" #include "registerDictionaries.h" #include +#include namespace DB @@ -199,7 +200,7 @@ std::string XDBCDictionarySource::doInvalidateQuery(const std::string & request) for (const auto & [name, value] : url_params) invalidate_url.addQueryParameter(name, value); - return readInvalidateQuery(loadFromQuery(invalidate_url, invalidate_sample_block, request)); + return readInvalidateQuery(QueryPipeline(loadFromQuery(invalidate_url, invalidate_sample_block, request))); } diff --git a/src/Dictionaries/readInvalidateQuery.cpp b/src/Dictionaries/readInvalidateQuery.cpp index bd1ec0e0983..c2c42eece58 100644 --- a/src/Dictionaries/readInvalidateQuery.cpp +++ b/src/Dictionaries/readInvalidateQuery.cpp @@ -1,5 +1,5 @@ #include "readInvalidateQuery.h" -#include +#include #include #include #include @@ -15,11 +15,8 @@ namespace ErrorCodes extern const int RECEIVED_EMPTY_DATA; } -std::string readInvalidateQuery(Pipe pipe) +std::string readInvalidateQuery(QueryPipeline pipeline) { - QueryPipeline pipeline; - pipeline.init(std::move(pipe)); - PullingPipelineExecutor executor(pipeline); Block block; diff --git a/src/Dictionaries/readInvalidateQuery.h b/src/Dictionaries/readInvalidateQuery.h index 61d5b29dc89..52cd9f54b2b 100644 --- a/src/Dictionaries/readInvalidateQuery.h +++ b/src/Dictionaries/readInvalidateQuery.h @@ -5,9 +5,9 @@ namespace DB { -class Pipe; +class QueryPipeline; /// Using in MySQLDictionarySource and XDBCDictionarySource after processing invalidate_query. -std::string readInvalidateQuery(Pipe pipe); +std::string readInvalidateQuery(QueryPipeline pipeline); } diff --git a/src/Functions/CountSubstringsImpl.h b/src/Functions/CountSubstringsImpl.h index 2c2ae20488f..6668ca0a392 100644 --- a/src/Functions/CountSubstringsImpl.h +++ b/src/Functions/CountSubstringsImpl.h @@ -19,11 +19,12 @@ namespace ErrorCodes /// NOTE: Intersecting substrings in haystack accounted only once, i.e.: /// /// countSubstrings('aaaa', 'aa') == 2 -template +template struct CountSubstringsImpl { static constexpr bool use_default_implementation_for_constants = false; static constexpr bool supports_start_pos = true; + static constexpr auto name = Name::name; using ResultType = UInt64; @@ -225,7 +226,7 @@ struct CountSubstringsImpl template static void vectorFixedConstant(Args &&...) 
{ - throw Exception("Functions 'position' don't support FixedString haystack argument", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); } }; diff --git a/src/Functions/FunctionsMultiStringFuzzySearch.h b/src/Functions/FunctionsMultiStringFuzzySearch.h index f0e1437b2aa..0bbb8ade939 100644 --- a/src/Functions/FunctionsMultiStringFuzzySearch.h +++ b/src/Functions/FunctionsMultiStringFuzzySearch.h @@ -28,13 +28,13 @@ namespace ErrorCodes } -template +template class FunctionsMultiStringFuzzySearch : public IFunction { static_assert(LimitArgs > 0); public: - static constexpr auto name = Name::name; + static constexpr auto name = Impl::name; static FunctionPtr create(ContextPtr context) { if (Impl::is_using_hyperscan && !context->getSettingsRef().allow_hyperscan) diff --git a/src/Functions/FunctionsMultiStringSearch.h b/src/Functions/FunctionsMultiStringSearch.h index 6535035469f..4576979681b 100644 --- a/src/Functions/FunctionsMultiStringSearch.h +++ b/src/Functions/FunctionsMultiStringSearch.h @@ -41,13 +41,13 @@ namespace ErrorCodes /// The argument limiting raises from Volnitsky searcher -- it is performance crucial to save only one byte for pattern number. /// But some other searchers use this function, for example, multiMatchAny -- hyperscan does not have such restrictions -template ::max()> +template ::max()> class FunctionsMultiStringSearch : public IFunction { static_assert(LimitArgs > 0); public: - static constexpr auto name = Name::name; + static constexpr auto name = Impl::name; static FunctionPtr create(ContextPtr context) { if (Impl::is_using_hyperscan && !context->getSettingsRef().allow_hyperscan) diff --git a/src/Functions/FunctionsStringSearch.h b/src/Functions/FunctionsStringSearch.h index d8463e69cf3..4aa76ceec28 100644 --- a/src/Functions/FunctionsStringSearch.h +++ b/src/Functions/FunctionsStringSearch.h @@ -46,11 +46,11 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -template +template class FunctionsStringSearch : public IFunction { public: - static constexpr auto name = Name::name; + static constexpr auto name = Impl::name; static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } @@ -80,7 +80,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() < 2 || 3 < arguments.size()) - throw Exception("Number of arguments for function " + String(Name::name) + " doesn't match: passed " + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be 2 or 3.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); diff --git a/src/Functions/FunctionsVisitParam.h b/src/Functions/FunctionsVisitParam.h index 922ea44be9a..362c3bcd693 100644 --- a/src/Functions/FunctionsVisitParam.h +++ b/src/Functions/FunctionsVisitParam.h @@ -74,13 +74,14 @@ struct ExtractNumericType * If a field was not found or an incorrect value is associated with the field, * then the default value used - 0. */ -template +template struct ExtractParamImpl { using ResultType = typename ParamExtractor::ResultType; static constexpr bool use_default_implementation_for_constants = true; static constexpr bool supports_start_pos = false; + static constexpr auto name = Name::name; /// It is assumed that `res` is the correct size and initialized with zeros. 
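    /// Note: as in the other search Impls touched by this patch, the struct now exposes
    /// `static constexpr auto name = Name::name;`, so the ILLEGAL_TYPE_OF_ARGUMENT / ILLEGAL_COLUMN
    /// messages below can report the concrete function name via '{}' instead of hard-coding
    /// "visitParamHas" / "visitParamExtract*".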
static void vectorConstant( @@ -91,7 +92,7 @@ struct ExtractParamImpl PaddedPODArray & res) { if (start_pos != nullptr) - throw Exception("Functions 'visitParamHas' and 'visitParamExtract*' doesn't support start_pos argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' doesn't support start_pos argument", name); /// We are looking for a parameter simply as a substring of the form "name" needle = "\"" + needle + "\":"; @@ -131,18 +132,18 @@ struct ExtractParamImpl template static void vectorVector(Args &&...) { - throw Exception("Functions 'visitParamHas' and 'visitParamExtract*' doesn't support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support non-constant needle argument", name); } template static void constantVector(Args &&...) { - throw Exception("Functions 'visitParamHas' and 'visitParamExtract*' doesn't support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support non-constant needle argument", name); } template static void vectorFixedConstant(Args &&...) { - throw Exception("Functions 'visitParamHas' don't support FixedString haystack argument", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); } }; diff --git a/src/Functions/HasTokenImpl.h b/src/Functions/HasTokenImpl.h index 044c50b6742..ec33a07fce3 100644 --- a/src/Functions/HasTokenImpl.h +++ b/src/Functions/HasTokenImpl.h @@ -14,13 +14,14 @@ namespace ErrorCodes /** Token search the string, means that needle must be surrounded by some separator chars, like whitespace or puctuation. */ -template +template struct HasTokenImpl { using ResultType = UInt8; static constexpr bool use_default_implementation_for_constants = true; static constexpr bool supports_start_pos = false; + static constexpr auto name = Name::name; static void vectorConstant( const ColumnString::Chars & data, @@ -30,7 +31,7 @@ struct HasTokenImpl PaddedPODArray & res) { if (start_pos != nullptr) - throw Exception("Function 'hasToken' does not support start_pos argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' does not support start_pos argument", name); if (offsets.empty()) return; @@ -72,20 +73,20 @@ struct HasTokenImpl template static void vectorVector(Args &&...) { - throw Exception("Function 'hasToken' does not support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support non-constant needle argument", name); } /// Search different needles in single haystack. template static void constantVector(Args &&...) { - throw Exception("Function 'hasToken' does not support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support non-constant needle argument", name); } template static void vectorFixedConstant(Args &&...) 
{ - throw Exception("Functions 'hasToken' don't support FixedString haystack argument", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); } }; diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index e1e8394f7b1..4c8db97aded 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -73,11 +73,12 @@ static inline bool likePatternIsStrstr(const String & pattern, String & res) * NOTE: We want to run regexp search for whole columns by one call (as implemented in function 'position') * but for that, regexp engine must support \0 bytes and their interpretation as string boundaries. */ -template +template struct MatchImpl { static constexpr bool use_default_implementation_for_constants = true; static constexpr bool supports_start_pos = false; + static constexpr auto name = Name::name; using ResultType = UInt8; @@ -93,7 +94,8 @@ struct MatchImpl PaddedPODArray & res) { if (start_pos != nullptr) - throw Exception("Functions 'like' and 'match' don't support start_pos argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function '{}' doesn't support start_pos argument", name); if (offsets.empty()) return; @@ -406,14 +408,14 @@ struct MatchImpl template static void vectorVector(Args &&...) { - throw Exception("Functions 'like' and 'match' don't support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support non-constant needle argument", name); } /// Search different needles in single haystack. template static void constantVector(Args &&...) { - throw Exception("Functions 'like' and 'match' don't support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support non-constant needle argument", name); } }; diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h index f0a94ed5eed..922ec7cf06b 100644 --- a/src/Functions/MultiMatchAllIndicesImpl.h +++ b/src/Functions/MultiMatchAllIndicesImpl.h @@ -29,7 +29,7 @@ namespace ErrorCodes } -template +template struct MultiMatchAllIndicesImpl { using ResultType = Type; @@ -37,6 +37,8 @@ struct MultiMatchAllIndicesImpl /// Variable for understanding, if we used offsets for the output, most /// likely to determine whether the function returns ColumnVector of ColumnArray. static constexpr bool is_column_array = true; + static constexpr auto name = Name::name; + static auto getReturnType() { return std::make_shared(std::make_shared()); diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h index c8d0ea6c87f..e2e54887a32 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ b/src/Functions/MultiMatchAnyImpl.h @@ -29,7 +29,7 @@ namespace ErrorCodes } -template +template struct MultiMatchAnyImpl { static_assert(static_cast(FindAny) + static_cast(FindAnyIndex) == 1); @@ -38,6 +38,8 @@ struct MultiMatchAnyImpl /// Variable for understanding, if we used offsets for the output, most /// likely to determine whether the function returns ColumnVector of ColumnArray. 
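The same refactoring repeats across the search Impls in these files (MatchImpl, MultiMatch*Impl, MultiSearch*Impl, PositionImpl, ...): the Name type is folded into the Impl, which then exposes a `name` constant, and the FunctionsStringSearch / FunctionsMultiStringSearch wrappers take only the Impl. Roughly, with invented Example* names (a sketch of the pattern, not code from this patch):

```cpp
/// Hypothetical illustration of the pattern (Example* names are invented).
struct NameExample { static constexpr auto name = "example"; };

template <typename Name>
struct ExampleSearchImpl
{
    /// The Impl now carries the function name itself...
    static constexpr auto name = Name::name;

    template <typename... Args>
    static void vectorFixedConstant(Args &&...)
    {
        /// ...so a shared error message can report the concrete function.
        throw Exception(ErrorCodes::ILLEGAL_COLUMN,
            "Function '{}' doesn't support FixedString haystack argument", name);
    }
};

/// Before: using FunctionExample = FunctionsStringSearch<ExampleSearchImpl, NameExample>;
/// After: the Name is threaded through the Impl and the wrapper takes only the Impl.
using FunctionExample = FunctionsStringSearch<ExampleSearchImpl<NameExample>>;
```

The practical effect, visible in the hunks above and below, is that one generic error string ("Function '{}' doesn't support ...") replaces the per-function hard-coded messages.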
static constexpr bool is_column_array = false; + static constexpr auto name = Name::name; + static auto getReturnType() { return std::make_shared>(); @@ -120,7 +122,7 @@ struct MultiMatchAnyImpl memset(accum.data(), 0, accum.size()); for (size_t j = 0; j < needles.size(); ++j) { - MatchImpl::vectorConstant(haystack_data, haystack_offsets, needles[j].toString(), nullptr, accum); + MatchImpl::vectorConstant(haystack_data, haystack_offsets, needles[j].toString(), nullptr, accum); for (size_t i = 0; i < res.size(); ++i) { if constexpr (FindAny) diff --git a/src/Functions/MultiSearchFirstIndexImpl.h b/src/Functions/MultiSearchFirstIndexImpl.h index 4b29577d0eb..26709119f6e 100644 --- a/src/Functions/MultiSearchFirstIndexImpl.h +++ b/src/Functions/MultiSearchFirstIndexImpl.h @@ -7,7 +7,7 @@ namespace DB { -template +template struct MultiSearchFirstIndexImpl { using ResultType = UInt64; @@ -15,6 +15,8 @@ struct MultiSearchFirstIndexImpl /// Variable for understanding, if we used offsets for the output, most /// likely to determine whether the function returns ColumnVector of ColumnArray. static constexpr bool is_column_array = false; + static constexpr auto name = Name::name; + static auto getReturnType() { return std::make_shared>(); } static void vectorConstant( diff --git a/src/Functions/MultiSearchFirstPositionImpl.h b/src/Functions/MultiSearchFirstPositionImpl.h index bb1017c43ee..1db8dcbde83 100644 --- a/src/Functions/MultiSearchFirstPositionImpl.h +++ b/src/Functions/MultiSearchFirstPositionImpl.h @@ -7,7 +7,7 @@ namespace DB { -template +template struct MultiSearchFirstPositionImpl { using ResultType = UInt64; @@ -15,6 +15,8 @@ struct MultiSearchFirstPositionImpl /// Variable for understanding, if we used offsets for the output, most /// likely to determine whether the function returns ColumnVector of ColumnArray. static constexpr bool is_column_array = false; + static constexpr auto name = Name::name; + static auto getReturnType() { return std::make_shared>(); } static void vectorConstant( diff --git a/src/Functions/MultiSearchImpl.h b/src/Functions/MultiSearchImpl.h index 461af5c3295..7cb0cefe580 100644 --- a/src/Functions/MultiSearchImpl.h +++ b/src/Functions/MultiSearchImpl.h @@ -7,7 +7,7 @@ namespace DB { -template +template struct MultiSearchImpl { using ResultType = UInt8; @@ -15,6 +15,8 @@ struct MultiSearchImpl /// Variable for understanding, if we used offsets for the output, most /// likely to determine whether the function returns ColumnVector of ColumnArray. static constexpr bool is_column_array = false; + static constexpr auto name = Name::name; + static auto getReturnType() { return std::make_shared>(); } static void vectorConstant( diff --git a/src/Functions/PositionImpl.h b/src/Functions/PositionImpl.h index fe9b49bd2b8..d3b6d74c3cd 100644 --- a/src/Functions/PositionImpl.h +++ b/src/Functions/PositionImpl.h @@ -175,11 +175,12 @@ struct PositionCaseInsensitiveUTF8 }; -template +template struct PositionImpl { static constexpr bool use_default_implementation_for_constants = false; static constexpr bool supports_start_pos = true; + static constexpr auto name = Name::name; using ResultType = UInt64; @@ -408,7 +409,7 @@ struct PositionImpl template static void vectorFixedConstant(Args &&...) 
{ - throw Exception("Functions 'position' don't support FixedString haystack argument", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); } }; diff --git a/src/Functions/countSubstrings.cpp b/src/Functions/countSubstrings.cpp index d0dd469b962..1bf95f9526f 100644 --- a/src/Functions/countSubstrings.cpp +++ b/src/Functions/countSubstrings.cpp @@ -13,7 +13,7 @@ struct NameCountSubstrings static constexpr auto name = "countSubstrings"; }; -using FunctionCountSubstrings = FunctionsStringSearch, NameCountSubstrings>; +using FunctionCountSubstrings = FunctionsStringSearch>; } diff --git a/src/Functions/countSubstringsCaseInsensitive.cpp b/src/Functions/countSubstringsCaseInsensitive.cpp index 9207f2d4f20..fa234953cc3 100644 --- a/src/Functions/countSubstringsCaseInsensitive.cpp +++ b/src/Functions/countSubstringsCaseInsensitive.cpp @@ -13,7 +13,7 @@ struct NameCountSubstringsCaseInsensitive static constexpr auto name = "countSubstringsCaseInsensitive"; }; -using FunctionCountSubstringsCaseInsensitive = FunctionsStringSearch, NameCountSubstringsCaseInsensitive>; +using FunctionCountSubstringsCaseInsensitive = FunctionsStringSearch>; } diff --git a/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp b/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp index b1747bbd7ff..93f77fddd7f 100644 --- a/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp +++ b/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp @@ -13,7 +13,8 @@ struct NameCountSubstringsCaseInsensitiveUTF8 static constexpr auto name = "countSubstringsCaseInsensitiveUTF8"; }; -using FunctionCountSubstringsCaseInsensitiveUTF8 = FunctionsStringSearch, NameCountSubstringsCaseInsensitiveUTF8>; +using FunctionCountSubstringsCaseInsensitiveUTF8 = FunctionsStringSearch< + CountSubstringsImpl>; } diff --git a/src/Functions/h3ResIsClassIII.cpp b/src/Functions/h3IsResClassIII.cpp similarity index 88% rename from src/Functions/h3ResIsClassIII.cpp rename to src/Functions/h3IsResClassIII.cpp index 044e6d565ac..25a2fc1952f 100644 --- a/src/Functions/h3ResIsClassIII.cpp +++ b/src/Functions/h3IsResClassIII.cpp @@ -24,12 +24,12 @@ extern const int ILLEGAL_TYPE_OF_ARGUMENT; namespace { -class FunctionH3ResIsClassIII : public IFunction +class FunctionH3IsResClassIII : public IFunction { public: - static constexpr auto name = "h3ResIsClassIII"; + static constexpr auto name = "h3IsResClassIII"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } std::string getName() const override { return name; } @@ -69,9 +69,9 @@ public: } -void registerFunctionH3ResIsClassIII(FunctionFactory & factory) +void registerFunctionH3IsResClassIII(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/hasToken.cpp b/src/Functions/hasToken.cpp index f20edffbdd8..72d6c11a5fa 100644 --- a/src/Functions/hasToken.cpp +++ b/src/Functions/hasToken.cpp @@ -14,7 +14,7 @@ struct NameHasToken static constexpr auto name = "hasToken"; }; -using FunctionHasToken = FunctionsStringSearch, NameHasToken>; +using FunctionHasToken = FunctionsStringSearch>; } diff --git a/src/Functions/hasTokenCaseInsensitive.cpp b/src/Functions/hasTokenCaseInsensitive.cpp index 28f5b9e80c1..a0280bc12a5 100644 --- a/src/Functions/hasTokenCaseInsensitive.cpp +++ b/src/Functions/hasTokenCaseInsensitive.cpp @@ -15,7 +15,7 @@ struct NameHasTokenCaseInsensitive }; using 
FunctionHasTokenCaseInsensitive - = FunctionsStringSearch, NameHasTokenCaseInsensitive>; + = FunctionsStringSearch>; } diff --git a/src/Functions/ilike.cpp b/src/Functions/ilike.cpp index fc3e38daeba..116c945e04f 100644 --- a/src/Functions/ilike.cpp +++ b/src/Functions/ilike.cpp @@ -12,8 +12,8 @@ struct NameILike static constexpr auto name = "ilike"; }; -using ILikeImpl = MatchImpl; -using FunctionILike = FunctionsStringSearch; +using ILikeImpl = MatchImpl; +using FunctionILike = FunctionsStringSearch; } diff --git a/src/Functions/like.cpp b/src/Functions/like.cpp index 37d9f006187..1ac9a8d7dab 100644 --- a/src/Functions/like.cpp +++ b/src/Functions/like.cpp @@ -13,8 +13,8 @@ struct NameLike static constexpr auto name = "like"; }; -using LikeImpl = MatchImpl; -using FunctionLike = FunctionsStringSearch; +using LikeImpl = MatchImpl; +using FunctionLike = FunctionsStringSearch; } diff --git a/src/Functions/match.cpp b/src/Functions/match.cpp index 3460d54c6b6..31d36577445 100644 --- a/src/Functions/match.cpp +++ b/src/Functions/match.cpp @@ -13,7 +13,7 @@ struct NameMatch static constexpr auto name = "match"; }; -using FunctionMatch = FunctionsStringSearch, NameMatch>; +using FunctionMatch = FunctionsStringSearch>; } diff --git a/src/Functions/multiFuzzyMatchAllIndices.cpp b/src/Functions/multiFuzzyMatchAllIndices.cpp index 8b104e9ed2d..d0121ee3981 100644 --- a/src/Functions/multiFuzzyMatchAllIndices.cpp +++ b/src/Functions/multiFuzzyMatchAllIndices.cpp @@ -14,8 +14,7 @@ struct NameMultiFuzzyMatchAllIndices }; using FunctionMultiFuzzyMatchAllIndices = FunctionsMultiStringFuzzySearch< - MultiMatchAllIndicesImpl, - NameMultiFuzzyMatchAllIndices, + MultiMatchAllIndicesImpl, std::numeric_limits::max()>; } diff --git a/src/Functions/multiFuzzyMatchAny.cpp b/src/Functions/multiFuzzyMatchAny.cpp index 4d0c3470d91..640e93a23b0 100644 --- a/src/Functions/multiFuzzyMatchAny.cpp +++ b/src/Functions/multiFuzzyMatchAny.cpp @@ -14,8 +14,7 @@ struct NameMultiFuzzyMatchAny }; using FunctionMultiFuzzyMatchAny = FunctionsMultiStringFuzzySearch< - MultiMatchAnyImpl, - NameMultiFuzzyMatchAny, + MultiMatchAnyImpl, std::numeric_limits::max()>; } diff --git a/src/Functions/multiFuzzyMatchAnyIndex.cpp b/src/Functions/multiFuzzyMatchAnyIndex.cpp index 1680f413154..f8bad1bc461 100644 --- a/src/Functions/multiFuzzyMatchAnyIndex.cpp +++ b/src/Functions/multiFuzzyMatchAnyIndex.cpp @@ -14,8 +14,7 @@ struct NameMultiFuzzyMatchAnyIndex }; using FunctionMultiFuzzyMatchAnyIndex = FunctionsMultiStringFuzzySearch< - MultiMatchAnyImpl, - NameMultiFuzzyMatchAnyIndex, + MultiMatchAnyImpl, std::numeric_limits::max()>; } diff --git a/src/Functions/multiMatchAllIndices.cpp b/src/Functions/multiMatchAllIndices.cpp index 171fa6baf74..940c9e7e3bf 100644 --- a/src/Functions/multiMatchAllIndices.cpp +++ b/src/Functions/multiMatchAllIndices.cpp @@ -14,8 +14,7 @@ struct NameMultiMatchAllIndices }; using FunctionMultiMatchAllIndices = FunctionsMultiStringSearch< - MultiMatchAllIndicesImpl, - NameMultiMatchAllIndices, + MultiMatchAllIndicesImpl, std::numeric_limits::max()>; } diff --git a/src/Functions/multiMatchAny.cpp b/src/Functions/multiMatchAny.cpp index 146c27e250c..47510e0ecc2 100644 --- a/src/Functions/multiMatchAny.cpp +++ b/src/Functions/multiMatchAny.cpp @@ -14,8 +14,7 @@ struct NameMultiMatchAny }; using FunctionMultiMatchAny = FunctionsMultiStringSearch< - MultiMatchAnyImpl, - NameMultiMatchAny, + MultiMatchAnyImpl, std::numeric_limits::max()>; } diff --git a/src/Functions/multiMatchAnyIndex.cpp 
b/src/Functions/multiMatchAnyIndex.cpp index c43cd061187..a56d41dc95b 100644 --- a/src/Functions/multiMatchAnyIndex.cpp +++ b/src/Functions/multiMatchAnyIndex.cpp @@ -14,8 +14,7 @@ struct NameMultiMatchAnyIndex }; using FunctionMultiMatchAnyIndex = FunctionsMultiStringSearch< - MultiMatchAnyImpl, - NameMultiMatchAnyIndex, + MultiMatchAnyImpl, std::numeric_limits::max()>; } diff --git a/src/Functions/multiSearchAny.cpp b/src/Functions/multiSearchAny.cpp index 5cd688ac65d..113289b83ed 100644 --- a/src/Functions/multiSearchAny.cpp +++ b/src/Functions/multiSearchAny.cpp @@ -14,7 +14,7 @@ struct NameMultiSearchAny static constexpr auto name = "multiSearchAny"; }; -using FunctionMultiSearch = FunctionsMultiStringSearch, NameMultiSearchAny>; +using FunctionMultiSearch = FunctionsMultiStringSearch>; } diff --git a/src/Functions/multiSearchAnyCaseInsensitive.cpp b/src/Functions/multiSearchAnyCaseInsensitive.cpp index 2358ce64bf8..9bc950c0d3d 100644 --- a/src/Functions/multiSearchAnyCaseInsensitive.cpp +++ b/src/Functions/multiSearchAnyCaseInsensitive.cpp @@ -14,7 +14,7 @@ struct NameMultiSearchAnyCaseInsensitive static constexpr auto name = "multiSearchAnyCaseInsensitive"; }; using FunctionMultiSearchCaseInsensitive - = FunctionsMultiStringSearch, NameMultiSearchAnyCaseInsensitive>; + = FunctionsMultiStringSearch>; } diff --git a/src/Functions/multiSearchAnyCaseInsensitiveUTF8.cpp b/src/Functions/multiSearchAnyCaseInsensitiveUTF8.cpp index f84762d2bb4..c83dc843f78 100644 --- a/src/Functions/multiSearchAnyCaseInsensitiveUTF8.cpp +++ b/src/Functions/multiSearchAnyCaseInsensitiveUTF8.cpp @@ -15,7 +15,7 @@ struct NameMultiSearchAnyCaseInsensitiveUTF8 }; using FunctionMultiSearchCaseInsensitiveUTF8 - = FunctionsMultiStringSearch, NameMultiSearchAnyCaseInsensitiveUTF8>; + = FunctionsMultiStringSearch>; } diff --git a/src/Functions/multiSearchAnyUTF8.cpp b/src/Functions/multiSearchAnyUTF8.cpp index b2c8342ba7f..3f34f70ab51 100644 --- a/src/Functions/multiSearchAnyUTF8.cpp +++ b/src/Functions/multiSearchAnyUTF8.cpp @@ -13,7 +13,7 @@ struct NameMultiSearchAnyUTF8 { static constexpr auto name = "multiSearchAnyUTF8"; }; -using FunctionMultiSearchUTF8 = FunctionsMultiStringSearch, NameMultiSearchAnyUTF8>; +using FunctionMultiSearchUTF8 = FunctionsMultiStringSearch>; } diff --git a/src/Functions/multiSearchFirstIndex.cpp b/src/Functions/multiSearchFirstIndex.cpp index fcbeb552ae1..a96ebed029c 100644 --- a/src/Functions/multiSearchFirstIndex.cpp +++ b/src/Functions/multiSearchFirstIndex.cpp @@ -15,7 +15,7 @@ struct NameMultiSearchFirstIndex }; using FunctionMultiSearchFirstIndex - = FunctionsMultiStringSearch, NameMultiSearchFirstIndex>; + = FunctionsMultiStringSearch>; } diff --git a/src/Functions/multiSearchFirstIndexCaseInsensitive.cpp b/src/Functions/multiSearchFirstIndexCaseInsensitive.cpp index 87483734cf9..cc4869d1200 100644 --- a/src/Functions/multiSearchFirstIndexCaseInsensitive.cpp +++ b/src/Functions/multiSearchFirstIndexCaseInsensitive.cpp @@ -15,7 +15,7 @@ struct NameMultiSearchFirstIndexCaseInsensitive }; using FunctionMultiSearchFirstIndexCaseInsensitive - = FunctionsMultiStringSearch, NameMultiSearchFirstIndexCaseInsensitive>; + = FunctionsMultiStringSearch>; } diff --git a/src/Functions/multiSearchFirstIndexCaseInsensitiveUTF8.cpp b/src/Functions/multiSearchFirstIndexCaseInsensitiveUTF8.cpp index 69e14adb32a..fd95947bc67 100644 --- a/src/Functions/multiSearchFirstIndexCaseInsensitiveUTF8.cpp +++ b/src/Functions/multiSearchFirstIndexCaseInsensitiveUTF8.cpp @@ -15,7 +15,7 @@ struct 
NameMultiSearchFirstIndexCaseInsensitiveUTF8 }; using FunctionMultiSearchFirstIndexCaseInsensitiveUTF8 - = FunctionsMultiStringSearch, NameMultiSearchFirstIndexCaseInsensitiveUTF8>; + = FunctionsMultiStringSearch>; } diff --git a/src/Functions/multiSearchFirstIndexUTF8.cpp b/src/Functions/multiSearchFirstIndexUTF8.cpp index 699281dad4b..6854201d14d 100644 --- a/src/Functions/multiSearchFirstIndexUTF8.cpp +++ b/src/Functions/multiSearchFirstIndexUTF8.cpp @@ -15,7 +15,7 @@ struct NameMultiSearchFirstIndexUTF8 }; using FunctionMultiSearchFirstIndexUTF8 - = FunctionsMultiStringSearch, NameMultiSearchFirstIndexUTF8>; + = FunctionsMultiStringSearch>; } diff --git a/src/Functions/multiSearchFirstPosition.cpp b/src/Functions/multiSearchFirstPosition.cpp index 003345afde6..4ca1ac35a4d 100644 --- a/src/Functions/multiSearchFirstPosition.cpp +++ b/src/Functions/multiSearchFirstPosition.cpp @@ -15,7 +15,7 @@ struct NameMultiSearchFirstPosition }; using FunctionMultiSearchFirstPosition - = FunctionsMultiStringSearch, NameMultiSearchFirstPosition>; + = FunctionsMultiStringSearch>; } diff --git a/src/Functions/multiSearchFirstPositionCaseInsensitive.cpp b/src/Functions/multiSearchFirstPositionCaseInsensitive.cpp index 7aa1ef991f3..4e356335e98 100644 --- a/src/Functions/multiSearchFirstPositionCaseInsensitive.cpp +++ b/src/Functions/multiSearchFirstPositionCaseInsensitive.cpp @@ -15,7 +15,7 @@ struct NameMultiSearchFirstPositionCaseInsensitive }; using FunctionMultiSearchFirstPositionCaseInsensitive - = FunctionsMultiStringSearch, NameMultiSearchFirstPositionCaseInsensitive>; + = FunctionsMultiStringSearch>; } diff --git a/src/Functions/multiSearchFirstPositionCaseInsensitiveUTF8.cpp b/src/Functions/multiSearchFirstPositionCaseInsensitiveUTF8.cpp index d20ce6c2de3..647fc3a2cc8 100644 --- a/src/Functions/multiSearchFirstPositionCaseInsensitiveUTF8.cpp +++ b/src/Functions/multiSearchFirstPositionCaseInsensitiveUTF8.cpp @@ -15,8 +15,7 @@ struct NameMultiSearchFirstPositionCaseInsensitiveUTF8 }; using FunctionMultiSearchFirstPositionCaseInsensitiveUTF8 = FunctionsMultiStringSearch< - MultiSearchFirstPositionImpl, - NameMultiSearchFirstPositionCaseInsensitiveUTF8>; + MultiSearchFirstPositionImpl>; } diff --git a/src/Functions/multiSearchFirstPositionUTF8.cpp b/src/Functions/multiSearchFirstPositionUTF8.cpp index c0739808f7b..fbb1099ec35 100644 --- a/src/Functions/multiSearchFirstPositionUTF8.cpp +++ b/src/Functions/multiSearchFirstPositionUTF8.cpp @@ -15,7 +15,7 @@ struct NameMultiSearchFirstPositionUTF8 }; using FunctionMultiSearchFirstPositionUTF8 - = FunctionsMultiStringSearch, NameMultiSearchFirstPositionUTF8>; + = FunctionsMultiStringSearch>; } diff --git a/src/Functions/notILike.cpp b/src/Functions/notILike.cpp index 1fc0ab65ea8..be40e2b989e 100644 --- a/src/Functions/notILike.cpp +++ b/src/Functions/notILike.cpp @@ -12,8 +12,8 @@ struct NameNotILike static constexpr auto name = "notILike"; }; -using NotILikeImpl = MatchImpl; -using FunctionNotILike = FunctionsStringSearch; +using NotILikeImpl = MatchImpl; +using FunctionNotILike = FunctionsStringSearch; } diff --git a/src/Functions/notLike.cpp b/src/Functions/notLike.cpp index 54e2c7481f0..7c4ea8ab2dc 100644 --- a/src/Functions/notLike.cpp +++ b/src/Functions/notLike.cpp @@ -12,7 +12,7 @@ struct NameNotLike static constexpr auto name = "notLike"; }; -using FunctionNotLike = FunctionsStringSearch, NameNotLike>; +using FunctionNotLike = FunctionsStringSearch>; } diff --git a/src/Functions/position.cpp b/src/Functions/position.cpp index 
5b8af16fef1..e38dc52b9af 100644 --- a/src/Functions/position.cpp +++ b/src/Functions/position.cpp @@ -13,7 +13,7 @@ struct NamePosition static constexpr auto name = "position"; }; -using FunctionPosition = FunctionsStringSearch, NamePosition>; +using FunctionPosition = FunctionsStringSearch>; } diff --git a/src/Functions/positionCaseInsensitive.cpp b/src/Functions/positionCaseInsensitive.cpp index f72766a1875..ed9d86c033c 100644 --- a/src/Functions/positionCaseInsensitive.cpp +++ b/src/Functions/positionCaseInsensitive.cpp @@ -13,7 +13,7 @@ struct NamePositionCaseInsensitive static constexpr auto name = "positionCaseInsensitive"; }; -using FunctionPositionCaseInsensitive = FunctionsStringSearch, NamePositionCaseInsensitive>; +using FunctionPositionCaseInsensitive = FunctionsStringSearch>; } diff --git a/src/Functions/positionCaseInsensitiveUTF8.cpp b/src/Functions/positionCaseInsensitiveUTF8.cpp index 0af545cc6a3..f6e344b119e 100644 --- a/src/Functions/positionCaseInsensitiveUTF8.cpp +++ b/src/Functions/positionCaseInsensitiveUTF8.cpp @@ -14,7 +14,7 @@ struct NamePositionCaseInsensitiveUTF8 }; using FunctionPositionCaseInsensitiveUTF8 - = FunctionsStringSearch, NamePositionCaseInsensitiveUTF8>; + = FunctionsStringSearch>; } diff --git a/src/Functions/positionUTF8.cpp b/src/Functions/positionUTF8.cpp index 68b2f5c274e..ecb2a1e9e97 100644 --- a/src/Functions/positionUTF8.cpp +++ b/src/Functions/positionUTF8.cpp @@ -13,7 +13,7 @@ struct NamePositionUTF8 static constexpr auto name = "positionUTF8"; }; -using FunctionPositionUTF8 = FunctionsStringSearch, NamePositionUTF8>; +using FunctionPositionUTF8 = FunctionsStringSearch>; } diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index a86ff18ec65..a0ae38f6b85 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -42,7 +42,7 @@ void registerFunctionH3IndexesAreNeighbors(FunctionFactory &); void registerFunctionStringToH3(FunctionFactory &); void registerFunctionH3ToString(FunctionFactory &); void registerFunctionH3HexAreaM2(FunctionFactory &); -void registerFunctionH3ResIsClassIII(FunctionFactory &); +void registerFunctionH3IsResClassIII(FunctionFactory &); void registerFunctionH3IsPentagon(FunctionFactory &); void registerFunctionH3GetFaces(FunctionFactory &); #endif @@ -98,7 +98,7 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionStringToH3(factory); registerFunctionH3ToString(factory); registerFunctionH3HexAreaM2(factory); - registerFunctionH3ResIsClassIII(factory); + registerFunctionH3IsResClassIII(factory); registerFunctionH3IsPentagon(factory); registerFunctionH3GetFaces(factory); #endif diff --git a/src/Functions/visitParamExtractBool.cpp b/src/Functions/visitParamExtractBool.cpp index 059115b5b13..48fb78ba9b6 100644 --- a/src/Functions/visitParamExtractBool.cpp +++ b/src/Functions/visitParamExtractBool.cpp @@ -17,10 +17,10 @@ struct ExtractBool }; struct NameVisitParamExtractBool { static constexpr auto name = "visitParamExtractBool"; }; -using FunctionVisitParamExtractBool = FunctionsStringSearch, NameVisitParamExtractBool>; +using FunctionVisitParamExtractBool = FunctionsStringSearch>; struct NameSimpleJSONExtractBool { static constexpr auto name = "simpleJSONExtractBool"; }; -using FunctionSimpleJSONExtractBool = FunctionsStringSearch, NameSimpleJSONExtractBool>; +using FunctionSimpleJSONExtractBool = FunctionsStringSearch>; void registerFunctionVisitParamExtractBool(FunctionFactory & factory) { diff --git 
a/src/Functions/visitParamExtractFloat.cpp b/src/Functions/visitParamExtractFloat.cpp index 7a55cff365c..e7967b6de2c 100644 --- a/src/Functions/visitParamExtractFloat.cpp +++ b/src/Functions/visitParamExtractFloat.cpp @@ -7,10 +7,10 @@ namespace DB { struct NameVisitParamExtractFloat { static constexpr auto name = "visitParamExtractFloat"; }; -using FunctionVisitParamExtractFloat = FunctionsStringSearch>, NameVisitParamExtractFloat>; +using FunctionVisitParamExtractFloat = FunctionsStringSearch>>; struct NameSimpleJSONExtractFloat { static constexpr auto name = "simpleJSONExtractFloat"; }; -using FunctionSimpleJSONExtractFloat = FunctionsStringSearch>, NameSimpleJSONExtractFloat>; +using FunctionSimpleJSONExtractFloat = FunctionsStringSearch>>; void registerFunctionVisitParamExtractFloat(FunctionFactory & factory) { diff --git a/src/Functions/visitParamExtractInt.cpp b/src/Functions/visitParamExtractInt.cpp index 7c2188c10fc..b7f1050972c 100644 --- a/src/Functions/visitParamExtractInt.cpp +++ b/src/Functions/visitParamExtractInt.cpp @@ -7,10 +7,10 @@ namespace DB { struct NameVisitParamExtractInt { static constexpr auto name = "visitParamExtractInt"; }; -using FunctionVisitParamExtractInt = FunctionsStringSearch>, NameVisitParamExtractInt>; +using FunctionVisitParamExtractInt = FunctionsStringSearch>>; struct NameSimpleJSONExtractInt { static constexpr auto name = "simpleJSONExtractInt"; }; -using FunctionSimpleJSONExtractInt = FunctionsStringSearch>, NameSimpleJSONExtractInt>; +using FunctionSimpleJSONExtractInt = FunctionsStringSearch>>; void registerFunctionVisitParamExtractInt(FunctionFactory & factory) { diff --git a/src/Functions/visitParamExtractUInt.cpp b/src/Functions/visitParamExtractUInt.cpp index f5466a63b0d..d89b796263e 100644 --- a/src/Functions/visitParamExtractUInt.cpp +++ b/src/Functions/visitParamExtractUInt.cpp @@ -7,10 +7,10 @@ namespace DB { struct NameVisitParamExtractUInt { static constexpr auto name = "visitParamExtractUInt"; }; -using FunctionVisitParamExtractUInt = FunctionsStringSearch>, NameVisitParamExtractUInt>; +using FunctionVisitParamExtractUInt = FunctionsStringSearch>>; struct NameSimpleJSONExtractUInt { static constexpr auto name = "simpleJSONExtractUInt"; }; -using FunctionSimpleJSONExtractUInt = FunctionsStringSearch>, NameSimpleJSONExtractUInt>; +using FunctionSimpleJSONExtractUInt = FunctionsStringSearch>>; void registerFunctionVisitParamExtractUInt(FunctionFactory & factory) diff --git a/src/Functions/visitParamHas.cpp b/src/Functions/visitParamHas.cpp index f4f377f9e8f..71d69ef5768 100644 --- a/src/Functions/visitParamHas.cpp +++ b/src/Functions/visitParamHas.cpp @@ -17,10 +17,10 @@ struct HasParam }; struct NameVisitParamHas { static constexpr auto name = "visitParamHas"; }; -using FunctionVisitParamHas = FunctionsStringSearch, NameVisitParamHas>; +using FunctionVisitParamHas = FunctionsStringSearch>; struct NameSimpleJSONHas { static constexpr auto name = "simpleJSONHas"; }; -using FunctionSimpleJSONHas = FunctionsStringSearch, NameSimpleJSONHas>; +using FunctionSimpleJSONHas = FunctionsStringSearch>; void registerFunctionVisitParamHas(FunctionFactory & factory) { diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 953eb68cff7..e358696fa40 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -26,6 +26,8 @@ #include #include +#include + #include #include #include diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp 
index da41eb82d5e..c8608e98351 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -7,7 +7,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -367,11 +369,11 @@ try insert_context->makeQueryContext(); insert_context->setSettings(key.settings); - InterpreterInsertQuery interpreter(key.query, insert_context, key.settings.insert_allow_materialized_columns); - auto sinks = interpreter.getSinks(); - assert(sinks.size() == 1); + InterpreterInsertQuery interpreter(key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); + auto pipeline = interpreter.execute().pipeline; + assert(pipeline.pushing()); - auto header = sinks.at(0)->getInputs().front().getHeader(); + auto header = pipeline.getHeader(); auto format = getInputFormatFromASTInsertQuery(key.query, false, header, insert_context, nullptr); size_t total_rows = 0; @@ -413,15 +415,10 @@ try size_t total_bytes = chunk.bytes(); auto source = std::make_shared(header, std::move(chunk)); - Pipe pipe(source); + pipeline.complete(Pipe(std::move(source))); - QueryPipeline out_pipeline; - out_pipeline.init(std::move(pipe)); - out_pipeline.resize(1); - out_pipeline.setSinks([&](const Block &, Pipe::StreamType) { return sinks.at(0); }); - - auto out_executor = out_pipeline.execute(); - out_executor->execute(out_pipeline.getNumThreads()); + CompletedPipelineExecutor completed_executor(pipeline); + completed_executor.execute(); LOG_INFO(log, "Flushed {} rows, {} bytes for query '{}'", total_rows, total_bytes, queryToString(key.query)); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 1dd63695ad4..d1bd345f7ea 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -55,6 +55,7 @@ #include #include +#include #include namespace DB diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 99197e81f80..70f7c0c0359 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -15,7 +16,7 @@ #include #include #include -#include +#include #include #include @@ -158,13 +159,9 @@ public: auto external_table = external_storage_holder->getTable(); auto table_out = external_table->write({}, external_table->getInMemoryMetadataPtr(), getContext()); auto io = interpreter->execute(); - io.pipeline.resize(1); - io.pipeline.setSinks([&](const Block &, Pipe::StreamType) -> ProcessorPtr - { - return table_out; - }); - auto executor = io.pipeline.execute(); - executor->execute(io.pipeline.getNumThreads()); + io.pipeline.complete(std::move(table_out)); + CompletedPipelineExecutor executor(io.pipeline); + executor.execute(); } else { diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp index 7233ab332dd..55c007e2713 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp @@ -1,5 +1,8 @@ #include #include +#include +#include +#include namespace DB { @@ -9,4 +12,15 @@ void IInterpreterUnionOrSelectQuery::extendQueryLogElemImpl(QueryLogElement & el elem.query_kind = "Select"; } + +QueryPipelineBuilder IInterpreterUnionOrSelectQuery::buildQueryPipeline() +{ + QueryPlan query_plan; + + buildQueryPlan(query_plan); + + return 
std::move(*query_plan.buildQueryPipeline( + QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context))); +} + } diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.h b/src/Interpreters/IInterpreterUnionOrSelectQuery.h index cc960e748f6..db9cc086e35 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.h +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.h @@ -28,6 +28,7 @@ public: } virtual void buildQueryPlan(QueryPlan & query_plan) = 0; + QueryPipelineBuilder buildQueryPipeline(); virtual void ignoreWithTotals() = 0; diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index 098bf033399..1112b5c3bda 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -162,7 +162,7 @@ IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const { /// database.table.column if (doesIdentifierBelongTo(identifier, db_and_table.database, db_and_table.table)) - return ColumnMatch::DbAndTable; + return ColumnMatch::DBAndTable; /// alias.column if (doesIdentifierBelongTo(identifier, db_and_table.alias)) @@ -199,7 +199,7 @@ void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, const Da case ColumnMatch::TableAlias: to_strip = 1; break; - case ColumnMatch::DbAndTable: + case ColumnMatch::DBAndTable: to_strip = 2; break; default: diff --git a/src/Interpreters/IdentifierSemantic.h b/src/Interpreters/IdentifierSemantic.h index b36c1ad00dd..cabe755027c 100644 --- a/src/Interpreters/IdentifierSemantic.h +++ b/src/Interpreters/IdentifierSemantic.h @@ -31,7 +31,7 @@ struct IdentifierSemantic ColumnName, /// column qualified with column names list AliasedTableName, /// column qualified with table name (but table has an alias so its priority is lower than TableName) TableName, /// column qualified with table name - DbAndTable, /// column qualified with database and table name + DBAndTable, /// column qualified with database and table name TableAlias, /// column qualified with table alias Ambiguous, }; diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 6595e1c02be..1c613758ecc 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -136,7 +136,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) table->checkAlterPartitionIsPossible(partition_commands, metadata_snapshot, getContext()->getSettingsRef()); auto partition_commands_pipe = table->alterPartition(metadata_snapshot, partition_commands, getContext()); if (!partition_commands_pipe.empty()) - res.pipeline.init(std::move(partition_commands_pipe)); + res.pipeline = QueryPipeline(std::move(partition_commands_pipe)); } if (!live_view_commands.empty()) @@ -411,6 +411,11 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS break; } case ASTAlterCommand::NO_TYPE: break; + case ASTAlterCommand::MODIFY_COMMENT: + { + required_access.emplace_back(AccessType::ALTER_MODIFY_COMMENT, database, table); + break; + } } return required_access; diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index e8a4f884dd0..9fd318ee4cf 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include @@ -72,7 +72,7 @@ BlockIO InterpreterCheckQuery::execute() } BlockIO res; 
- res.in = std::make_shared(block); + res.pipeline = QueryPipeline(std::make_shared(std::move(block))); return res; } diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index 74d8c9cf980..b7ed3a2fe75 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -8,7 +8,7 @@ #include #include #include - +#include namespace DB { diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 5fd9c2539eb..2ebae17cd6b 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -15,7 +16,6 @@ #include #include #include -#include namespace DB @@ -157,7 +157,7 @@ BlockIO InterpreterDescribeQuery::execute() BlockIO res; size_t num_rows = res_columns[0]->size(); auto source = std::make_shared(sample_block, Chunk(std::move(res_columns), num_rows)); - res.pipeline.init(Pipe(std::move(source))); + res.pipeline = QueryPipeline(std::move(source)); return res; } diff --git a/src/Interpreters/InterpreterExistsQuery.cpp b/src/Interpreters/InterpreterExistsQuery.cpp index 9e254db4bb3..6eb188bce9f 100644 --- a/src/Interpreters/InterpreterExistsQuery.cpp +++ b/src/Interpreters/InterpreterExistsQuery.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -21,7 +21,7 @@ namespace ErrorCodes BlockIO InterpreterExistsQuery::execute() { BlockIO res; - res.in = executeImpl(); + res.pipeline = executeImpl(); return res; } @@ -35,7 +35,7 @@ Block InterpreterExistsQuery::getSampleBlock() } -BlockInputStreamPtr InterpreterExistsQuery::executeImpl() +QueryPipeline InterpreterExistsQuery::executeImpl() { ASTQueryWithTableAndOutput * exists_query; bool result = false; @@ -76,10 +76,10 @@ BlockInputStreamPtr InterpreterExistsQuery::executeImpl() result = DatabaseCatalog::instance().isDictionaryExist({database, exists_query->table}); } - return std::make_shared(Block{{ + return QueryPipeline(std::make_shared(Block{{ ColumnUInt8::create(1, result), std::make_shared(), - "result" }}); + "result" }})); } } diff --git a/src/Interpreters/InterpreterExistsQuery.h b/src/Interpreters/InterpreterExistsQuery.h index efc664f07c3..5ef940bdd3f 100644 --- a/src/Interpreters/InterpreterExistsQuery.h +++ b/src/Interpreters/InterpreterExistsQuery.h @@ -21,7 +21,7 @@ public: private: ASTPtr query_ptr; - BlockInputStreamPtr executeImpl(); + QueryPipeline executeImpl(); }; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index fc47f42d266..c29eace1b55 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include #include @@ -73,7 +73,7 @@ namespace BlockIO InterpreterExplainQuery::execute() { BlockIO res; - res.in = executeImpl(); + res.pipeline = executeImpl(); return res; } @@ -240,7 +240,7 @@ ExplainSettings checkAndGetSettings(const ASTPtr & ast_settings) } -BlockInputStreamPtr InterpreterExplainQuery::executeImpl() +QueryPipeline InterpreterExplainQuery::executeImpl() { const auto & ast = query->as(); @@ -304,33 +304,41 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl() } else if (ast.getKind() == ASTExplainQuery::QueryPipeline) { - if (!dynamic_cast(ast.getExplainedQuery().get())) - throw Exception("Only SELECT is supported for 
EXPLAIN query", ErrorCodes::INCORRECT_QUERY); - - auto settings = checkAndGetSettings(ast.getSettings()); - QueryPlan plan; - - InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), SelectQueryOptions()); - interpreter.buildQueryPlan(plan); - auto pipeline = plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(getContext()), - BuildQueryPipelineSettings::fromContext(getContext())); - - if (settings.graph) + if (dynamic_cast(ast.getExplainedQuery().get())) { - /// Pipe holds QueryPlan, should not go out-of-scope - auto pipe = QueryPipeline::getPipe(std::move(*pipeline)); - const auto & processors = pipe.getProcessors(); + auto settings = checkAndGetSettings(ast.getSettings()); + QueryPlan plan; - if (settings.compact) - printPipelineCompact(processors, buf, settings.query_pipeline_options.header); + InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), SelectQueryOptions()); + interpreter.buildQueryPlan(plan); + auto pipeline = plan.buildQueryPipeline( + QueryPlanOptimizationSettings::fromContext(getContext()), + BuildQueryPipelineSettings::fromContext(getContext())); + + if (settings.graph) + { + /// Pipe holds QueryPlan, should not go out-of-scope + auto pipe = QueryPipelineBuilder::getPipe(std::move(*pipeline)); + const auto & processors = pipe.getProcessors(); + + if (settings.compact) + printPipelineCompact(processors, buf, settings.query_pipeline_options.header); + else + printPipeline(processors, buf); + } else - printPipeline(processors, buf); + { + plan.explainPipeline(buf, settings.query_pipeline_options); + } + } + else if (dynamic_cast(ast.getExplainedQuery().get())) + { + InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext()); + auto io = insert.execute(); + printPipeline(io.pipeline.getProcessors(), buf); } else - { - plan.explainPipeline(buf, settings.query_pipeline_options); - } + throw Exception("Only SELECT and INSERT is supported for EXPLAIN PIPELINE query", ErrorCodes::INCORRECT_QUERY); } else if (ast.getKind() == ASTExplainQuery::QueryEstimates) { @@ -359,7 +367,7 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl() fillColumn(*res_columns[0], buf.str()); } - return std::make_shared(sample_block.cloneWithColumns(std::move(res_columns))); + return QueryPipeline(std::make_shared(sample_block.cloneWithColumns(std::move(res_columns)))); } } diff --git a/src/Interpreters/InterpreterExplainQuery.h b/src/Interpreters/InterpreterExplainQuery.h index a7f54a10e3e..a640b1c977c 100644 --- a/src/Interpreters/InterpreterExplainQuery.h +++ b/src/Interpreters/InterpreterExplainQuery.h @@ -15,12 +15,12 @@ public: BlockIO execute() override; - static Block getSampleBlock(const ASTExplainQuery::ExplainKind kind); + static Block getSampleBlock(ASTExplainQuery::ExplainKind kind); private: ASTPtr query; - BlockInputStreamPtr executeImpl(); + QueryPipeline executeImpl(); }; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 3dd42d92e21..85a9ccb376e 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -1,14 +1,12 @@ #include #include -#include #include #include #include #include #include #include -#include #include #include #include @@ -17,9 +15,10 @@ #include #include #include -#include #include +#include #include +#include #include #include #include @@ -28,6 +27,7 @@ #include #include #include +#include #include #include @@ -44,16 +44,18 @@ namespace ErrorCodes } 
InterpreterInsertQuery::InterpreterInsertQuery( - const ASTPtr & query_ptr_, ContextPtr context_, bool allow_materialized_, bool no_squash_, bool no_destination_) + const ASTPtr & query_ptr_, ContextPtr context_, bool allow_materialized_, bool no_squash_, bool no_destination_, bool async_insert_) : WithContext(context_) , query_ptr(query_ptr_) , allow_materialized(allow_materialized_) , no_squash(no_squash_) , no_destination(no_destination_) + , async_insert(async_insert_) { checkStackSize(); } + StoragePtr InterpreterInsertQuery::getTable(ASTInsertQuery & query) { if (query.table_function) @@ -72,26 +74,37 @@ Block InterpreterInsertQuery::getSampleBlock( const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const { - Block table_sample_non_materialized = metadata_snapshot->getSampleBlockNonMaterialized(); /// If the query does not include information about columns if (!query.columns) { if (no_destination) return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals()); else - return table_sample_non_materialized; + return metadata_snapshot->getSampleBlockNonMaterialized(); } - Block table_sample = metadata_snapshot->getSampleBlock(); - - const auto columns_ast = processColumnTransformers(getContext()->getCurrentDatabase(), table, metadata_snapshot, query.columns); - /// Form the block based on the column names from the query - Block res; + Names names; + const auto columns_ast = processColumnTransformers(getContext()->getCurrentDatabase(), table, metadata_snapshot, query.columns); for (const auto & identifier : columns_ast->children) { std::string current_name = identifier->getColumnName(); + names.emplace_back(std::move(current_name)); + } + return getSampleBlock(names, table, metadata_snapshot); +} + +Block InterpreterInsertQuery::getSampleBlock( + const Names & names, + const StoragePtr & table, + const StorageMetadataPtr & metadata_snapshot) const +{ + Block table_sample = metadata_snapshot->getSampleBlock(); + Block table_sample_non_materialized = metadata_snapshot->getSampleBlockNonMaterialized(); + Block res; + for (const auto & current_name : names) + { /// The table does not have a column with that name if (!table_sample.has(current_name)) throw Exception("No such column " + current_name + " in table " + table->getStorageID().getNameForLogs(), @@ -146,33 +159,125 @@ static bool isTrivialSelect(const ASTPtr & select) return false; }; - -std::pair InterpreterInsertQuery::executeImpl( - const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, Block & sample_block) +Chain InterpreterInsertQuery::buildChain( + const StoragePtr & table, + const StorageMetadataPtr & metadata_snapshot, + const Names & columns, + ThreadStatus * thread_status, + std::atomic_uint64_t * elapsed_counter_ms) { - const auto & settings = getContext()->getSettingsRef(); - const auto & query = query_ptr->as(); + auto sample = getSampleBlock(columns, table, metadata_snapshot); + return buildChainImpl(table, metadata_snapshot, std::move(sample) , thread_status, elapsed_counter_ms); +} +Chain InterpreterInsertQuery::buildChainImpl( + const StoragePtr & table, + const StorageMetadataPtr & metadata_snapshot, + const Block & query_sample_block, + ThreadStatus * thread_status, + std::atomic_uint64_t * elapsed_counter_ms) +{ + auto context_ptr = getContext(); + const ASTInsertQuery * query = nullptr; + if (query_ptr) + query = query_ptr->as(); + + const Settings & settings = context_ptr->getSettingsRef(); + bool null_as_default = query && query->select && 
context_ptr->getSettingsRef().insert_null_as_default; + + /// We create a pipeline of several streams, into which we will write data. + Chain out; + + /// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage. + /// Otherwise we'll get duplicates when MV reads same rows again from Kafka. + if (table->noPushingToViews() && !no_destination) + { + auto sink = table->write(query_ptr, metadata_snapshot, context_ptr); + sink->setRuntimeData(thread_status, elapsed_counter_ms); + out.addSource(std::move(sink)); + } + else + { + out = buildPushingToViewsChain(table, metadata_snapshot, context_ptr, query_ptr, no_destination, thread_status, elapsed_counter_ms); + } + + /// Note that we wrap transforms one on top of another, so we write them in reverse of data processing order. + + /// Checking constraints. It must be done after calculation of all defaults, so we can check them on calculated columns. + if (const auto & constraints = metadata_snapshot->getConstraints(); !constraints.empty()) + out.addSource(std::make_shared( + table->getStorageID(), out.getInputHeader(), metadata_snapshot->getConstraints(), context_ptr)); + + auto adding_missing_defaults_dag = addMissingDefaults( + query_sample_block, + out.getInputHeader().getNamesAndTypesList(), + metadata_snapshot->getColumns(), + context_ptr, + null_as_default); + + auto adding_missing_defaults_actions = std::make_shared(adding_missing_defaults_dag); + + /// Actually we don't know structure of input blocks from query/table, + /// because some clients break insertion protocol (columns != header) + out.addSource(std::make_shared(query_sample_block, adding_missing_defaults_actions)); + + /// It's important to squash blocks as early as possible (before other transforms), + /// because other transforms may work inefficient if block size is small. + + /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side. + /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks). + if (!(settings.insert_distributed_sync && table->isRemote()) && !no_squash && !(query && query->watch)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + out.addSource(std::make_shared( + out.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0)); + } + + auto counting = std::make_shared(out.getInputHeader(), thread_status); + counting->setProcessListElement(context_ptr->getProcessListElement()); + out.addSource(std::move(counting)); + + return out; +} + +BlockIO InterpreterInsertQuery::execute() +{ + const Settings & settings = getContext()->getSettingsRef(); + auto & query = query_ptr->as(); + + QueryPipelineBuilder pipeline; + + StoragePtr table = getTable(query); if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); - BlockIO res; - BlockOutputStreams out_streams; + auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); + + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot); + if (!query.table_function) + getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); bool is_distributed_insert_select = false; + if (query.select && table->isRemote() && settings.parallel_distributed_insert_select) { // Distributed INSERT SELECT if (auto maybe_pipeline = table->distributedWrite(query, getContext())) { - res.pipeline = std::move(*maybe_pipeline); + pipeline = std::move(*maybe_pipeline); is_distributed_insert_select = true; } } + std::vector out_chains; if (!is_distributed_insert_select || query.watch) { size_t out_streams_size = 1; + if (query.select) { bool is_trivial_insert_select = false; @@ -216,28 +321,28 @@ std::pair InterpreterInsertQuery::executeImpl( InterpreterSelectWithUnionQuery interpreter_select{ query.select, new_context, SelectQueryOptions(QueryProcessingStage::Complete, 1)}; - res = interpreter_select.execute(); + pipeline = interpreter_select.buildQueryPipeline(); } else { /// Passing 1 as subquery_depth will disable limiting size of intermediate result. InterpreterSelectWithUnionQuery interpreter_select{ query.select, getContext(), SelectQueryOptions(QueryProcessingStage::Complete, 1)}; - res = interpreter_select.execute(); + pipeline = interpreter_select.buildQueryPipeline(); } - res.pipeline.dropTotalsAndExtremes(); + pipeline.dropTotalsAndExtremes(); if (table->supportsParallelInsert() && settings.max_insert_threads > 1) - out_streams_size = std::min(size_t(settings.max_insert_threads), res.pipeline.getNumStreams()); + out_streams_size = std::min(size_t(settings.max_insert_threads), pipeline.getNumStreams()); - res.pipeline.resize(out_streams_size); + pipeline.resize(out_streams_size); /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. if (getContext()->getSettingsRef().insert_null_as_default) { - const auto & input_columns = res.pipeline.getHeader().getColumnsWithTypeAndName(); - const auto & query_columns = sample_block.getColumnsWithTypeAndName(); + const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); + const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); const auto & output_columns = metadata_snapshot->getColumns(); if (input_columns.size() == query_columns.size()) @@ -247,7 +352,7 @@ std::pair InterpreterInsertQuery::executeImpl( /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with /// default column values (in AddingDefaultBlockOutputStream), so all values will be cast correctly. 
if (input_columns[col_idx].type->isNullable() && !query_columns[col_idx].type->isNullable() && output_columns.hasDefault(query_columns[col_idx].name)) - sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullable(query_columns[col_idx].column), makeNullable(query_columns[col_idx].type), query_columns[col_idx].name)); + query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullable(query_columns[col_idx].column), makeNullable(query_columns[col_idx].type), query_columns[col_idx].name)); } } } @@ -255,128 +360,17 @@ std::pair InterpreterInsertQuery::executeImpl( else if (query.watch) { InterpreterWatchQuery interpreter_watch{ query.watch, getContext() }; - res = interpreter_watch.execute(); + pipeline = interpreter_watch.buildQueryPipeline(); } for (size_t i = 0; i < out_streams_size; i++) { - /// We create a pipeline of several streams, into which we will write data. - BlockOutputStreamPtr out; - - /// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage. - /// Otherwise we'll get duplicates when MV reads same rows again from Kafka. - if (table->noPushingToViews() && !no_destination) - out = std::make_shared(table->write(query_ptr, metadata_snapshot, getContext())); - else - out = std::make_shared(table, metadata_snapshot, getContext(), query_ptr, no_destination); - - /// Note that we wrap transforms one on top of another, so we write them in reverse of data processing order. - - /// Checking constraints. It must be done after calculation of all defaults, so we can check them on calculated columns. - if (const auto & constraints = metadata_snapshot->getConstraints(); !constraints.empty()) - out = std::make_shared( - query.table_id, out, out->getHeader(), metadata_snapshot->getConstraints(), getContext()); - - bool null_as_default = query.select && getContext()->getSettingsRef().insert_null_as_default; - - /// Actually we don't know structure of input blocks from query/table, - /// because some clients break insertion protocol (columns != header) - out = std::make_shared( - out, sample_block, metadata_snapshot->getColumns(), getContext(), null_as_default); - - /// It's important to squash blocks as early as possible (before other transforms), - /// because other transforms may work inefficient if block size is small. - - /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side. - /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks). - if (!(settings.insert_distributed_sync && table->isRemote()) && !no_squash && !query.watch) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - out = std::make_shared( - out, - out->getHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0); - } - - auto out_wrapper = std::make_shared(out); - out_wrapper->setProcessListElement(getContext()->getProcessListElement()); - out_streams.emplace_back(std::move(out_wrapper)); + auto out = buildChainImpl(table, metadata_snapshot, query_sample_block, nullptr, nullptr); + out_chains.emplace_back(std::move(out)); } } - return {std::move(res), std::move(out_streams)}; -} - -BlockIO InterpreterInsertQuery::execute() -{ - const auto & settings = getContext()->getSettingsRef(); - auto & query = query_ptr->as(); - - auto table = getTable(query); - auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); - auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto sample_block = getSampleBlock(query, table, metadata_snapshot); - - if (!query.table_function) - getContext()->checkAccess(AccessType::INSERT, query.table_id, sample_block.getNames()); - BlockIO res; - BlockOutputStreams out_streams; - std::tie(res, out_streams) = executeImpl(table, metadata_snapshot, sample_block); - - /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? - if (out_streams.empty()) - { - /// Pipeline was already built. - } - else if (query.select || query.watch) - { - /// XXX: is this branch also triggered for select+input() case? - - const auto & header = out_streams.at(0)->getHeader(); - auto actions_dag = ActionsDAG::makeConvertingActions( - res.pipeline.getHeader().getColumnsWithTypeAndName(), - header.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); - - res.pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header, actions); - }); - - res.pipeline.setSinks([&](const Block &, QueryPipeline::StreamType type) -> ProcessorPtr - { - if (type != QueryPipeline::StreamType::Main) - return nullptr; - - auto stream = std::move(out_streams.back()); - out_streams.pop_back(); - - return std::make_shared(std::move(stream)); - }); - - if (!allow_materialized) - { - for (const auto & column : metadata_snapshot->getColumns()) - if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) - throw Exception("Cannot insert column " + column.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN); - } - } - else if (query.hasInlinedData()) - { - auto pipe = getSourceFromASTInsertQuery(query_ptr, true, sample_block, getContext(), nullptr); - res.pipeline.init(std::move(pipe)); - res.pipeline.resize(1); - res.pipeline.setSinks([&](const Block &, Pipe::StreamType) - { - return std::make_shared(out_streams.at(0)); - }); - } - else - res.out = std::move(out_streams.at(0)); res.pipeline.addStorageHolder(table); if (const auto * mv = dynamic_cast(table.get())) @@ -385,31 +379,63 @@ BlockIO InterpreterInsertQuery::execute() res.pipeline.addStorageHolder(inner_table); } + /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? 
+ if (is_distributed_insert_select) + { + res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); + } + else if (query.select || query.watch) + { + const auto & header = out_chains.at(0).getInputHeader(); + auto actions_dag = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header, actions); + }); + + auto num_select_threads = pipeline.getNumThreads(); + + pipeline.addChains(std::move(out_chains)); + + pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header); + }); + + /// Don't use more threads for insert then for select to reduce memory consumption. + if (!settings.parallel_view_processing && pipeline.getNumThreads() > num_select_threads) + pipeline.setMaxThreads(num_select_threads); + + if (!allow_materialized) + { + for (const auto & column : metadata_snapshot->getColumns()) + if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) + throw Exception("Cannot insert column " + column.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN); + } + + res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); + } + else + { + res.pipeline = QueryPipeline(std::move(out_chains.at(0))); + res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); + + if (query.hasInlinedData() && !async_insert) + { + /// can execute without additional data + auto pipe = getSourceFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); + res.pipeline.complete(std::move(pipe)); + } + } + return res; } -Processors InterpreterInsertQuery::getSinks() -{ - const auto & settings = getContext()->getSettingsRef(); - auto & query = query_ptr->as(); - - auto table = getTable(query); - auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); - auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto sample_block = getSampleBlock(query, table, metadata_snapshot); - - if (!query.table_function) - getContext()->checkAccess(AccessType::INSERT, query.table_id, sample_block.getNames()); - - auto out_streams = executeImpl(table, metadata_snapshot, sample_block).second; - - Processors sinks; - sinks.reserve(out_streams.size()); - for (const auto & out : out_streams) - sinks.emplace_back(std::make_shared(out)); - - return sinks; -} StorageID InterpreterInsertQuery::getDatabaseTable() const { diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 0ccf931db35..0d6fe34c0c2 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -5,11 +5,12 @@ #include #include #include -#include namespace DB { +class Chain; +class ThreadStatus; /** Interprets the INSERT query. */ @@ -21,7 +22,8 @@ public: ContextPtr context_, bool allow_materialized_ = false, bool no_squash_ = false, - bool no_destination_ = false); + bool no_destination_ = false, + bool async_insert_ = false); /** Prepare a request for execution. 
Return block streams * - the stream into which you can write data to execute the query, if INSERT; @@ -30,23 +32,35 @@ public: */ BlockIO execute() override; - /// Returns only sinks, without input sources. - Processors getSinks(); - StorageID getDatabaseTable() const; + Chain buildChain( + const StoragePtr & table, + const StorageMetadataPtr & metadata_snapshot, + const Names & columns, + ThreadStatus * thread_status = nullptr, + std::atomic_uint64_t * elapsed_counter_ms = nullptr); + void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr context_) const override; - Block getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const; + StoragePtr getTable(ASTInsertQuery & query); + Block getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const; private: - std::pair executeImpl( - const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, Block & sample_block); + Block getSampleBlock(const Names & names, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const; ASTPtr query_ptr; const bool allow_materialized; const bool no_squash; const bool no_destination; + const bool async_insert; + + Chain buildChainImpl( + const StoragePtr & table, + const StorageMetadataPtr & metadata_snapshot, + const Block & query_sample_block, + ThreadStatus * thread_status, + std::atomic_uint64_t * elapsed_counter_ms); }; diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index d43d697fcd5..e94eee25dc3 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -16,7 +16,9 @@ #include #include #include -#include +#include +#include +#include #include #include #include @@ -121,15 +123,16 @@ static QueryDescriptors extractQueriesExceptMeAndCheckAccess(const Block & proce } -class SyncKillQueryInputStream : public IBlockInputStream +class SyncKillQuerySource : public SourceWithProgress { public: - SyncKillQueryInputStream(ProcessList & process_list_, QueryDescriptors && processes_to_stop_, Block && processes_block_, + SyncKillQuerySource(ProcessList & process_list_, QueryDescriptors && processes_to_stop_, Block && processes_block_, const Block & res_sample_block_) - : process_list(process_list_), - processes_to_stop(std::move(processes_to_stop_)), - processes_block(std::move(processes_block_)), - res_sample_block(res_sample_block_) + : SourceWithProgress(res_sample_block_) + , process_list(process_list_) + , processes_to_stop(std::move(processes_to_stop_)) + , processes_block(std::move(processes_block_)) + , res_sample_block(std::move(res_sample_block_)) { addTotalRowsApprox(processes_to_stop.size()); } @@ -139,14 +142,12 @@ public: return "SynchronousQueryKiller"; } - Block getHeader() const override { return res_sample_block; } - - Block readImpl() override + Chunk generate() override { size_t num_result_queries = processes_to_stop.size(); if (num_processed_queries >= num_result_queries) - return Block(); + return {}; MutableColumns columns = res_sample_block.cloneEmptyColumns(); @@ -179,7 +180,8 @@ public: /// Don't produce empty block } while (columns.empty() || columns[0]->empty()); - return res_sample_block.cloneWithColumns(std::move(columns)); + size_t num_rows = columns.empty() ? 
0 : columns.front()->size(); + return Chunk(std::move(columns), num_rows); } ProcessList & process_list; @@ -221,12 +223,12 @@ BlockIO InterpreterKillQueryQuery::execute() insertResultRow(query_desc.source_num, code, processes_block, header, res_columns); } - res_io.in = std::make_shared(header.cloneWithColumns(std::move(res_columns))); + res_io.pipeline = QueryPipeline(std::make_shared(header.cloneWithColumns(std::move(res_columns)))); } else { - res_io.in = std::make_shared( - process_list, std::move(queries_to_stop), std::move(processes_block), header); + res_io.pipeline = QueryPipeline(std::make_shared( + process_list, std::move(queries_to_stop), std::move(processes_block), header)); } break; @@ -286,7 +288,7 @@ BlockIO InterpreterKillQueryQuery::execute() "Not allowed to kill mutation. To execute this query it's necessary to have the grant " + required_access_rights.toString(), ErrorCodes::ACCESS_DENIED); - res_io.in = std::make_shared(header.cloneWithColumns(std::move(res_columns))); + res_io.pipeline = QueryPipeline(Pipe(std::make_shared(header.cloneWithColumns(std::move(res_columns))))); break; } @@ -302,10 +304,15 @@ Block InterpreterKillQueryQuery::getSelectResult(const String & columns, const S if (where_expression) select_query += " WHERE " + queryToString(where_expression); - auto stream = executeQuery(select_query, getContext(), true).getInputStream(); - Block res = stream->read(); + auto io = executeQuery(select_query, getContext(), true); + PullingPipelineExecutor executor(io.pipeline); + Block res; + while (!res && executor.pull(res)); - if (res && stream->read()) + Block tmp_block; + while (executor.pull(tmp_block)); + + if (tmp_block) throw Exception("Expected one block from input stream", ErrorCodes::LOGICAL_ERROR); return res; diff --git a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp index 9c8dda56b44..f7d98e2e0fe 100644 --- a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp +++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp @@ -133,8 +133,10 @@ BlockIO InterpreterSelectIntersectExceptQuery::execute() QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); - res.pipeline = std::move(*pipeline); - res.pipeline.addInterpreterContext(context); + pipeline->addInterpreterContext(context); + + res.pipeline = QueryPipelineBuilder::getPipeline(std::move(*query_plan.buildQueryPipeline( + QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)))); return res; } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 5fe9948f857..ba2750a6690 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -603,8 +603,8 @@ BlockIO InterpreterSelectQuery::execute() buildQueryPlan(query_plan); - res.pipeline = std::move(*query_plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context))); + res.pipeline = QueryPipelineBuilder::getPipeline(std::move(*query_plan.buildQueryPipeline( + QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)))); return res; } diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index b7494a6c965..4aeaa9e4f13 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ 
b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -320,13 +320,13 @@ BlockIO InterpreterSelectWithUnionQuery::execute() QueryPlan query_plan; buildQueryPlan(query_plan); - auto pipeline = query_plan.buildQueryPipeline( + auto pipeline_builder = query_plan.buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); - res.pipeline = std::move(*pipeline); - res.pipeline.addInterpreterContext(context); + pipeline_builder->addInterpreterContext(context); + res.pipeline = QueryPipelineBuilder::getPipeline(std::move(*pipeline_builder)); return res; } diff --git a/src/Interpreters/InterpreterShowAccessQuery.cpp b/src/Interpreters/InterpreterShowAccessQuery.cpp index a82ba0411b4..137da9b7c5f 100644 --- a/src/Interpreters/InterpreterShowAccessQuery.cpp +++ b/src/Interpreters/InterpreterShowAccessQuery.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -22,12 +22,12 @@ using EntityType = IAccessEntity::Type; BlockIO InterpreterShowAccessQuery::execute() { BlockIO res; - res.in = executeImpl(); + res.pipeline = executeImpl(); return res; } -BlockInputStreamPtr InterpreterShowAccessQuery::executeImpl() const +QueryPipeline InterpreterShowAccessQuery::executeImpl() const { /// Build a create query. ASTs queries = getCreateAndGrantQueries(); @@ -43,7 +43,7 @@ BlockInputStreamPtr InterpreterShowAccessQuery::executeImpl() const } String desc = "ACCESS"; - return std::make_shared(Block{{std::move(column), std::make_shared(), desc}}); + return QueryPipeline(std::make_shared(Block{{std::move(column), std::make_shared(), desc}})); } diff --git a/src/Interpreters/InterpreterShowAccessQuery.h b/src/Interpreters/InterpreterShowAccessQuery.h index d08d8962abc..d305af9487f 100644 --- a/src/Interpreters/InterpreterShowAccessQuery.h +++ b/src/Interpreters/InterpreterShowAccessQuery.h @@ -23,7 +23,7 @@ public: bool ignoreLimits() const override { return true; } private: - BlockInputStreamPtr executeImpl() const; + QueryPipeline executeImpl() const; ASTs getCreateAndGrantQueries() const; std::vector getEntities() const; diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp index 8115b4a63df..87ebd064ae5 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include @@ -241,12 +241,12 @@ InterpreterShowCreateAccessEntityQuery::InterpreterShowCreateAccessEntityQuery(c BlockIO InterpreterShowCreateAccessEntityQuery::execute() { BlockIO res; - res.in = executeImpl(); + res.pipeline = executeImpl(); return res; } -BlockInputStreamPtr InterpreterShowCreateAccessEntityQuery::executeImpl() +QueryPipeline InterpreterShowCreateAccessEntityQuery::executeImpl() { /// Build a create queries. ASTs create_queries = getCreateQueries(); @@ -270,7 +270,7 @@ BlockInputStreamPtr InterpreterShowCreateAccessEntityQuery::executeImpl() if (startsWith(desc, prefix)) desc = desc.substr(prefix.length()); /// `desc` always starts with "SHOW ", so we can trim this prefix. 
- return std::make_shared(Block{{std::move(column), std::make_shared(), desc}}); + return QueryPipeline(std::make_shared(Block{{std::move(column), std::make_shared(), desc}})); } diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h index 6d026d2b81b..0aedeb18be4 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h @@ -30,7 +30,7 @@ public: static ASTPtr getAttachQuery(const IAccessEntity & entity); private: - BlockInputStreamPtr executeImpl(); + QueryPipeline executeImpl(); std::vector getEntities() const; ASTs getCreateQueries() const; AccessRightsElements getRequiredAccess() const; diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index 967d3e7f570..adf1aae3ff3 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -26,7 +26,7 @@ namespace ErrorCodes BlockIO InterpreterShowCreateQuery::execute() { BlockIO res; - res.in = executeImpl(); + res.pipeline = executeImpl(); return res; } @@ -40,7 +40,7 @@ Block InterpreterShowCreateQuery::getSampleBlock() } -BlockInputStreamPtr InterpreterShowCreateQuery::executeImpl() +QueryPipeline InterpreterShowCreateQuery::executeImpl() { ASTPtr create_query; ASTQueryWithTableAndOutput * show_query; @@ -100,10 +100,10 @@ BlockInputStreamPtr InterpreterShowCreateQuery::executeImpl() MutableColumnPtr column = ColumnString::create(); column->insert(res); - return std::make_shared(Block{{ + return QueryPipeline(std::make_shared(Block{{ std::move(column), std::make_shared(), - "statement"}}); + "statement"}})); } } diff --git a/src/Interpreters/InterpreterShowCreateQuery.h b/src/Interpreters/InterpreterShowCreateQuery.h index 53f587d3e7d..da622de7fc6 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.h +++ b/src/Interpreters/InterpreterShowCreateQuery.h @@ -21,7 +21,7 @@ public: private: ASTPtr query_ptr; - BlockInputStreamPtr executeImpl(); + QueryPipeline executeImpl(); }; diff --git a/src/Interpreters/InterpreterShowGrantsQuery.cpp b/src/Interpreters/InterpreterShowGrantsQuery.cpp index f3e554122e1..7302e893cdd 100644 --- a/src/Interpreters/InterpreterShowGrantsQuery.cpp +++ b/src/Interpreters/InterpreterShowGrantsQuery.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -100,12 +100,12 @@ namespace BlockIO InterpreterShowGrantsQuery::execute() { BlockIO res; - res.in = executeImpl(); + res.pipeline = executeImpl(); return res; } -BlockInputStreamPtr InterpreterShowGrantsQuery::executeImpl() +QueryPipeline InterpreterShowGrantsQuery::executeImpl() { /// Build a create query. ASTs grant_queries = getGrantQueries(); @@ -129,7 +129,7 @@ BlockInputStreamPtr InterpreterShowGrantsQuery::executeImpl() if (desc.starts_with(prefix)) desc = desc.substr(prefix.length()); /// `desc` always starts with "SHOW ", so we can trim this prefix. 
- return std::make_shared(Block{{std::move(column), std::make_shared(), desc}}); + return QueryPipeline(std::make_shared(Block{{std::move(column), std::make_shared(), desc}})); } diff --git a/src/Interpreters/InterpreterShowGrantsQuery.h b/src/Interpreters/InterpreterShowGrantsQuery.h index c23aa1e3b94..06bdcf169b1 100644 --- a/src/Interpreters/InterpreterShowGrantsQuery.h +++ b/src/Interpreters/InterpreterShowGrantsQuery.h @@ -27,7 +27,7 @@ public: bool ignoreLimits() const override { return true; } private: - BlockInputStreamPtr executeImpl(); + QueryPipeline executeImpl(); ASTs getGrantQueries() const; std::vector getEntities() const; diff --git a/src/Interpreters/InterpreterWatchQuery.cpp b/src/Interpreters/InterpreterWatchQuery.cpp index ee96045bbc4..bc0aeda56bd 100644 --- a/src/Interpreters/InterpreterWatchQuery.cpp +++ b/src/Interpreters/InterpreterWatchQuery.cpp @@ -31,11 +31,17 @@ namespace ErrorCodes BlockIO InterpreterWatchQuery::execute() +{ + BlockIO res; + res.pipeline = QueryPipelineBuilder::getPipeline(buildQueryPipeline()); + return res; +} + +QueryPipelineBuilder InterpreterWatchQuery::buildQueryPipeline() { if (!getContext()->getSettingsRef().allow_experimental_live_view) throw Exception("Experimental LIVE VIEW feature is not enabled (the setting 'allow_experimental_live_view')", ErrorCodes::SUPPORT_IS_DISABLED); - BlockIO res; const ASTWatchQuery & query = typeid_cast(*query_ptr); auto table_id = getContext()->resolveStorageID(query, Context::ResolveOrdinary); @@ -85,10 +91,9 @@ BlockIO InterpreterWatchQuery::execute() pipe.setQuota(getContext()->getQuota()); } - res.pipeline.init(std::move(pipe)); - - return res; + QueryPipelineBuilder pipeline; + pipeline.init(std::move(pipe)); + return pipeline; } - } diff --git a/src/Interpreters/InterpreterWatchQuery.h b/src/Interpreters/InterpreterWatchQuery.h index 51eb4a00556..2bc7236582a 100644 --- a/src/Interpreters/InterpreterWatchQuery.h +++ b/src/Interpreters/InterpreterWatchQuery.h @@ -31,6 +31,7 @@ public: InterpreterWatchQuery(const ASTPtr & query_ptr_, ContextPtr context_) : WithContext(context_), query_ptr(query_ptr_) {} BlockIO execute() override; + QueryPipelineBuilder buildQueryPipeline(); private: ASTPtr query_ptr; diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 271d7371425..3aae3982758 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -128,7 +128,7 @@ private: /// Table has an alias. We do not need to rewrite qualified names with table alias (match == ColumnMatch::TableName). auto match = IdentifierSemantic::canReferColumnToTable(identifier, table); if (match == IdentifierSemantic::ColumnMatch::AliasedTableName || - match == IdentifierSemantic::ColumnMatch::DbAndTable) + match == IdentifierSemantic::ColumnMatch::DBAndTable) { if (rewritten) throw Exception("Failed to rewrite distributed table names. 
Ambiguous column '" + identifier.name() + "'", diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index eb806caa34f..ebcc8739fc4 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -580,13 +580,14 @@ void MergeJoin::mergeInMemoryRightBlocks() Pipe source(std::make_shared(std::move(right_blocks.blocks))); right_blocks.clear(); - QueryPipeline pipeline; - pipeline.init(std::move(source)); + QueryPipelineBuilder builder; + builder.init(std::move(source)); /// TODO: there should be no split keys by blocks for RIGHT|FULL JOIN - pipeline.addTransform(std::make_shared( - pipeline.getHeader(), right_sort_description, max_rows_in_right_block, 0, 0, 0, 0, nullptr, 0)); + builder.addTransform(std::make_shared( + builder.getHeader(), right_sort_description, max_rows_in_right_block, 0, 0, 0, 0, nullptr, 0)); + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); auto sorted_input = PipelineExecutingBlockInputStream(std::move(pipeline)); while (Block block = sorted_input.read()) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index b90a1d10d2d..540d5c76c97 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -11,12 +11,13 @@ #include #include #include -#include +#include #include #include #include #include #include +#include #include #include #include @@ -216,15 +217,21 @@ bool isStorageTouchedByMutations( /// For some reason it may copy context and and give it into ExpressionBlockInputStream /// after that we will use context from destroyed stack frame in our stream. InterpreterSelectQuery interpreter(select_query, context_copy, storage, metadata_snapshot, SelectQueryOptions().ignoreLimits()); - BlockInputStreamPtr in = interpreter.execute().getInputStream(); + auto io = interpreter.execute(); + PullingPipelineExecutor executor(io.pipeline); - Block block = in->read(); + Block block; + while (!block.rows()) + executor.pull(block); if (!block.rows()) return false; else if (block.rows() != 1) throw Exception("count() expression returned " + toString(block.rows()) + " rows, not 1", ErrorCodes::LOGICAL_ERROR); + Block tmp_block; + while (executor.pull(tmp_block)); + auto count = (*block.getByName("count()").column)[0].get(); return count != 0; } @@ -852,7 +859,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & return select; } -QueryPipelinePtr MutationsInterpreter::addStreamsForLaterStages(const std::vector & prepared_stages, QueryPlan & plan) const +QueryPipelineBuilderPtr MutationsInterpreter::addStreamsForLaterStages(const std::vector & prepared_stages, QueryPlan & plan) const { for (size_t i_stage = 1; i_stage < prepared_stages.size(); ++i_stage) { @@ -936,19 +943,20 @@ BlockInputStreamPtr MutationsInterpreter::execute() QueryPlan plan; select_interpreter->buildQueryPlan(plan); - auto pipeline = addStreamsForLaterStages(stages, plan); + auto builder = addStreamsForLaterStages(stages, plan); /// Sometimes we update just part of columns (for example UPDATE mutation) /// in this case we don't read sorting key, so just we don't check anything. 
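The InterpreterKillQueryQuery::getSelectResult and isStorageTouchedByMutations hunks above both replace `IBlockInputStream::read()` loops with a `PullingPipelineExecutor` over the new `QueryPipeline`. A minimal sketch of that consumption pattern, with the producing interpreter and the `process()` consumer assumed rather than taken from the patch:

```cpp
// Pulling pattern used throughout this patch (sketch, not part of the diff).
auto io = interpreter.execute();               // BlockIO whose pipeline produces result blocks
PullingPipelineExecutor executor(io.pipeline);

Block block;
while (executor.pull(block))                   // returns false once the pipeline is exhausted
{
    if (!block.rows())
        continue;                              // pull() may yield empty blocks
    process(block);                            // hypothetical consumer
}
```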
- if (auto sort_desc = getStorageSortDescriptionIfPossible(pipeline->getHeader())) + if (auto sort_desc = getStorageSortDescriptionIfPossible(builder->getHeader())) { - pipeline->addSimpleTransform([&](const Block & header) + builder->addSimpleTransform([&](const Block & header) { return std::make_shared(header, *sort_desc); }); } - BlockInputStreamPtr result_stream = std::make_shared(std::move(*pipeline)); + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); + BlockInputStreamPtr result_stream = std::make_shared(std::move(pipeline)); if (!updated_header) updated_header = std::make_unique(result_stream->getHeader()); diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 4f8960ae8f7..b0540f7d2ed 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -14,8 +14,8 @@ namespace DB class Context; class QueryPlan; -class QueryPipeline; -using QueryPipelinePtr = std::unique_ptr; +class QueryPipelineBuilder; +using QueryPipelineBuilderPtr = std::unique_ptr; /// Return false if the data isn't going to be changed by mutations. bool isStorageTouchedByMutations( @@ -84,7 +84,7 @@ private: struct Stage; ASTPtr prepareInterpreterSelectQuery(std::vector &prepared_stages, bool dry_run); - QueryPipelinePtr addStreamsForLaterStages(const std::vector & prepared_stages, QueryPlan & plan) const; + QueryPipelineBuilderPtr addStreamsForLaterStages(const std::vector & prepared_stages, QueryPlan & plan) const; std::optional getStorageSortDescriptionIfPossible(const Block & header) const; diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 06320f00dfa..4fd022772f6 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -238,9 +238,6 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as ProcessListEntry::~ProcessListEntry() { - /// Destroy all streams to avoid long lock of ProcessList - it->releaseQueryStreams(); - std::lock_guard lock(parent.mutex); String user = it->getClientInfo().current_user; @@ -303,88 +300,30 @@ QueryStatus::~QueryStatus() assert(executors.empty()); } -void QueryStatus::setQueryStreams(const BlockIO & io) +CancellationCode QueryStatus::cancelQuery(bool) { - std::lock_guard lock(query_streams_mutex); - - query_stream_in = io.in; - query_stream_out = io.out; - query_streams_status = QueryStreamsStatus::Initialized; -} - -void QueryStatus::releaseQueryStreams() -{ - BlockInputStreamPtr in; - BlockOutputStreamPtr out; - - { - std::lock_guard lock(query_streams_mutex); - - query_streams_status = QueryStreamsStatus::Released; - in = std::move(query_stream_in); - out = std::move(query_stream_out); - } - - /// Destroy streams outside the mutex lock -} - -bool QueryStatus::streamsAreReleased() -{ - std::lock_guard lock(query_streams_mutex); - - return query_streams_status == QueryStreamsStatus::Released; -} - -bool QueryStatus::tryGetQueryStreams(BlockInputStreamPtr & in, BlockOutputStreamPtr & out) const -{ - std::lock_guard lock(query_streams_mutex); - - if (query_streams_status != QueryStreamsStatus::Initialized) - return false; - - in = query_stream_in; - out = query_stream_out; - return true; -} - -CancellationCode QueryStatus::cancelQuery(bool kill) -{ - /// Streams are destroyed, and ProcessListElement will be deleted from ProcessList soon. 
We need wait a little bit - if (streamsAreReleased()) + if (is_killed.load()) return CancellationCode::CancelSent; - BlockInputStreamPtr input_stream; - BlockOutputStreamPtr output_stream; - SCOPE_EXIT({ - std::lock_guard lock(query_streams_mutex); - for (auto * e : executors) - e->cancel(); - }); - - if (tryGetQueryStreams(input_stream, output_stream)) - { - if (input_stream) - { - input_stream->cancel(kill); - return CancellationCode::CancelSent; - } - return CancellationCode::CancelCannotBeSent; - } - /// Query is not even started is_killed.store(true); + + std::lock_guard lock(executors_mutex); + for (auto * e : executors) + e->cancel(); + return CancellationCode::CancelSent; } void QueryStatus::addPipelineExecutor(PipelineExecutor * e) { - std::lock_guard lock(query_streams_mutex); + std::lock_guard lock(executors_mutex); assert(std::find(executors.begin(), executors.end(), e) == executors.end()); executors.push_back(e); } void QueryStatus::removePipelineExecutor(PipelineExecutor * e) { - std::lock_guard lock(query_streams_mutex); + std::lock_guard lock(executors_mutex); assert(std::find(executors.begin(), executors.end(), e) != executors.end()); std::erase_if(executors, [e](PipelineExecutor * x) { return x == e; }); } diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 1adad84c040..2e300472647 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -103,13 +103,7 @@ protected: /// Be careful using it. For example, queries field of ProcessListForUser could be modified concurrently. const ProcessListForUser * getUserProcessList() const { return user_process_list; } - mutable std::mutex query_streams_mutex; - - /// Streams with query results, point to BlockIO from executeQuery() - /// This declaration is compatible with notes about BlockIO::process_list_entry: - /// there are no cyclic dependencies: BlockIO::in,out point to objects inside ProcessListElement (not whole object) - BlockInputStreamPtr query_stream_in; - BlockOutputStreamPtr query_stream_out; + mutable std::mutex executors_mutex; /// Array of PipelineExecutors to be cancelled when a cancelQuery is received std::vector executors; @@ -173,18 +167,6 @@ public: QueryStatusInfo getInfo(bool get_thread_list = false, bool get_profile_events = false, bool get_settings = false) const; - /// Copies pointers to in/out streams - void setQueryStreams(const BlockIO & io); - - /// Frees in/out streams - void releaseQueryStreams(); - - /// It means that ProcessListEntry still exists, but stream was already destroyed - bool streamsAreReleased(); - - /// Get query in/out pointers from BlockIO - bool tryGetQueryStreams(BlockInputStreamPtr & in, BlockOutputStreamPtr & out) const; - CancellationCode cancelQuery(bool kill); bool isKilled() const { return is_killed; } diff --git a/src/Interpreters/QueryViewsLog.h b/src/Interpreters/QueryViewsLog.h index e751224a51e..34e7532d5d4 100644 --- a/src/Interpreters/QueryViewsLog.h +++ b/src/Interpreters/QueryViewsLog.h @@ -37,7 +37,7 @@ struct QueryViewsLogElement String target_name; ViewType type = ViewType::DEFAULT; std::unique_ptr thread_status = nullptr; - UInt64 elapsed_ms = 0; + std::atomic_uint64_t elapsed_ms = 0; std::chrono::time_point event_time; ViewStatus event_status = ViewStatus::QUERY_START; diff --git a/src/Interpreters/SortedBlocksWriter.cpp b/src/Interpreters/SortedBlocksWriter.cpp index 3ce9f2d1b90..ebe4aba71ab 100644 --- a/src/Interpreters/SortedBlocksWriter.cpp +++ b/src/Interpreters/SortedBlocksWriter.cpp @@ -1,6 +1,6 @@ 
#include #include -#include +#include #include #include #include @@ -15,7 +15,7 @@ namespace DB namespace { -std::unique_ptr flushToFile(const String & tmp_path, const Block & header, QueryPipeline pipeline, const String & codec) +std::unique_ptr flushToFile(const String & tmp_path, const Block & header, QueryPipelineBuilder pipeline, const String & codec) { auto tmp_file = createTemporaryFile(tmp_path); @@ -24,10 +24,11 @@ std::unique_ptr flushToFile(const String & tmp_path, const Block return tmp_file; } -SortedBlocksWriter::SortedFiles flushToManyFiles(const String & tmp_path, const Block & header, QueryPipeline pipeline, +SortedBlocksWriter::SortedFiles flushToManyFiles(const String & tmp_path, const Block & header, QueryPipelineBuilder builder, const String & codec, std::function callback = [](const Block &){}) { std::vector> files; + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); PullingPipelineExecutor executor(pipeline); Block block; @@ -38,7 +39,7 @@ SortedBlocksWriter::SortedFiles flushToManyFiles(const String & tmp_path, const callback(block); - QueryPipeline one_block_pipeline; + QueryPipelineBuilder one_block_pipeline; Chunk chunk(block.getColumns(), block.rows()); one_block_pipeline.init(Pipe(std::make_shared(block.cloneEmpty(), std::move(chunk)))); auto tmp_file = flushToFile(tmp_path, header, std::move(one_block_pipeline), codec); @@ -126,7 +127,7 @@ SortedBlocksWriter::TmpFilePtr SortedBlocksWriter::flush(const BlocksList & bloc if (pipes.empty()) return {}; - QueryPipeline pipeline; + QueryPipelineBuilder pipeline; pipeline.init(Pipe::unitePipes(std::move(pipes))); if (pipeline.getNumStreams() > 1) @@ -179,7 +180,7 @@ SortedBlocksWriter::PremergedFiles SortedBlocksWriter::premerge() if (pipes.size() == num_files_for_merge || &file == &files.back()) { - QueryPipeline pipeline; + QueryPipelineBuilder pipeline; pipeline.init(Pipe::unitePipes(std::move(pipes))); pipes = Pipes(); @@ -212,7 +213,7 @@ SortedBlocksWriter::PremergedFiles SortedBlocksWriter::premerge() SortedBlocksWriter::SortedFiles SortedBlocksWriter::finishMerge(std::function callback) { PremergedFiles files = premerge(); - QueryPipeline pipeline; + QueryPipelineBuilder pipeline; pipeline.init(std::move(files.pipe)); if (pipeline.getNumStreams() > 1) @@ -293,20 +294,21 @@ Block SortedBlocksBuffer::mergeBlocks(Blocks && blocks) const Blocks tmp_blocks; - QueryPipeline pipeline; - pipeline.init(Pipe::unitePipes(std::move(pipes))); + QueryPipelineBuilder builder; + builder.init(Pipe::unitePipes(std::move(pipes))); - if (pipeline.getNumStreams() > 1) + if (builder.getNumStreams() > 1) { auto transform = std::make_shared( - pipeline.getHeader(), - pipeline.getNumStreams(), + builder.getHeader(), + builder.getNumStreams(), sort_description, num_rows); - pipeline.addTransform(std::move(transform)); + builder.addTransform(std::move(transform)); } + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); PullingPipelineExecutor executor(pipeline); Block block; while (executor.pull(block)) diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index f6f7706dc24..7212597becc 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -488,9 +489,11 @@ void SystemLog::flushImpl(const std::vector & to_flush, InterpreterInsertQuery interpreter(query_ptr, insert_context); BlockIO io = interpreter.execute(); - io.out->writePrefix(); - io.out->write(block); - 
io.out->writeSuffix(); + PushingPipelineExecutor executor(io.pipeline); + + executor.start(); + executor.push(block); + executor.finish(); } catch (...) { diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 52f9c6b6fbf..d3ebd5ee0b7 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -500,17 +500,17 @@ void ThreadStatus::logToQueryViewsLog(const ViewRuntimeData & vinfo) QueryViewsLogElement element; - element.event_time = time_in_seconds(vinfo.runtime_stats.event_time); - element.event_time_microseconds = time_in_microseconds(vinfo.runtime_stats.event_time); - element.view_duration_ms = vinfo.runtime_stats.elapsed_ms; + element.event_time = time_in_seconds(vinfo.runtime_stats->event_time); + element.event_time_microseconds = time_in_microseconds(vinfo.runtime_stats->event_time); + element.view_duration_ms = vinfo.runtime_stats->elapsed_ms; element.initial_query_id = query_id; element.view_name = vinfo.table_id.getFullTableName(); element.view_uuid = vinfo.table_id.uuid; - element.view_type = vinfo.runtime_stats.type; + element.view_type = vinfo.runtime_stats->type; if (vinfo.query) element.view_query = getCleanQueryAst(vinfo.query, query_context_ptr); - element.view_target = vinfo.runtime_stats.target_name; + element.view_target = vinfo.runtime_stats->target_name; auto events = std::make_shared(performance_counters.getPartiallyAtomicSnapshot()); element.read_rows = progress_in.read_rows.load(std::memory_order_relaxed); @@ -523,7 +523,7 @@ void ThreadStatus::logToQueryViewsLog(const ViewRuntimeData & vinfo) element.profile_counters = events; } - element.status = vinfo.runtime_stats.event_status; + element.status = vinfo.runtime_stats->event_status; element.exception_code = 0; if (vinfo.exception) { diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index bf3bbf22b8c..2d1b6b3f239 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -334,7 +334,7 @@ void RestoreQualifiedNamesMatcher::Data::changeTable(ASTIdentifier & identifier) { case IdentifierSemantic::ColumnMatch::AliasedTableName: case IdentifierSemantic::ColumnMatch::TableName: - case IdentifierSemantic::ColumnMatch::DbAndTable: + case IdentifierSemantic::ColumnMatch::DBAndTable: IdentifierSemantic::setColumnLongName(identifier, remote_table); break; default: diff --git a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp b/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp index b6fb4d8d5e5..8e8bb0fdb73 100644 --- a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp +++ b/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp @@ -110,8 +110,7 @@ public: shell_command_source_configuration, process_pool)); - QueryPipeline pipeline; - pipeline.init(std::move(pipe)); + QueryPipeline pipeline(std::move(pipe)); PullingPipelineExecutor executor(pipeline); diff --git a/src/Interpreters/UserDefinedSQLObjectsLoader.cpp b/src/Interpreters/UserDefinedSQLObjectsLoader.cpp index 5b2a5605de7..f8379edd9e7 100644 --- a/src/Interpreters/UserDefinedSQLObjectsLoader.cpp +++ b/src/Interpreters/UserDefinedSQLObjectsLoader.cpp @@ -21,6 +21,7 @@ #include #include +#include namespace DB diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index c05118b7c6a..ae304906476 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ 
b/src/Interpreters/evaluateConstantExpression.cpp @@ -104,24 +104,6 @@ ASTPtr evaluateConstantExpressionForDatabaseName(const ASTPtr & node, ContextPtr return res; } -std::tuple evaluateDatabaseNameForMergeEngine(const ASTPtr & node, ContextPtr context) -{ - if (const auto * func = node->as(); func && func->name == "REGEXP") - { - if (func->arguments->children.size() != 1) - throw Exception("Arguments for REGEXP in Merge ENGINE should be 1", ErrorCodes::BAD_ARGUMENTS); - - auto * literal = func->arguments->children[0]->as(); - if (!literal || literal->value.safeGet().empty()) - throw Exception("Argument for REGEXP in Merge ENGINE should be a non empty String Literal", ErrorCodes::BAD_ARGUMENTS); - - return std::tuple{true, func->arguments->children[0]}; - } - - auto ast = evaluateConstantExpressionForDatabaseName(node, context); - return std::tuple{false, ast}; -} - namespace { diff --git a/src/Interpreters/evaluateConstantExpression.h b/src/Interpreters/evaluateConstantExpression.h index 3b817080fe0..b95982f5b99 100644 --- a/src/Interpreters/evaluateConstantExpression.h +++ b/src/Interpreters/evaluateConstantExpression.h @@ -53,6 +53,4 @@ ASTPtr evaluateConstantExpressionForDatabaseName(const ASTPtr & node, ContextPtr */ std::optional evaluateExpressionOverConstantCondition(const ASTPtr & node, const ExpressionActionsPtr & target_expr, size_t & limit); -// Evaluate database name or regexp for StorageMerge and TableFunction merge -std::tuple evaluateDatabaseNameForMergeEngine(const ASTPtr & node, ContextPtr context); } diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 180a4f9af3e..576c1f3ffdd 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -212,11 +213,11 @@ BlockIO getDistributedDDLStatus(const String & node_path, const DDLLogEntry & en if (context->getSettingsRef().distributed_ddl_task_timeout == 0) return io; - ProcessorPtr processor = std::make_shared(node_path, entry, context, hosts_to_wait); - io.pipeline.init(Pipe{processor}); + auto source = std::make_shared(node_path, entry, context, hosts_to_wait); + io.pipeline = QueryPipeline(std::move(source)); if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE) - io.pipeline.setSinks([](const Block & header, QueryPipeline::StreamType){ return std::make_shared(header); }); + io.pipeline.complete(std::make_shared(io.pipeline.getHeader())); return io; } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 0b1746feebc..077bd09f814 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -51,12 +51,13 @@ #include #include -#include "IO/CompressionMethod.h" +#include +#include #include #include #include -#include +#include #include #include @@ -81,6 +82,7 @@ namespace ErrorCodes extern const int INTO_OUTFILE_NOT_ALLOWED; extern const int QUERY_WAS_CANCELLED; extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } @@ -594,7 +596,7 @@ static std::tuple executeQueryImpl( auto timeout = settings.wait_for_async_insert_timeout.totalMilliseconds(); auto query_id = context->getCurrentQueryId(); auto source = std::make_shared(query_id, timeout, *queue); - io.pipeline.init(Pipe(source)); + io.pipeline = QueryPipeline(Pipe(std::move(source))); } return std::make_tuple(ast, std::move(io)); @@ -634,7 +636,6 @@ static std::tuple executeQueryImpl( } 
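From this point on, `executeQuery` works with a single `BlockIO::pipeline` instead of the old `in`/`out` streams: the pipeline is either pushing (an INSERT target), pulling (produces result blocks), or completed (runs on its own). On the pushing side, the old `writePrefix()/write()/writeSuffix()` sequence becomes a `PushingPipelineExecutor`, as in the `SystemLog::flushImpl` hunk above. A minimal sketch, assuming `block` matches the INSERT header:

```cpp
// Sketch of the pushing pattern; mirrors the SystemLog::flushImpl hunk above.
InterpreterInsertQuery interpreter(query_ptr, insert_context);
BlockIO io = interpreter.execute();            // pipeline is in the "pushing" state

PushingPipelineExecutor executor(io.pipeline);
executor.start();                              // replaces io.out->writePrefix()
executor.push(block);                          // replaces io.out->write(block); may be repeated
executor.finish();                             // replaces io.out->writeSuffix()
```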
QueryPipeline & pipeline = res.pipeline; - bool use_processors = pipeline.initialized(); if (const auto * insert_interpreter = typeid_cast(&*interpreter)) { @@ -650,54 +651,23 @@ static std::tuple executeQueryImpl( if ((*process_list_entry)->isKilled()) throw Exception("Query '" + (*process_list_entry)->getInfo().client_info.current_query_id + "' is killed in pending state", ErrorCodes::QUERY_WAS_CANCELLED); - else if (!use_processors) - (*process_list_entry)->setQueryStreams(res); } /// Hold element of process list till end of query execution. res.process_list_entry = process_list_entry; - if (use_processors) + if (pipeline.pulling() || pipeline.completed()) { /// Limits on the result, the quota on the result, and also callback for progress. /// Limits apply only to the final result. pipeline.setProgressCallback(context->getProgressCallback()); pipeline.setProcessListElement(context->getProcessListElement()); - if (stage == QueryProcessingStage::Complete && !pipeline.isCompleted()) - { - pipeline.resize(1); - pipeline.addSimpleTransform([&](const Block & header) - { - auto transform = std::make_shared(header, limits); - transform->setQuota(quota); - return transform; - }); - } + if (stage == QueryProcessingStage::Complete && pipeline.pulling()) + pipeline.setLimitsAndQuota(limits, quota); } - else + else if (pipeline.pushing()) { - /// Limits on the result, the quota on the result, and also callback for progress. - /// Limits apply only to the final result. - if (res.in) - { - res.in->setProgressCallback(context->getProgressCallback()); - res.in->setProcessListElement(context->getProcessListElement()); - if (stage == QueryProcessingStage::Complete) - { - if (!interpreter->ignoreQuota()) - res.in->setQuota(quota); - if (!interpreter->ignoreLimits()) - res.in->setLimits(limits); - } - } - - if (res.out) - { - if (auto * stream = dynamic_cast(res.out.get())) - { - stream->setProcessListElement(context->getProcessListElement()); - } - } + pipeline.setProcessListElement(context->getProcessListElement()); } /// Everything related to query log. @@ -724,7 +694,8 @@ static std::tuple executeQueryImpl( /// Log into system table start of query execution, if need. if (log_queries) { - if (use_processors) + /// This check is not obvious, but without it 01220_scalar_optimization_in_alter fails. 
+ if (pipeline.initialized()) { const auto & info = context->getQueryAccessInfo(); elem.query_databases = info.databases; @@ -803,9 +774,10 @@ static std::tuple executeQueryImpl( log_queries, log_queries_min_type = settings.log_queries_min_type, log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(), - status_info_to_query_log + status_info_to_query_log, + pulling_pipeline = pipeline.pulling() ] - (IBlockInputStream * stream_in, IBlockOutputStream * stream_out, QueryPipeline * query_pipeline) mutable + (QueryPipeline & query_pipeline) mutable { QueryStatus * process_list_elem = context->getProcessListElement(); @@ -833,30 +805,15 @@ static std::tuple executeQueryImpl( if (progress_callback) progress_callback(Progress(WriteProgress(info.written_rows, info.written_bytes))); - if (stream_in) + if (pulling_pipeline) { - const BlockStreamProfileInfo & stream_in_info = stream_in->getProfileInfo(); - - /// NOTE: INSERT SELECT query contains zero metrics - elem.result_rows = stream_in_info.rows; - elem.result_bytes = stream_in_info.bytes; + query_pipeline.tryGetResultRowsAndBytes(elem.result_rows, elem.result_bytes); } - else if (stream_out) /// will be used only for ordinary INSERT queries + else /// will be used only for ordinary INSERT queries { - if (const auto * counting_stream = dynamic_cast(stream_out)) - { - /// NOTE: Redundancy. The same values could be extracted from process_list_elem->progress_out.query_settings = process_list_elem->progress_in - elem.result_rows = counting_stream->getProgress().read_rows; - elem.result_bytes = counting_stream->getProgress().read_bytes; - } - } - else if (query_pipeline) - { - if (const auto * output_format = query_pipeline->getOutputFormat()) - { - elem.result_rows = output_format->getResultRows(); - elem.result_bytes = output_format->getResultBytes(); - } + auto progress_out = process_list_elem->getProgressOut(); + elem.result_rows = progress_out.read_rows; + elem.result_bytes = progress_out.read_bytes; } if (elem.read_rows != 0) @@ -1012,12 +969,10 @@ BlockIO executeQuery( bool internal, QueryProcessingStage::Enum stage) { - BlockIO res = executeQuery(query, context, internal, stage); + if (!allow_processors) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Flag allow_processors is deprecated for executeQuery"); - if (!allow_processors && res.pipeline.initialized()) - res.in = res.getInputStream(); - - return res; + return executeQuery(query, context, internal, stage); } @@ -1065,86 +1020,19 @@ void executeQuery( std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete, &istr); auto & pipeline = streams.pipeline; + std::unique_ptr compressed_buffer; try { - if (streams.out) + if (pipeline.pushing()) { - auto pipe = getSourceFromASTInsertQuery(ast, true, streams.out->getHeader(), context, nullptr); - - pipeline.init(std::move(pipe)); - pipeline.resize(1); - pipeline.setSinks([&](const Block &, Pipe::StreamType) - { - return std::make_shared(streams.out); - }); - - auto executor = pipeline.execute(); - executor->execute(pipeline.getNumThreads()); + auto pipe = getSourceFromASTInsertQuery(ast, true, pipeline.getHeader(), context, nullptr); + pipeline.complete(std::move(pipe)); } - else if (streams.in) - { - assert(!pipeline.initialized()); - - const auto * ast_query_with_output = dynamic_cast(ast.get()); - - WriteBuffer * out_buf = &ostr; - std::unique_ptr compressed_buffer; - if (ast_query_with_output && ast_query_with_output->out_file) - { - if (!allow_into_outfile) 
- throw Exception("INTO OUTFILE is not allowed", ErrorCodes::INTO_OUTFILE_NOT_ALLOWED); - - const auto & out_file = ast_query_with_output->out_file->as().value.safeGet(); - - std::string compression_method; - if (ast_query_with_output->compression) - { - const auto & compression_method_node = ast_query_with_output->compression->as(); - compression_method = compression_method_node.value.safeGet(); - } - - compressed_buffer = wrapWriteBufferWithCompressionMethod( - std::make_unique(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT), - chooseCompressionMethod(out_file, compression_method), - /* compression level = */ 3 - ); - } - - String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr) - ? getIdentifierName(ast_query_with_output->format) - : context->getDefaultFormat(); - - auto out = FormatFactory::instance().getOutputStreamParallelIfPossible( - format_name, - compressed_buffer ? *compressed_buffer : *out_buf, - streams.in->getHeader(), - context, - {}, - output_format_settings); - - /// Save previous progress callback if any. TODO Do it more conveniently. - auto previous_progress_callback = context->getProgressCallback(); - - /// NOTE Progress callback takes shared ownership of 'out'. - streams.in->setProgressCallback([out, previous_progress_callback] (const Progress & progress) - { - if (previous_progress_callback) - previous_progress_callback(progress); - out->onProgress(progress); - }); - - if (set_result_details) - set_result_details( - context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone()); - - copyData(*streams.in, *out, [](){ return false; }, [&out](const Block &) { out->flush(); }); - } - else if (pipeline.initialized()) + else if (pipeline.pulling()) { const ASTQueryWithOutput * ast_query_with_output = dynamic_cast(ast.get()); WriteBuffer * out_buf = &ostr; - std::unique_ptr compressed_buffer; if (ast_query_with_output && ast_query_with_output->out_file) { if (!allow_into_outfile) @@ -1167,54 +1055,47 @@ void executeQuery( } String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr) - ? getIdentifierName(ast_query_with_output->format) - : context->getDefaultFormat(); + ? getIdentifierName(ast_query_with_output->format) + : context->getDefaultFormat(); - if (!pipeline.isCompleted()) + auto out = FormatFactory::instance().getOutputFormatParallelIfPossible( + format_name, + compressed_buffer ? *compressed_buffer : *out_buf, + materializeBlock(pipeline.getHeader()), + context, + {}, + output_format_settings); + + out->setAutoFlush(); + + /// Save previous progress callback if any. TODO Do it more conveniently. + auto previous_progress_callback = context->getProgressCallback(); + + /// NOTE Progress callback takes shared ownership of 'out'. + pipeline.setProgressCallback([out, previous_progress_callback] (const Progress & progress) { - pipeline.addSimpleTransform([](const Block & header) - { - return std::make_shared(header); - }); + if (previous_progress_callback) + previous_progress_callback(progress); + out->onProgress(progress); + }); - auto out = FormatFactory::instance().getOutputFormatParallelIfPossible( - format_name, - compressed_buffer ? 
*compressed_buffer : *out_buf, - pipeline.getHeader(), - context, - {}, - output_format_settings); + out->setBeforeFinalizeCallback(before_finalize_callback); - out->setAutoFlush(); + if (set_result_details) + set_result_details( + context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone()); - /// Save previous progress callback if any. TODO Do it more conveniently. - auto previous_progress_callback = context->getProgressCallback(); + pipeline.complete(std::move(out)); + } + else + { + pipeline.setProgressCallback(context->getProgressCallback()); + } - /// NOTE Progress callback takes shared ownership of 'out'. - pipeline.setProgressCallback([out, previous_progress_callback] (const Progress & progress) - { - if (previous_progress_callback) - previous_progress_callback(progress); - out->onProgress(progress); - }); - - out->setBeforeFinalizeCallback(before_finalize_callback); - - if (set_result_details) - set_result_details( - context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone()); - - pipeline.setOutputFormat(std::move(out)); - } - else - { - pipeline.setProgressCallback(context->getProgressCallback()); - } - - { - auto executor = pipeline.execute(); - executor->execute(pipeline.getNumThreads()); - } + if (pipeline.initialized()) + { + CompletedPipelineExecutor executor(pipeline); + executor.execute(); } else { @@ -1234,20 +1115,15 @@ void executeTrivialBlockIO(BlockIO & streams, ContextPtr context) { try { - if (streams.out) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Query stream requires input, but no input buffer provided, it's a bug"); - if (streams.in) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Query stream requires output, but no output buffer provided, it's a bug"); - if (!streams.pipeline.initialized()) return; - if (!streams.pipeline.isCompleted()) + if (!streams.pipeline.completed()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Query pipeline requires output, but no output buffer provided, it's a bug"); streams.pipeline.setProgressCallback(context->getProgressCallback()); - auto executor = streams.pipeline.execute(); - executor->execute(streams.pipeline.getNumThreads()); + CompletedPipelineExecutor executor(streams.pipeline); + executor.execute(); } catch (...) { diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index 6f932caa30a..83e3f22c1b1 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -2,7 +2,8 @@ #include #include -#include +#include +#include namespace DB { @@ -10,7 +11,6 @@ namespace DB class ReadBuffer; class WriteBuffer; - /// Parse and execute a query. void executeQuery( ReadBuffer & istr, /// Where to read query from (and data for INSERT, if present). diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 1aa116fefc4..06a0b6d46f5 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -67,6 +67,11 @@ ASTPtr ASTAlterCommand::clone() const res->rename_to = rename_to->clone(); res->children.push_back(res->rename_to); } + if (comment) + { + res->comment = comment->clone(); + res->children.push_back(res->comment); + } return res; } @@ -138,6 +143,12 @@ void ASTAlterCommand::formatImpl( settings.ostr << " " << (settings.hilite ? hilite_none : ""); comment->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::MODIFY_COMMENT) + { + settings.ostr << (settings.hilite ? 
hilite_keyword : "") << indent_str << "MODIFY COMMENT" << (settings.hilite ? hilite_none : ""); + settings.ostr << " " << (settings.hilite ? hilite_none : ""); + comment->formatImpl(settings, state, frame); + } else if (type == ASTAlterCommand::MODIFY_ORDER_BY) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY ORDER BY " << (settings.hilite ? hilite_none : ""); diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 7c301d581e6..9b40586e09f 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -72,6 +72,8 @@ public: LIVE_VIEW_REFRESH, MODIFY_DATABASE_SETTING, + + MODIFY_COMMENT, }; Type type = NO_TYPE; diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 3a21d704eb9..9a8838956f9 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -14,6 +14,7 @@ namespace ErrorCodes extern const int TOO_DEEP_AST; extern const int BAD_ARGUMENTS; extern const int UNKNOWN_ELEMENT_IN_AST; + extern const int LOGICAL_ERROR; } @@ -47,6 +48,23 @@ size_t IAST::checkSize(size_t max_size) const return res; } +void IAST::reset(IAST *& field) +{ + if (field == nullptr) + return; + + const auto child = std::find_if(children.begin(), children.end(), [field](const auto & p) + { + return p.get() == field; + }); + + if (child == children.end()) + throw Exception("AST subtree not found in children", ErrorCodes::LOGICAL_ERROR); + + children.erase(child); + field = nullptr; +} + IAST::Hash IAST::getTreeHash() const { diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index 2f9212da632..88736e8137e 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -157,6 +157,8 @@ public: set(field, child); } + void reset(IAST *& field); + /// Convert to a string. /// Format settings. diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index e89302fd212..2eade2079da 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -50,6 +50,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_drop_projection("DROP PROJECTION"); ParserKeyword s_clear_projection("CLEAR PROJECTION"); ParserKeyword s_materialize_projection("MATERIALIZE PROJECTION"); + ParserKeyword s_modify_comment("MODIFY COMMENT"); ParserKeyword s_add("ADD"); ParserKeyword s_drop("DROP"); @@ -754,6 +755,13 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; command->type = ASTAlterCommand::MODIFY_QUERY; } + else if (s_modify_comment.ignore(pos, expected)) + { + if (!parser_string_literal.parse(pos, command->comment, expected)) + return false; + + command->type = ASTAlterCommand::MODIFY_COMMENT; + } else return false; } diff --git a/src/Parsers/ParserAlterQuery.h b/src/Parsers/ParserAlterQuery.h index de9d752d1a3..b0029ff88fd 100644 --- a/src/Parsers/ParserAlterQuery.h +++ b/src/Parsers/ParserAlterQuery.h @@ -18,6 +18,7 @@ namespace DB * [MODIFY SETTING setting_name=setting_value, ...] * [RESET SETTING setting_name, ...] * [COMMENT COLUMN [IF EXISTS] col_name string] + * [MODIFY COMMENT string] * [DROP|DETACH|ATTACH PARTITION|PART partition, ...] * [FETCH PARTITION partition FROM ...] 
* [FREEZE [PARTITION] [WITH NAME name]] diff --git a/src/Parsers/ParserExplainQuery.cpp b/src/Parsers/ParserExplainQuery.cpp index b4ba0523239..ffaab0f2b6d 100644 --- a/src/Parsers/ParserExplainQuery.cpp +++ b/src/Parsers/ParserExplainQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -54,6 +55,7 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserCreateTableQuery create_p; ParserSelectWithUnionQuery select_p; + ParserInsertQuery insert_p(end); ASTPtr query; if (kind == ASTExplainQuery::ExplainKind::ParsedAST) { @@ -64,7 +66,8 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; } else if (select_p.parse(pos, query, expected) || - create_p.parse(pos, query, expected)) + create_p.parse(pos, query, expected) || + insert_p.parse(pos, query, expected)) explain_query->setExplainedQuery(std::move(query)); else return false; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 9a5a99f8f5a..f97bc77272c 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -17,22 +18,27 @@ using namespace std::literals; struct ParserTestCase { - std::shared_ptr parser; const std::string_view input_text; const char * expected_ast = nullptr; }; -std::ostream & operator<<(std::ostream & ostr, const ParserTestCase & test_case) +std::ostream & operator<<(std::ostream & ostr, const std::shared_ptr parser) { - return ostr << "parser: " << test_case.parser->getName() << ", input: " << test_case.input_text; + return ostr << "Praser: " << parser->getName(); } -class ParserTest : public ::testing::TestWithParam +std::ostream & operator<<(std::ostream & ostr, const ParserTestCase & test_case) +{ + return ostr << "ParserTestCase input: " << test_case.input_text; +} + +class ParserTest : public ::testing::TestWithParam, ParserTestCase>> {}; TEST_P(ParserTest, parseQuery) { - const auto & [parser, input_text, expected_ast] = GetParam(); + const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); ASSERT_NE(nullptr, parser); @@ -49,86 +55,92 @@ TEST_P(ParserTest, parseQuery) } -INSTANTIATE_TEST_SUITE_P(ParserOptimizeQuery, ParserTest, ::testing::Values( - ParserTestCase - { - std::make_shared(), - "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('a, b')", - "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('a, b')" - }, - ParserTestCase - { - std::make_shared(), - "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]')", - "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]')" - }, - ParserTestCase - { - std::make_shared(), - "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') EXCEPT b", - "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') EXCEPT b" - }, - ParserTestCase - { - std::make_shared(), - "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') EXCEPT (a, b)", - "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') EXCEPT (a, b)" - }, - ParserTestCase - { - std::make_shared(), - "OPTIMIZE TABLE table_name DEDUPLICATE BY a, b, c", - "OPTIMIZE TABLE table_name DEDUPLICATE BY a, b, c" - }, - ParserTestCase - { - std::make_shared(), - "OPTIMIZE TABLE table_name DEDUPLICATE BY *", - "OPTIMIZE TABLE table_name DEDUPLICATE BY *" - }, - ParserTestCase - { - std::make_shared(), - "OPTIMIZE TABLE table_name DEDUPLICATE BY * EXCEPT a", - "OPTIMIZE TABLE table_name DEDUPLICATE BY * EXCEPT a" - }, - ParserTestCase - { 
- std::make_shared(), - "OPTIMIZE TABLE table_name DEDUPLICATE BY * EXCEPT (a, b)", - "OPTIMIZE TABLE table_name DEDUPLICATE BY * EXCEPT (a, b)" - } -)); +INSTANTIATE_TEST_SUITE_P(ParserOptimizeQuery, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list + { + { + "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('a, b')", + "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('a, b')" + }, + { + "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]')", + "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]')" + }, + { + "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') EXCEPT b", + "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') EXCEPT b" + }, + { + "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') EXCEPT (a, b)", + "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') EXCEPT (a, b)" + }, + { + "OPTIMIZE TABLE table_name DEDUPLICATE BY a, b, c", + "OPTIMIZE TABLE table_name DEDUPLICATE BY a, b, c" + }, + { + "OPTIMIZE TABLE table_name DEDUPLICATE BY *", + "OPTIMIZE TABLE table_name DEDUPLICATE BY *" + }, + { + "OPTIMIZE TABLE table_name DEDUPLICATE BY * EXCEPT a", + "OPTIMIZE TABLE table_name DEDUPLICATE BY * EXCEPT a" + }, + { + "OPTIMIZE TABLE table_name DEDUPLICATE BY * EXCEPT (a, b)", + "OPTIMIZE TABLE table_name DEDUPLICATE BY * EXCEPT (a, b)" + } + } +))); -INSTANTIATE_TEST_SUITE_P(ParserOptimizeQuery_FAIL, ParserTest, ::testing::Values( - ParserTestCase - { - std::make_shared(), - "OPTIMIZE TABLE table_name DEDUPLICATE BY", - }, - ParserTestCase - { - std::make_shared(), - "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') APPLY(x)", - }, - ParserTestCase - { - std::make_shared(), - "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') REPLACE(y)", - }, - ParserTestCase - { - std::make_shared(), - "OPTIMIZE TABLE table_name DEDUPLICATE BY * APPLY(x)", - }, - ParserTestCase - { - std::make_shared(), - "OPTIMIZE TABLE table_name DEDUPLICATE BY * REPLACE(y)", - }, - ParserTestCase - { - std::make_shared(), - "OPTIMIZE TABLE table_name DEDUPLICATE BY db.a, db.b, db.c", - } -)); +INSTANTIATE_TEST_SUITE_P(ParserOptimizeQuery_FAIL, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list + { + { + "OPTIMIZE TABLE table_name DEDUPLICATE BY", + }, + { + "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') APPLY(x)", + }, + { + "OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') REPLACE(y)", + }, + { + "OPTIMIZE TABLE table_name DEDUPLICATE BY * APPLY(x)", + }, + { + "OPTIMIZE TABLE table_name DEDUPLICATE BY * REPLACE(y)", + }, + { + "OPTIMIZE TABLE table_name DEDUPLICATE BY db.a, db.b, db.c", + } + } +))); + + +INSTANTIATE_TEST_SUITE_P(ParserAlterCommand_MODIFY_COMMENT, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list + { + { + // Empty comment value + "MODIFY COMMENT ''", + "MODIFY COMMENT ''", + }, +// { +// // No comment - same as empty comment +// "MODIFY COMMENT NULL", +// "MODIFY COMMENT ''", +// }, + { + // Non-empty comment value + "MODIFY COMMENT 'some comment value'", + "MODIFY COMMENT 'some comment value'", + } + } +))); diff --git a/src/Processors/Chain.cpp b/src/Processors/Chain.cpp new file mode 100644 index 00000000000..5e3b2e6a678 --- /dev/null +++ b/src/Processors/Chain.cpp @@ -0,0 +1,126 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +static void checkSingleInput(const 
IProcessor & transform) +{ + if (transform.getInputs().size() != 1) + throw Exception("Transform for chain should have single input, " + "but " + transform.getName() + " has " + + toString(transform.getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR); + + if (transform.getInputs().front().isConnected()) + throw Exception("Transform for chain has connected input.", ErrorCodes::LOGICAL_ERROR); +} + +static void checkSingleOutput(const IProcessor & transform) +{ + if (transform.getOutputs().size() != 1) + throw Exception("Transform for chain should have single output, " + "but " + transform.getName() + " has " + + toString(transform.getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR); + + if (transform.getOutputs().front().isConnected()) + throw Exception("Transform for chain has connected input.", ErrorCodes::LOGICAL_ERROR); +} + +static void checkTransform(const IProcessor & transform) +{ + checkSingleInput(transform); + checkSingleOutput(transform); +} + +static void checkInitialized(const std::list & processors) +{ + if (processors.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Drain is not initialized"); +} + +Chain::Chain(ProcessorPtr processor) +{ + checkTransform(*processor); + processors.emplace_back(std::move(processor)); +} + +Chain::Chain(std::list processors_) : processors(std::move(processors_)) +{ + if (processors.empty()) + return; + + checkSingleInput(*processors.front()); + checkSingleOutput(*processors.back()); + + for (const auto & processor : processors) + { + for (const auto & input : processor->getInputs()) + if (&input != &getInputPort() && !input.isConnected()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot initialize chain because there is a not connected input for {}", + processor->getName()); + + for (const auto & output : processor->getOutputs()) + if (&output != &getOutputPort() && !output.isConnected()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot initialize chain because there is a not connected output for {}", + processor->getName()); + } +} + +void Chain::addSource(ProcessorPtr processor) +{ + checkTransform(*processor); + + if (!processors.empty()) + connect(processor->getOutputs().front(), getInputPort()); + + processors.emplace_front(std::move(processor)); +} + +void Chain::addSink(ProcessorPtr processor) +{ + checkTransform(*processor); + + if (!processors.empty()) + connect(getOutputPort(), processor->getInputs().front()); + + processors.emplace_front(std::move(processor)); +} + +IProcessor & Chain::getSource() +{ + checkInitialized(processors); + return *processors.front(); +} + +IProcessor & Chain::getSink() +{ + checkInitialized(processors); + return *processors.back(); +} + +InputPort & Chain::getInputPort() const +{ + checkInitialized(processors); + return processors.front()->getInputs().front(); +} + +OutputPort & Chain::getOutputPort() const +{ + checkInitialized(processors); + return processors.back()->getOutputs().front(); +} + +void Chain::reset() +{ + Chain to_remove = std::move(*this); + *this = Chain(); +} + +} diff --git a/src/Processors/Chain.h b/src/Processors/Chain.h new file mode 100644 index 00000000000..da5167f9c7a --- /dev/null +++ b/src/Processors/Chain.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class Chain +{ +public: + Chain() = default; + Chain(Chain &&) = default; + Chain(const Chain &) = delete; + + Chain & operator=(Chain &&) = default; + Chain & operator=(const Chain &) = delete; + + explicit Chain(ProcessorPtr processor); + explicit 
Chain(std::list processors); + + bool empty() const { return processors.empty(); } + + size_t getNumThreads() const { return num_threads; } + void setNumThreads(size_t num_threads_) { num_threads = num_threads_; } + + void addSource(ProcessorPtr processor); + void addSink(ProcessorPtr processor); + + IProcessor & getSource(); + IProcessor & getSink(); + + InputPort & getInputPort() const; + OutputPort & getOutputPort() const; + + const Block & getInputHeader() const { return getInputPort().getHeader(); } + const Block & getOutputHeader() const { return getOutputPort().getHeader(); } + + const std::list & getProcessors() const { return processors; } + static std::list getProcessors(Chain chain) { return std::move(chain.processors); } + + void addTableLock(TableLockHolder lock) { holder.table_locks.emplace_back(std::move(lock)); } + void addStorageHolder(StoragePtr storage) { holder.storage_holders.emplace_back(std::move(storage)); } + void attachResources(PipelineResourcesHolder holder_) { holder = std::move(holder_); } + PipelineResourcesHolder detachResources() { return std::move(holder); } + + void reset(); + +private: + PipelineResourcesHolder holder; + + /// -> source -> transform -> ... -> transform -> sink -> + /// ^ -> -> -> -> ^ + /// input port output port + std::list processors; + size_t num_threads = 0; +}; + +} diff --git a/src/Processors/Executors/CompletedPipelineExecutor.cpp b/src/Processors/Executors/CompletedPipelineExecutor.cpp new file mode 100644 index 00000000000..7dc9e50c24b --- /dev/null +++ b/src/Processors/Executors/CompletedPipelineExecutor.cpp @@ -0,0 +1,115 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +struct CompletedPipelineExecutor::Data +{ + PipelineExecutorPtr executor; + std::exception_ptr exception; + std::atomic_bool is_finished = false; + std::atomic_bool has_exception = false; + ThreadFromGlobalPool thread; + Poco::Event finish_event; + + ~Data() + { + if (thread.joinable()) + thread.join(); + } +}; + +static void threadFunction(CompletedPipelineExecutor::Data & data, ThreadGroupStatusPtr thread_group, size_t num_threads) +{ + setThreadName("QueryPipelineEx"); + + try + { + if (thread_group) + CurrentThread::attachTo(thread_group); + + SCOPE_EXIT_SAFE( + if (thread_group) + CurrentThread::detachQueryIfNotDetached(); + ); + + data.executor->execute(num_threads); + } + catch (...) 
+ { + data.exception = std::current_exception(); + data.has_exception = true; + } + + data.is_finished = true; + data.finish_event.set(); +} + +CompletedPipelineExecutor::CompletedPipelineExecutor(QueryPipeline & pipeline_) : pipeline(pipeline_) +{ + if (!pipeline.completed()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for CompletedPipelineExecutor must be completed"); +} + +void CompletedPipelineExecutor::setCancelCallback(std::function is_cancelled, size_t interactive_timeout_ms_) +{ + is_cancelled_callback = is_cancelled; + interactive_timeout_ms = interactive_timeout_ms_; +} + +void CompletedPipelineExecutor::execute() +{ + PipelineExecutor executor(pipeline.processors, pipeline.process_list_element); + + if (interactive_timeout_ms) + { + data = std::make_unique(); + data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + + auto func = [&, thread_group = CurrentThread::getGroup()]() + { + threadFunction(*data, thread_group, pipeline.getNumThreads()); + }; + + data->thread = ThreadFromGlobalPool(std::move(func)); + + while (!data->is_finished) + { + if (data->finish_event.tryWait(interactive_timeout_ms)) + break; + + if (is_cancelled_callback()) + data->executor->cancel(); + } + + if (data->has_exception) + std::rethrow_exception(data->exception); + } + else + executor.execute(pipeline.getNumThreads()); +} + +CompletedPipelineExecutor::~CompletedPipelineExecutor() +{ + try + { + if (data && data->executor) + data->executor->cancel(); + } + catch (...) + { + tryLogCurrentException("PullingAsyncPipelineExecutor"); + } +} + +} diff --git a/src/Processors/Executors/CompletedPipelineExecutor.h b/src/Processors/Executors/CompletedPipelineExecutor.h new file mode 100644 index 00000000000..e616cd6a2b7 --- /dev/null +++ b/src/Processors/Executors/CompletedPipelineExecutor.h @@ -0,0 +1,33 @@ +#pragma once +#include +#include + +namespace DB +{ + +class QueryPipeline; + +/// Executor for completed QueryPipeline. +/// Allows to specify a callback which checks if execution should be cancelled. +/// If callback is specified, runs execution in a separate thread. +class CompletedPipelineExecutor +{ +public: + explicit CompletedPipelineExecutor(QueryPipeline & pipeline_); + ~CompletedPipelineExecutor(); + + /// This callback will be called each interactive_timeout_ms (if it is not 0). + /// If returns true, query would be cancelled. 
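    /// Illustrative sketch only, not part of this patch (the atomic flag and the `pipeline` variable are caller-side assumptions):
    ///     std::atomic_bool stop{false};
    ///     CompletedPipelineExecutor executor(pipeline);
    ///     executor.setCancelCallback([&] { return stop.load(); }, /* interactive_timeout_ms = */ 100);
    ///     executor.execute();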
+ void setCancelCallback(std::function is_cancelled, size_t interactive_timeout_ms_); + + void execute(); + struct Data; + +private: + QueryPipeline & pipeline; + std::function is_cancelled_callback; + size_t interactive_timeout_ms = 0; + std::unique_ptr data; +}; + +} diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp b/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp index 736ad1ecefe..bdfbbc2874e 100644 --- a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp +++ b/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index b91c1caa4a5..d19eefeb53c 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -429,12 +429,14 @@ void PipelineExecutor::execute(size_t num_threads) bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) { - if (finished) - return false; - if (!is_execution_initialized) + { initializeExecution(1); + if (yield_flag && *yield_flag) + return true; + } + executeStepImpl(0, 1, yield_flag); if (!finished) diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 8ecbe75af3a..e1c5293302e 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -10,6 +11,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + struct PullingAsyncPipelineExecutor::Data { PipelineExecutorPtr executor; @@ -38,11 +44,11 @@ struct PullingAsyncPipelineExecutor::Data PullingAsyncPipelineExecutor::PullingAsyncPipelineExecutor(QueryPipeline & pipeline_) : pipeline(pipeline_) { - if (!pipeline.isCompleted()) - { - lazy_format = std::make_shared(pipeline.getHeader()); - pipeline.setOutputFormat(lazy_format); - } + if (!pipeline.pulling()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for PullingAsyncPipelineExecutor must be pulling"); + + lazy_format = std::make_shared(pipeline.output->getHeader()); + pipeline.complete(lazy_format); } PullingAsyncPipelineExecutor::~PullingAsyncPipelineExecutor() @@ -59,8 +65,7 @@ PullingAsyncPipelineExecutor::~PullingAsyncPipelineExecutor() const Block & PullingAsyncPipelineExecutor::getHeader() const { - return lazy_format ? lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader() - : pipeline.getHeader(); /// Empty. 
+ return lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader(); } static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGroupStatusPtr thread_group, size_t num_threads) @@ -99,7 +104,7 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) if (!data) { data = std::make_unique(); - data->executor = pipeline.execute(); + data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element); data->lazy_format = lazy_format.get(); auto func = [&, thread_group = CurrentThread::getGroup()]() diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index dc59e0a2f5a..7da2a6d3059 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -1,15 +1,25 @@ #include +#include #include #include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + PullingPipelineExecutor::PullingPipelineExecutor(QueryPipeline & pipeline_) : pipeline(pipeline_) { - pulling_format = std::make_shared(pipeline.getHeader(), has_data_flag); - pipeline.setOutputFormat(pulling_format); + if (!pipeline.pulling()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for PullingPipelineExecutor must be pulling"); + + pulling_format = std::make_shared(pipeline.output->getHeader(), has_data_flag); + pipeline.complete(pulling_format); } PullingPipelineExecutor::~PullingPipelineExecutor() @@ -32,7 +42,7 @@ const Block & PullingPipelineExecutor::getHeader() const bool PullingPipelineExecutor::pull(Chunk & chunk) { if (!executor) - executor = pipeline.execute(); + executor = std::make_shared(pipeline.processors, pipeline.process_list_element); if (!executor->executeStep(&has_data_flag)) return false; diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp new file mode 100644 index 00000000000..08ff6d4d96c --- /dev/null +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -0,0 +1,220 @@ +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +class PushingAsyncSource : public ISource +{ +public: + explicit PushingAsyncSource(const Block & header) + : ISource(header) + {} + + String getName() const override { return "PushingAsyncSource"; } + + bool setData(Chunk chunk) + { + std::unique_lock lock(mutex); + condvar.wait(lock, [this] { return !has_data || is_finished; }); + + if (is_finished) + return false; + + data.swap(chunk); + has_data = true; + condvar.notify_one(); + + return true; + } + + void finish() + { + is_finished = true; + condvar.notify_all(); + } + +protected: + + Chunk generate() override + { + std::unique_lock lock(mutex); + condvar.wait(lock, [this] { return has_data || is_finished; }); + + Chunk res; + + res.swap(data); + has_data = false; + condvar.notify_one(); + + return res; + } + +private: + Chunk data; + bool has_data = false; + std::atomic_bool is_finished = false; + std::mutex mutex; + std::condition_variable condvar; +}; + +struct PushingAsyncPipelineExecutor::Data +{ + PipelineExecutorPtr executor; + std::exception_ptr exception; + PushingAsyncSource * source = nullptr; + std::atomic_bool is_finished = false; + std::atomic_bool has_exception = false; + ThreadFromGlobalPool thread; + Poco::Event finish_event; + + ~Data() + { + if (thread.joinable()) + thread.join(); + } + + 
void rethrowExceptionIfHas() + { + if (has_exception) + { + has_exception = false; + std::rethrow_exception(std::move(exception)); + } + } +}; + +static void threadFunction(PushingAsyncPipelineExecutor::Data & data, ThreadGroupStatusPtr thread_group, size_t num_threads) +{ + setThreadName("QueryPipelineEx"); + + try + { + if (thread_group) + CurrentThread::attachTo(thread_group); + + SCOPE_EXIT_SAFE( + if (thread_group) + CurrentThread::detachQueryIfNotDetached(); + ); + + data.executor->execute(num_threads); + } + catch (...) + { + data.exception = std::current_exception(); + data.has_exception = true; + + /// Finish source in case of exception. Otherwise thread.join() may hung. + if (data.source) + data.source->finish(); + } + + data.is_finished = true; + data.finish_event.set(); +} + + +PushingAsyncPipelineExecutor::PushingAsyncPipelineExecutor(QueryPipeline & pipeline_) : pipeline(pipeline_) +{ + if (!pipeline.pushing()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for PushingPipelineExecutor must be pushing"); + + pushing_source = std::make_shared(pipeline.input->getHeader()); + connect(pushing_source->getPort(), *pipeline.input); + pipeline.processors.emplace_back(pushing_source); +} + +PushingAsyncPipelineExecutor::~PushingAsyncPipelineExecutor() +{ + try + { + finish(); + } + catch (...) + { + tryLogCurrentException("PushingAsyncPipelineExecutor"); + } +} + +const Block & PushingAsyncPipelineExecutor::getHeader() const +{ + return pushing_source->getPort().getHeader(); +} + + +void PushingAsyncPipelineExecutor::start() +{ + if (started) + return; + + started = true; + + data = std::make_unique(); + data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + data->source = pushing_source.get(); + + auto func = [&, thread_group = CurrentThread::getGroup()]() + { + threadFunction(*data, thread_group, pipeline.getNumThreads()); + }; + + data->thread = ThreadFromGlobalPool(std::move(func)); +} + +void PushingAsyncPipelineExecutor::push(Chunk chunk) +{ + if (!started) + start(); + + bool is_pushed = pushing_source->setData(std::move(chunk)); + data->rethrowExceptionIfHas(); + + if (!is_pushed) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Pipeline for PushingPipelineExecutor was finished before all data was inserted"); +} + +void PushingAsyncPipelineExecutor::push(Block block) +{ + push(Chunk(block.getColumns(), block.rows())); +} + +void PushingAsyncPipelineExecutor::finish() +{ + if (finished) + return; + finished = true; + + pushing_source->finish(); + + /// Join thread here to wait for possible exception. + if (data && data->thread.joinable()) + data->thread.join(); + + /// Rethrow exception to not swallow it in destructor. + if (data) + data->rethrowExceptionIfHas(); +} + +void PushingAsyncPipelineExecutor::cancel() +{ + /// Cancel execution if it wasn't finished. 
+ if (data && !data->is_finished && data->executor) + data->executor->cancel(); + + finish(); +} + +} diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.h b/src/Processors/Executors/PushingAsyncPipelineExecutor.h new file mode 100644 index 00000000000..4b4b83a90b5 --- /dev/null +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.h @@ -0,0 +1,60 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class Block; +class Chunk; +class QueryPipeline; +class PushingAsyncSource; + +class PipelineExecutor; +using PipelineExecutorPtr = std::shared_ptr; + +class IProcessor; +using ProcessorPtr = std::shared_ptr; +using Processors = std::vector; + +/// Pushing executor for Chain of processors using several background threads. +/// Typical usage is: +/// +/// PushingAsyncPipelineExecutor executor(chain); +/// executor.start(); +/// while (auto chunk = ...) +/// executor.push(std::move(chunk)); +/// executor.finish(); +class PushingAsyncPipelineExecutor +{ +public: + explicit PushingAsyncPipelineExecutor(QueryPipeline & pipeline_); + ~PushingAsyncPipelineExecutor(); + + /// Get structure of returned block or chunk. + const Block & getHeader() const; + + void start(); + + void push(Chunk chunk); + void push(Block block); + + void finish(); + + /// Stop execution. It is not necessary, but helps to stop execution before executor is destroyed. + void cancel(); + + struct Data; + +private: + QueryPipeline & pipeline; + std::shared_ptr pushing_source; + + bool started = false; + bool finished = false; + + std::unique_ptr data; +}; + +} diff --git a/src/Processors/Executors/PushingPipelineExecutor.cpp b/src/Processors/Executors/PushingPipelineExecutor.cpp new file mode 100644 index 00000000000..2e2b5e9ca1e --- /dev/null +++ b/src/Processors/Executors/PushingPipelineExecutor.cpp @@ -0,0 +1,132 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +class PushingSource : public ISource +{ +public: + explicit PushingSource(const Block & header, std::atomic_bool & need_data_flag_) + : ISource(header) + , need_data_flag(need_data_flag_) + {} + + String getName() const override { return "PushingSource"; } + + void setData(Chunk chunk) + { + need_data_flag = false; + data = std::move(chunk); + } + +protected: + + Status prepare() override + { + auto status = ISource::prepare(); + if (status == Status::Ready) + need_data_flag = true; + + return status; + } + + Chunk generate() override + { + return std::move(data); + } + +private: + Chunk data; + std::atomic_bool & need_data_flag; +}; + + +PushingPipelineExecutor::PushingPipelineExecutor(QueryPipeline & pipeline_) : pipeline(pipeline_) +{ + if (!pipeline.pushing()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for PushingPipelineExecutor must be pushing"); + + pushing_source = std::make_shared(pipeline.input->getHeader(), need_data_flag); + connect(pushing_source->getPort(), *pipeline.input); + pipeline.processors.emplace_back(pushing_source); +} + +PushingPipelineExecutor::~PushingPipelineExecutor() +{ + try + { + finish(); + } + catch (...) 
+ { + tryLogCurrentException("PushingPipelineExecutor"); + } +} + +const Block & PushingPipelineExecutor::getHeader() const +{ + return pushing_source->getPort().getHeader(); +} + + +void PushingPipelineExecutor::start() +{ + if (started) + return; + + started = true; + executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + + if (!executor->executeStep(&need_data_flag)) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Pipeline for PushingPipelineExecutor was finished before all data was inserted"); +} + +void PushingPipelineExecutor::push(Chunk chunk) +{ + if (!started) + start(); + + pushing_source->setData(std::move(chunk)); + + if (!executor->executeStep(&need_data_flag)) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Pipeline for PushingPipelineExecutor was finished before all data was inserted"); +} + +void PushingPipelineExecutor::push(Block block) +{ + push(Chunk(block.getColumns(), block.rows())); +} + +void PushingPipelineExecutor::finish() +{ + if (finished) + return; + finished = true; + + if (executor) + executor->executeStep(); +} + +void PushingPipelineExecutor::cancel() +{ + /// Cancel execution if it wasn't finished. + if (executor && !finished) + { + finished = true; + executor->cancel(); + } +} + +} diff --git a/src/Processors/Executors/PushingPipelineExecutor.h b/src/Processors/Executors/PushingPipelineExecutor.h new file mode 100644 index 00000000000..b047e62415a --- /dev/null +++ b/src/Processors/Executors/PushingPipelineExecutor.h @@ -0,0 +1,58 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class Block; +class Chunk; +class QueryPipeline; +class PushingSource; + +class PipelineExecutor; +using PipelineExecutorPtr = std::shared_ptr; + +class IProcessor; +using ProcessorPtr = std::shared_ptr; +using Processors = std::vector; + +/// Pushing executor for Chain of processors. Always executed in single thread. +/// Typical usage is: +/// +/// PushingPipelineExecutor executor(chain); +/// executor.start(); +/// while (auto chunk = ...) +/// executor.push(std::move(chunk)); +/// executor.finish(); +class PushingPipelineExecutor +{ +public: + explicit PushingPipelineExecutor(QueryPipeline & pipeline_); + ~PushingPipelineExecutor(); + + /// Get structure of returned block or chunk. + const Block & getHeader() const; + + void start(); + + void push(Chunk chunk); + void push(Block block); + + void finish(); + + /// Stop execution. It is not necessary, but helps to stop execution before executor is destroyed. 
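    /// Illustrative error-handling sketch, not part of this patch (caller-side code is assumed):
    ///     try { executor.push(std::move(chunk)); }
    ///     catch (...) { executor.cancel(); throw; }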
+ void cancel(); + +private: + QueryPipeline & pipeline; + std::atomic_bool need_data_flag = false; + std::shared_ptr pushing_source; + + PipelineExecutorPtr executor; + bool started = false; + bool finished = false; +}; + +} diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 4d86d18f70e..16216e7f363 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -82,6 +82,8 @@ public: virtual void doWritePrefix() {} virtual void doWriteSuffix() { finalize(); } + virtual bool expectMaterializedColumns() const { return true; } + void setTotals(const Block & totals) { consumeTotals(Chunk(totals.getColumns(), totals.rows())); } void setExtremes(const Block & extremes) { consumeExtremes(Chunk(extremes.getColumns(), extremes.rows())); } diff --git a/src/Processors/Formats/LazyOutputFormat.h b/src/Processors/Formats/LazyOutputFormat.h index c6be0adb347..2c29f55c4f3 100644 --- a/src/Processors/Formats/LazyOutputFormat.h +++ b/src/Processors/Formats/LazyOutputFormat.h @@ -44,6 +44,8 @@ public: queue.emplace(Chunk()); } + bool expectMaterializedColumns() const override { return false; } + protected: void consume(Chunk chunk) override { diff --git a/src/Processors/Formats/PullingOutputFormat.h b/src/Processors/Formats/PullingOutputFormat.h index 0864b5a02ef..53b2086712f 100644 --- a/src/Processors/Formats/PullingOutputFormat.h +++ b/src/Processors/Formats/PullingOutputFormat.h @@ -24,6 +24,8 @@ public: void setRowsBeforeLimit(size_t rows_before_limit) override; + bool expectMaterializedColumns() const override { return false; } + protected: void consume(Chunk chunk) override; void consumeTotals(Chunk chunk) override { totals = std::move(chunk); } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 64bdbe2410f..35b45543151 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -1,5 +1,5 @@ #include - +#include namespace DB { diff --git a/src/Processors/Pipe.cpp b/src/Processors/Pipe.cpp index e0da79f148d..ec288484ca3 100644 --- a/src/Processors/Pipe.cpp +++ b/src/Processors/Pipe.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include namespace DB @@ -98,18 +99,14 @@ static OutputPort * uniteTotals(const OutputPortRawPtrs & ports, const Block & h return totals_port; } -Pipe::Holder & Pipe::Holder::operator=(Holder && rhs) +void Pipe::addQueryPlan(std::unique_ptr plan) { - table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); - storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); - interpreter_context.insert(interpreter_context.end(), - rhs.interpreter_context.begin(), rhs.interpreter_context.end()); - for (auto & plan : rhs.query_plans) - query_plans.emplace_back(std::move(plan)); + holder.query_plans.emplace_back(std::move(plan)); +} - query_id_holder = std::move(rhs.query_id_holder); - - return *this; +PipelineResourcesHolder Pipe::detachResources() +{ + return std::move(holder); } Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, OutputPort * extremes) @@ -667,6 +664,47 @@ void Pipe::addSimpleTransform(const ProcessorGetter & getter) addSimpleTransform([&](const Block & stream_header, StreamType) { return getter(stream_header); }); } +void Pipe::addChains(std::vector chains) +{ + if (output_ports.size() != chains.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot add chains to Pipe 
because " + "number of output ports ({}) is not equal to the number of chains ({})", + output_ports.size(), chains.size()); + + dropTotals(); + dropExtremes(); + + size_t max_parallel_streams_for_chains = 0; + + Block new_header; + for (size_t i = 0; i < output_ports.size(); ++i) + { + max_parallel_streams_for_chains += std::max(chains[i].getNumThreads(), 1); + + if (i == 0) + new_header = chains[i].getOutputHeader(); + else + assertBlocksHaveEqualStructure(new_header, chains[i].getOutputHeader(), "QueryPipeline"); + + connect(*output_ports[i], chains[i].getInputPort()); + output_ports[i] = &chains[i].getOutputPort(); + + holder = chains[i].detachResources(); + auto added_processors = Chain::getProcessors(std::move(chains[i])); + for (auto & transform : added_processors) + { + if (collected_processors) + collected_processors->emplace_back(transform); + + processors.emplace_back(std::move(transform)); + } + } + + header = std::move(new_header); + max_parallel_streams = std::max(max_parallel_streams, max_parallel_streams_for_chains); +} + void Pipe::resize(size_t num_streams, bool force, bool strict) { if (output_ports.empty()) @@ -739,7 +777,7 @@ void Pipe::setOutputFormat(ProcessorPtr output) auto * format = dynamic_cast(output.get()); if (!format) - throw Exception("IOutputFormat processor expected for QueryPipeline::setOutputFormat.", + throw Exception("IOutputFormat processor expected for QueryPipelineBuilder::setOutputFormat.", ErrorCodes::LOGICAL_ERROR); auto & main = format->getPort(IOutputFormat::PortKind::Main); diff --git a/src/Processors/Pipe.h b/src/Processors/Pipe.h index dc3be3289fc..a07c68f56b2 100644 --- a/src/Processors/Pipe.h +++ b/src/Processors/Pipe.h @@ -1,11 +1,10 @@ #pragma once #include -#include -#include +#include +#include #include #include -#include namespace DB { @@ -15,11 +14,6 @@ struct StreamLocalLimits; class Pipe; using Pipes = std::vector; -class QueryPipeline; - -class IStorage; -using StoragePtr = std::shared_ptr; - using OutputPortRawPtrs = std::vector; /// Pipe is a set of processors which represents the part of pipeline. @@ -86,6 +80,9 @@ public: void addSimpleTransform(const ProcessorGetter & getter); void addSimpleTransform(const ProcessorGetterWithStreamKind & getter); + /// Add chain to every output port. + void addChains(std::vector chains); + /// Changes the number of output ports if needed. Adds ResizeTransform. void resize(size_t num_streams, bool force = false, bool strict = false); @@ -114,29 +111,13 @@ public: void addStorageHolder(StoragePtr storage) { holder.storage_holders.emplace_back(std::move(storage)); } void addQueryIdHolder(std::shared_ptr query_id_holder) { holder.query_id_holder = std::move(query_id_holder); } /// For queries with nested interpreters (i.e. StorageDistributed) - void addQueryPlan(std::unique_ptr plan) { holder.query_plans.emplace_back(std::move(plan)); } + void addQueryPlan(std::unique_ptr plan); + + PipelineResourcesHolder detachResources(); private: /// Destruction order: processors, header, locks, temporary storages, local contexts - - struct Holder - { - Holder() = default; - Holder(Holder &&) = default; - /// Custom mode assignment does not destroy data from lhs. It appends data from rhs to lhs. - Holder& operator=(Holder &&); - - /// Some processors may implicitly use Context or temporary Storage created by Interpreter. - /// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, - /// because QueryPipeline is alive until query is finished. 
- std::vector> interpreter_context; - std::vector storage_holders; - std::vector table_locks; - std::vector> query_plans; - std::shared_ptr query_id_holder; - }; - - Holder holder; + PipelineResourcesHolder holder; /// Header is common for all output below. Block header; @@ -162,6 +143,7 @@ private: void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter); void setOutputFormat(ProcessorPtr output); + friend class QueryPipelineBuilder; friend class QueryPipeline; }; diff --git a/src/Processors/PipelineResourcesHolder.cpp b/src/Processors/PipelineResourcesHolder.cpp new file mode 100644 index 00000000000..9cb2ea301ad --- /dev/null +++ b/src/Processors/PipelineResourcesHolder.cpp @@ -0,0 +1,25 @@ +#include +#include + +namespace DB +{ + +PipelineResourcesHolder::PipelineResourcesHolder() = default; +PipelineResourcesHolder::PipelineResourcesHolder(PipelineResourcesHolder &&) = default; +PipelineResourcesHolder::~PipelineResourcesHolder() = default; + +PipelineResourcesHolder & PipelineResourcesHolder::operator=(PipelineResourcesHolder && rhs) +{ + table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); + storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); + interpreter_context.insert(interpreter_context.end(), + rhs.interpreter_context.begin(), rhs.interpreter_context.end()); + for (auto & plan : rhs.query_plans) + query_plans.emplace_back(std::move(plan)); + + query_id_holder = std::move(rhs.query_id_holder); + + return *this; +} + +} diff --git a/src/Processors/PipelineResourcesHolder.h b/src/Processors/PipelineResourcesHolder.h new file mode 100644 index 00000000000..9fb1438424a --- /dev/null +++ b/src/Processors/PipelineResourcesHolder.h @@ -0,0 +1,34 @@ +#pragma once +#include +#include + +namespace DB +{ + +class QueryPipelineBuilder; + +class IStorage; +using StoragePtr = std::shared_ptr; + +class QueryPlan; +class Context; + +struct PipelineResourcesHolder +{ + PipelineResourcesHolder(); + PipelineResourcesHolder(PipelineResourcesHolder &&); + ~PipelineResourcesHolder(); + /// Custom mode assignment does not destroy data from lhs. It appends data from rhs to lhs. + PipelineResourcesHolder& operator=(PipelineResourcesHolder &&); + + /// Some processors may implicitly use Context or temporary Storage created by Interpreter. + /// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, + /// because QueryPipeline is alive until query is finished. 
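    /// Illustrative only (helper names are the ones added to Chain in this patch): a chain built by an
    /// interpreter may call chain.addTableLock(std::move(lock)) and chain.addStorageHolder(storage);
    /// the resulting holder is later moved out via chain.detachResources() into the pipeline that owns the chain.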
+ std::vector> interpreter_context; + std::vector storage_holders; + std::vector table_locks; + std::vector> query_plans; + std::shared_ptr query_id_holder; +}; + +} diff --git a/src/Processors/QueryPipeline.cpp b/src/Processors/QueryPipeline.cpp index 2b882ee93ab..4c46bed1093 100644 --- a/src/Processors/QueryPipeline.cpp +++ b/src/Processors/QueryPipeline.cpp @@ -1,421 +1,121 @@ #include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include - +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB { + namespace ErrorCodes { extern const int LOGICAL_ERROR; } -void QueryPipeline::checkInitialized() +QueryPipeline::QueryPipeline() = default; +QueryPipeline::QueryPipeline(QueryPipeline &&) = default; +QueryPipeline & QueryPipeline::operator=(QueryPipeline &&) = default; +QueryPipeline::~QueryPipeline() = default; + +static void checkInput(const InputPort & input, const ProcessorPtr & processor) { - if (!initialized()) - throw Exception("QueryPipeline wasn't initialized.", ErrorCodes::LOGICAL_ERROR); + if (!input.isConnected()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create QueryPipeline because {} has not connected input", + processor->getName()); } -void QueryPipeline::checkInitializedAndNotCompleted() +static void checkOutput(const OutputPort & output, const ProcessorPtr & processor) { - checkInitialized(); - - if (pipe.isCompleted()) - throw Exception("QueryPipeline was already completed.", ErrorCodes::LOGICAL_ERROR); + if (!output.isConnected()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create QueryPipeline because {} has not connected output", + processor->getName()); } -static void checkSource(const ProcessorPtr & source, bool can_have_totals) +static void checkPulling( + Processors & processors, + OutputPort * output, + OutputPort * totals, + OutputPort * extremes) { - if (!source->getInputs().empty()) - throw Exception("Source for query pipeline shouldn't have any input, but " + source->getName() + " has " + - toString(source->getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR); + if (!output || output->isConnected()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create pulling QueryPipeline because its output port is connected or null"); - if (source->getOutputs().empty()) - throw Exception("Source for query pipeline should have single output, but it doesn't have any", - ErrorCodes::LOGICAL_ERROR); + if (totals && totals->isConnected()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create pulling QueryPipeline because its totals port is connected"); - if (!can_have_totals && source->getOutputs().size() != 1) - throw Exception("Source for query pipeline should have single output, but " + source->getName() + " has " + - toString(source->getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR); + if (extremes && extremes->isConnected()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create pulling QueryPipeline because its extremes port is connected"); - if (source->getOutputs().size() > 2) - throw Exception("Source for query pipeline should have 1 or 2 outputs, but " + source->getName() + " has " + - toString(source->getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR); -} - -void QueryPipeline::init(Pipe pipe_) -{ - if 
(initialized()) - throw Exception("Pipeline has already been initialized.", ErrorCodes::LOGICAL_ERROR); - - if (pipe_.empty()) - throw Exception("Can't initialize pipeline with empty pipe.", ErrorCodes::LOGICAL_ERROR); - - pipe = std::move(pipe_); -} - -void QueryPipeline::reset() -{ - Pipe pipe_to_destroy(std::move(pipe)); - *this = QueryPipeline(); -} - -void QueryPipeline::addSimpleTransform(const Pipe::ProcessorGetter & getter) -{ - checkInitializedAndNotCompleted(); - pipe.addSimpleTransform(getter); -} - -void QueryPipeline::addSimpleTransform(const Pipe::ProcessorGetterWithStreamKind & getter) -{ - checkInitializedAndNotCompleted(); - pipe.addSimpleTransform(getter); -} - -void QueryPipeline::addTransform(ProcessorPtr transform) -{ - checkInitializedAndNotCompleted(); - pipe.addTransform(std::move(transform)); -} - -void QueryPipeline::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes) -{ - checkInitializedAndNotCompleted(); - pipe.addTransform(std::move(transform), totals, extremes); -} - -void QueryPipeline::transform(const Transformer & transformer) -{ - checkInitializedAndNotCompleted(); - pipe.transform(transformer); -} - -void QueryPipeline::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter) -{ - checkInitializedAndNotCompleted(); - pipe.setSinks(getter); -} - -void QueryPipeline::addDelayedStream(ProcessorPtr source) -{ - checkInitializedAndNotCompleted(); - - checkSource(source, false); - assertBlocksHaveEqualStructure(getHeader(), source->getOutputs().front().getHeader(), "QueryPipeline"); - - IProcessor::PortNumbers delayed_streams = { pipe.numOutputPorts() }; - pipe.addSource(std::move(source)); - - auto processor = std::make_shared(getHeader(), pipe.numOutputPorts(), delayed_streams); - addTransform(std::move(processor)); -} - -void QueryPipeline::addMergingAggregatedMemoryEfficientTransform(AggregatingTransformParamsPtr params, size_t num_merging_processors) -{ - DB::addMergingAggregatedMemoryEfficientTransform(pipe, std::move(params), num_merging_processors); -} - -void QueryPipeline::resize(size_t num_streams, bool force, bool strict) -{ - checkInitializedAndNotCompleted(); - pipe.resize(num_streams, force, strict); -} - -void QueryPipeline::addTotalsHavingTransform(ProcessorPtr transform) -{ - checkInitializedAndNotCompleted(); - - if (!typeid_cast(transform.get())) - throw Exception("TotalsHavingTransform expected for QueryPipeline::addTotalsHavingTransform.", - ErrorCodes::LOGICAL_ERROR); - - if (pipe.getTotalsPort()) - throw Exception("Totals having transform was already added to pipeline.", ErrorCodes::LOGICAL_ERROR); - - resize(1); - - auto * totals_port = &transform->getOutputs().back(); - pipe.addTransform(std::move(transform), totals_port, nullptr); -} - -void QueryPipeline::addDefaultTotals() -{ - checkInitializedAndNotCompleted(); - - if (pipe.getTotalsPort()) - throw Exception("Totals having transform was already added to pipeline.", ErrorCodes::LOGICAL_ERROR); - - const auto & current_header = getHeader(); - Columns columns; - columns.reserve(current_header.columns()); - - for (size_t i = 0; i < current_header.columns(); ++i) + bool found_output = false; + bool found_totals = false; + bool found_extremes = false; + for (const auto & processor : processors) { - auto column = current_header.getByPosition(i).type->createColumn(); - column->insertDefault(); - columns.emplace_back(std::move(column)); + for (const auto & in : processor->getInputs()) + checkInput(in, processor); + + for (const auto & out : 
processor->getOutputs()) + { + if (&out == output) + found_output = true; + else if (totals && &out == totals) + found_totals = true; + else if (extremes && &out == extremes) + found_extremes = true; + else + checkOutput(out, processor); + } } - auto source = std::make_shared(current_header, Chunk(std::move(columns), 1)); - pipe.addTotalsSource(std::move(source)); + if (!found_output) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create pulling QueryPipeline because its output port does not belong to any processor"); + if (totals && !found_totals) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create pulling QueryPipeline because its totals port does not belong to any processor"); + if (extremes && !found_extremes) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create pulling QueryPipeline because its extremes port does not belong to any processor"); } -void QueryPipeline::dropTotalsAndExtremes() +static void checkCompleted(Processors & processors) { - pipe.dropTotals(); - pipe.dropExtremes(); -} - -void QueryPipeline::addExtremesTransform() -{ - checkInitializedAndNotCompleted(); - - /// It is possible that pipeline already have extremes. - /// For example, it may be added from VIEW subquery. - /// In this case, recalculate extremes again - they should be calculated for different rows. - if (pipe.getExtremesPort()) - pipe.dropExtremes(); - - resize(1); - auto transform = std::make_shared(getHeader()); - auto * port = &transform->getExtremesPort(); - pipe.addTransform(std::move(transform), nullptr, port); -} - -void QueryPipeline::setOutputFormat(ProcessorPtr output) -{ - checkInitializedAndNotCompleted(); - - if (output_format) - throw Exception("QueryPipeline already has output.", ErrorCodes::LOGICAL_ERROR); - - resize(1); - - output_format = dynamic_cast(output.get()); - pipe.setOutputFormat(std::move(output)); - - initRowsBeforeLimit(); -} - -QueryPipeline QueryPipeline::unitePipelines( - std::vector> pipelines, - size_t max_threads_limit, - Processors * collected_processors) -{ - if (pipelines.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot unite an empty set of pipelines"); - - Block common_header = pipelines.front()->getHeader(); - - /// Should we limit the number of threads for united pipeline. True if all pipelines have max_threads != 0. - /// If true, result max_threads will be sum(max_threads). - /// Note: it may be > than settings.max_threads, so we should apply this limit again. - bool will_limit_max_threads = true; - size_t max_threads = 0; - Pipes pipes; - - for (auto & pipeline_ptr : pipelines) + for (const auto & processor : processors) { - auto & pipeline = *pipeline_ptr; - pipeline.checkInitialized(); - pipeline.pipe.collected_processors = collected_processors; + for (const auto & in : processor->getInputs()) + checkInput(in, processor); - pipes.emplace_back(std::move(pipeline.pipe)); - - max_threads += pipeline.max_threads; - will_limit_max_threads = will_limit_max_threads && pipeline.max_threads != 0; - - /// If one of pipelines uses more threads then current limit, will keep it. 
- /// It may happen if max_distributed_connections > max_threads - if (pipeline.max_threads > max_threads_limit) - max_threads_limit = pipeline.max_threads; - } - - QueryPipeline pipeline; - pipeline.init(Pipe::unitePipes(std::move(pipes), collected_processors, false)); - - if (will_limit_max_threads) - { - pipeline.setMaxThreads(max_threads); - pipeline.limitMaxThreads(max_threads_limit); - } - - return pipeline; -} - -std::unique_ptr QueryPipeline::joinPipelines( - std::unique_ptr left, - std::unique_ptr right, - JoinPtr join, - size_t max_block_size, - Processors * collected_processors) -{ - left->checkInitializedAndNotCompleted(); - right->checkInitializedAndNotCompleted(); - - /// Extremes before join are useless. They will be calculated after if needed. - left->pipe.dropExtremes(); - right->pipe.dropExtremes(); - - left->pipe.collected_processors = collected_processors; - right->pipe.collected_processors = collected_processors; - - /// In case joined subquery has totals, and we don't, add default chunk to totals. - bool default_totals = false; - if (!left->hasTotals() && right->hasTotals()) - { - left->addDefaultTotals(); - default_totals = true; - } - - /// (left) ──────┐ - /// ╞> Joining ─> (joined) - /// (left) ─┐┌───┘ - /// └┼───┐ - /// (right) ┐ (totals) ──┼─┐ ╞> Joining ─> (joined) - /// ╞> Resize ┐ ╓─┘┌┼─┘ - /// (right) ┘ │ ╟──┘└─┐ - /// ╞> FillingJoin ─> Resize ╣ ╞> Joining ─> (totals) - /// (totals) ─────────┘ ╙─────┘ - - size_t num_streams = left->getNumStreams(); - right->resize(1); - - auto adding_joined = std::make_shared(right->getHeader(), join); - InputPort * totals_port = nullptr; - if (right->hasTotals()) - totals_port = adding_joined->addTotalsPort(); - - right->addTransform(std::move(adding_joined), totals_port, nullptr); - - size_t num_streams_including_totals = num_streams + (left->hasTotals() ? 1 : 0); - right->resize(num_streams_including_totals); - - /// This counter is needed for every Joining except totals, to decide which Joining will generate non joined rows. 
- auto finish_counter = std::make_shared(num_streams); - - auto lit = left->pipe.output_ports.begin(); - auto rit = right->pipe.output_ports.begin(); - - for (size_t i = 0; i < num_streams; ++i) - { - auto joining = std::make_shared(left->getHeader(), join, max_block_size, false, default_totals, finish_counter); - connect(**lit, joining->getInputs().front()); - connect(**rit, joining->getInputs().back()); - *lit = &joining->getOutputs().front(); - - ++lit; - ++rit; - - if (collected_processors) - collected_processors->emplace_back(joining); - - left->pipe.processors.emplace_back(std::move(joining)); - } - - if (left->hasTotals()) - { - auto joining = std::make_shared(left->getHeader(), join, max_block_size, true, default_totals); - connect(*left->pipe.totals_port, joining->getInputs().front()); - connect(**rit, joining->getInputs().back()); - left->pipe.totals_port = &joining->getOutputs().front(); - - ++rit; - - if (collected_processors) - collected_processors->emplace_back(joining); - - left->pipe.processors.emplace_back(std::move(joining)); - } - - left->pipe.processors.insert(left->pipe.processors.end(), right->pipe.processors.begin(), right->pipe.processors.end()); - left->pipe.holder = std::move(right->pipe.holder); - left->pipe.header = left->pipe.output_ports.front()->getHeader(); - left->pipe.max_parallel_streams = std::max(left->pipe.max_parallel_streams, right->pipe.max_parallel_streams); - return left; -} - -void QueryPipeline::addCreatingSetsTransform(const Block & res_header, SubqueryForSet subquery_for_set, const SizeLimits & limits, ContextPtr context) -{ - resize(1); - - auto transform = std::make_shared( - getHeader(), - res_header, - std::move(subquery_for_set), - limits, - context); - - InputPort * totals_port = nullptr; - - if (pipe.getTotalsPort()) - totals_port = transform->addTotalsPort(); - - pipe.addTransform(std::move(transform), totals_port, nullptr); -} - -void QueryPipeline::addPipelineBefore(QueryPipeline pipeline) -{ - checkInitializedAndNotCompleted(); - if (pipeline.getHeader()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for CreatingSets should have empty header. 
Got: {}", - pipeline.getHeader().dumpStructure()); - - IProcessor::PortNumbers delayed_streams(pipe.numOutputPorts()); - for (size_t i = 0; i < delayed_streams.size(); ++i) - delayed_streams[i] = i; - - auto * collected_processors = pipe.collected_processors; - - Pipes pipes; - pipes.emplace_back(std::move(pipe)); - pipes.emplace_back(QueryPipeline::getPipe(std::move(pipeline))); - pipe = Pipe::unitePipes(std::move(pipes), collected_processors, true); - - auto processor = std::make_shared(getHeader(), pipe.numOutputPorts(), delayed_streams, true); - addTransform(std::move(processor)); -} - -void QueryPipeline::setProgressCallback(const ProgressCallback & callback) -{ - for (auto & processor : pipe.processors) - { - if (auto * source = dynamic_cast(processor.get())) - source->setProgressCallback(callback); + for (const auto & out : processor->getOutputs()) + checkOutput(out, processor); } } -void QueryPipeline::setProcessListElement(QueryStatus * elem) -{ - process_list_element = elem; - - for (auto & processor : pipe.processors) - { - if (auto * source = dynamic_cast(processor.get())) - source->setProcessListElement(elem); - } -} - -void QueryPipeline::initRowsBeforeLimit() +static void initRowsBeforeLimit(IOutputFormat * output_format) { RowsBeforeLimitCounterPtr rows_before_limit_at_least; @@ -509,39 +209,337 @@ void QueryPipeline::initRowsBeforeLimit() output_format->setRowsBeforeLimitCounter(rows_before_limit_at_least); } -PipelineExecutorPtr QueryPipeline::execute() -{ - if (!isCompleted()) - throw Exception("Cannot execute pipeline because it is not completed.", ErrorCodes::LOGICAL_ERROR); - return std::make_shared(pipe.processors, process_list_element); +QueryPipeline::QueryPipeline( + PipelineResourcesHolder resources_, + Processors processors_) + : resources(std::move(resources_)) + , processors(std::move(processors_)) +{ + checkCompleted(processors); } -void QueryPipeline::setCollectedProcessors(Processors * processors) +QueryPipeline::QueryPipeline( + PipelineResourcesHolder resources_, + Processors processors_, + InputPort * input_) + : resources(std::move(resources_)) + , processors(std::move(processors_)) + , input(input_) { - pipe.collected_processors = processors; + if (!input || input->isConnected()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create pushing QueryPipeline because its input port is connected or null"); + + bool found_input = false; + for (const auto & processor : processors) + { + for (const auto & in : processor->getInputs()) + { + if (&in == input) + found_input = true; + else + checkInput(in, processor); + } + + for (const auto & out : processor->getOutputs()) + checkOutput(out, processor); + } + + if (!found_input) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create pushing QueryPipeline because its input port does not belong to any processor"); } +QueryPipeline::QueryPipeline(std::shared_ptr source) : QueryPipeline(Pipe(std::move(source))) {} -QueryPipelineProcessorsCollector::QueryPipelineProcessorsCollector(QueryPipeline & pipeline_, IQueryPlanStep * step_) - : pipeline(pipeline_), step(step_) +QueryPipeline::QueryPipeline( + PipelineResourcesHolder resources_, + Processors processors_, + OutputPort * output_, + OutputPort * totals_, + OutputPort * extremes_) + : resources(std::move(resources_)) + , processors(std::move(processors_)) + , output(output_) + , totals(totals_) + , extremes(extremes_) { - pipeline.setCollectedProcessors(&processors); + checkPulling(processors, output, totals, extremes); } 
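For orientation, a minimal sketch of how a pulling pipeline built through this constructor is typically consumed; the source and the chunk-handling code are placeholders, not part of this patch:

```cpp
// Sketch only: wrap a single-source Pipe into a pulling QueryPipeline and drain it.
QueryPipeline pipeline(Pipe(std::move(some_source)));   // some_source: any ISource (placeholder)
PullingPipelineExecutor executor(pipeline);
Chunk chunk;
while (executor.pull(chunk))
    handleChunk(chunk);                                  // handleChunk: caller-side placeholder
```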
-QueryPipelineProcessorsCollector::~QueryPipelineProcessorsCollector() +QueryPipeline::QueryPipeline(Pipe pipe) { - pipeline.setCollectedProcessors(nullptr); + resources = std::move(pipe.holder); + + if (pipe.numOutputPorts() > 0) + { + pipe.resize(1); + output = pipe.getOutputPort(0); + totals = pipe.getTotalsPort(); + extremes = pipe.getExtremesPort(); + + processors = std::move(pipe.processors); + checkPulling(processors, output, totals, extremes); + } + else + { + processors = std::move(pipe.processors); + checkCompleted(processors); + } } -Processors QueryPipelineProcessorsCollector::detachProcessors(size_t group) +QueryPipeline::QueryPipeline(Chain chain) + : resources(chain.detachResources()) + , input(&chain.getInputPort()) + , num_threads(chain.getNumThreads()) +{ + processors.reserve(chain.getProcessors().size() + 1); + for (auto processor : chain.getProcessors()) + processors.emplace_back(std::move(processor)); + + auto sink = std::make_shared(chain.getOutputPort().getHeader()); + connect(chain.getOutputPort(), sink->getPort()); + processors.emplace_back(std::move(sink)); + + input = &chain.getInputPort(); +} + +QueryPipeline::QueryPipeline(std::shared_ptr format) +{ + auto & format_main = format->getPort(IOutputFormat::PortKind::Main); + auto & format_totals = format->getPort(IOutputFormat::PortKind::Totals); + auto & format_extremes = format->getPort(IOutputFormat::PortKind::Extremes); + + if (!totals) + { + auto source = std::make_shared(format_totals.getHeader()); + totals = &source->getPort(); + processors.emplace_back(std::move(source)); + } + + if (!extremes) + { + auto source = std::make_shared(format_extremes.getHeader()); + extremes = &source->getPort(); + processors.emplace_back(std::move(source)); + } + + connect(*totals, format_totals); + connect(*extremes, format_extremes); + + input = &format_main; + totals = nullptr; + extremes = nullptr; + + output_format = format.get(); + + processors.emplace_back(std::move(format)); +} + +static void drop(OutputPort *& port, Processors & processors) +{ + if (!port) + return; + + auto null_sink = std::make_shared(port->getHeader()); + connect(*port, null_sink->getPort()); + + processors.emplace_back(std::move(null_sink)); + port = nullptr; +} + +QueryPipeline::QueryPipeline(std::shared_ptr sink) : QueryPipeline(Chain(std::move(sink))) {} + +void QueryPipeline::complete(std::shared_ptr sink) +{ + if (!pulling()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline must be pulling to be completed with chain"); + + drop(totals, processors); + drop(extremes, processors); + + connect(*output, sink->getPort()); + processors.emplace_back(std::move(sink)); + output = nullptr; +} + +void QueryPipeline::complete(Chain chain) +{ + if (!pulling()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline must be pulling to be completed with chain"); + + resources = chain.detachResources(); + + drop(totals, processors); + drop(extremes, processors); + + processors.reserve(processors.size() + chain.getProcessors().size() + 1); + for (auto processor : chain.getProcessors()) + processors.emplace_back(std::move(processor)); + + auto sink = std::make_shared(chain.getOutputPort().getHeader()); + connect(*output, chain.getInputPort()); + connect(chain.getOutputPort(), sink->getPort()); + processors.emplace_back(std::move(sink)); + output = nullptr; +} + +void QueryPipeline::complete(std::shared_ptr sink) +{ + complete(Chain(std::move(sink))); +} + +void QueryPipeline::complete(Pipe pipe) +{ + if (!pushing()) + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline must be pushing to be completed with pipe"); + + pipe.resize(1); + resources = pipe.detachResources(); + pipe.dropExtremes(); + pipe.dropTotals(); + connect(*pipe.getOutputPort(0), *input); + input = nullptr; + + auto pipe_processors = Pipe::detachProcessors(std::move(pipe)); + processors.insert(processors.end(), pipe_processors.begin(), pipe_processors.end()); +} + +static void addMaterializing(OutputPort *& output, Processors & processors) +{ + if (!output) + return; + + auto materializing = std::make_shared(output->getHeader()); + connect(*output, materializing->getInputPort()); + output = &materializing->getOutputPort(); + processors.emplace_back(std::move(materializing)); +} + +void QueryPipeline::complete(std::shared_ptr format) +{ + if (!pulling()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline must be pulling to be completed with output format"); + + if (format->expectMaterializedColumns()) + { + addMaterializing(output, processors); + addMaterializing(totals, processors); + addMaterializing(extremes, processors); + } + + auto & format_main = format->getPort(IOutputFormat::PortKind::Main); + auto & format_totals = format->getPort(IOutputFormat::PortKind::Totals); + auto & format_extremes = format->getPort(IOutputFormat::PortKind::Extremes); + + if (!totals) + { + auto source = std::make_shared(format_totals.getHeader()); + totals = &source->getPort(); + processors.emplace_back(std::move(source)); + } + + if (!extremes) + { + auto source = std::make_shared(format_extremes.getHeader()); + extremes = &source->getPort(); + processors.emplace_back(std::move(source)); + } + + connect(*output, format_main); + connect(*totals, format_totals); + connect(*extremes, format_extremes); + + output = nullptr; + totals = nullptr; + extremes = nullptr; + + initRowsBeforeLimit(format.get()); + output_format = format.get(); + + processors.emplace_back(std::move(format)); +} + +Block QueryPipeline::getHeader() const +{ + if (input) + return input->getHeader(); + else if (output) + return output->getHeader(); + else + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Header is available only for pushing or pulling QueryPipeline"); +} + +void QueryPipeline::setProgressCallback(const ProgressCallback & callback) { for (auto & processor : processors) - processor->setQueryPlanStep(step, group); + { + if (auto * source = dynamic_cast(processor.get())) + source->setProgressCallback(callback); + } +} - Processors res; - res.swap(processors); - return res; +void QueryPipeline::setProcessListElement(QueryStatus * elem) +{ + process_list_element = elem; + + if (pulling() || completed()) + { + for (auto & processor : processors) + { + if (auto * source = dynamic_cast(processor.get())) + source->setProcessListElement(elem); + } + } + else if (pushing()) + { + if (auto * counting = dynamic_cast(&input->getProcessor())) + { + counting->setProcessListElement(elem); + } + } +} + + +void QueryPipeline::setLimitsAndQuota(const StreamLocalLimits & limits, std::shared_ptr quota) +{ + if (!pulling()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "It is possible to set limits and quota only to pulling QueryPipeline"); + + auto transform = std::make_shared(output->getHeader(), limits); + transform->setQuota(quota); + connect(*output, transform->getInputPort()); + output = &transform->getOutputPort(); + processors.emplace_back(std::move(transform)); +} + + +bool QueryPipeline::tryGetResultRowsAndBytes(UInt64 & result_rows, UInt64 & result_bytes) const +{ + if 
(!output_format) + return false; + + result_rows = output_format->getResultRows(); + result_bytes = output_format->getResultBytes(); + return true; +} + +void QueryPipeline::addStorageHolder(StoragePtr storage) +{ + resources.storage_holders.emplace_back(std::move(storage)); +} + +void QueryPipeline::reset() +{ + QueryPipeline to_remove = std::move(*this); + *this = QueryPipeline(); } } diff --git a/src/Processors/QueryPipeline.h b/src/Processors/QueryPipeline.h index b75f6d99124..42fdb429a14 100644 --- a/src/Processors/QueryPipeline.h +++ b/src/Processors/QueryPipeline.h @@ -1,193 +1,128 @@ #pragma once - -#include -#include -#include -#include -#include -#include +#include +#include namespace DB { +class InputPort; +class OutputPort; + +class IProcessor; +using ProcessorPtr = std::shared_ptr; +using Processors = std::vector; + +class QueryStatus; + +struct Progress; +using ProgressCallback = std::function; + +struct StreamLocalLimits; +class EnabledQuota; + +class Block; +class Pipe; +class Chain; class IOutputFormat; - -class QueryPipelineProcessorsCollector; - -struct AggregatingTransformParams; -using AggregatingTransformParamsPtr = std::shared_ptr; - -class QueryPlan; - -struct SubqueryForSet; -using SubqueriesForSets = std::unordered_map; - -struct SizeLimits; - -struct ExpressionActionsSettings; - -class IJoin; -using JoinPtr = std::shared_ptr; +class SinkToStorage; +class ISource; +class ISink; class QueryPipeline { public: - QueryPipeline() = default; - ~QueryPipeline() = default; - QueryPipeline(QueryPipeline &&) = default; + QueryPipeline(); + QueryPipeline(QueryPipeline &&); QueryPipeline(const QueryPipeline &) = delete; - QueryPipeline & operator= (QueryPipeline && rhs) = default; - QueryPipeline & operator= (const QueryPipeline & rhs) = delete; - /// All pipes must have same header. - void init(Pipe pipe); - /// Clear and release all resources. + QueryPipeline & operator=(QueryPipeline &&); + QueryPipeline & operator=(const QueryPipeline &) = delete; + + ~QueryPipeline(); + + /// pulling + explicit QueryPipeline(Pipe pipe); + explicit QueryPipeline(std::shared_ptr source); + /// pushing + explicit QueryPipeline(Chain chain); + explicit QueryPipeline(std::shared_ptr sink); + explicit QueryPipeline(std::shared_ptr format); + + /// completed + QueryPipeline( + PipelineResourcesHolder resources_, + Processors processors_); + + /// pushing + QueryPipeline( + PipelineResourcesHolder resources_, + Processors processors_, + InputPort * input_); + + /// pulling + QueryPipeline( + PipelineResourcesHolder resources_, + Processors processors_, + OutputPort * output_, + OutputPort * totals_ = nullptr, + OutputPort * extremes_ = nullptr); + + bool initialized() const { return !processors.empty(); } + /// When initialized, exactly one of the following is true. + /// Use PullingPipelineExecutor or PullingAsyncPipelineExecutor. + bool pulling() const { return output != nullptr; } + /// Use PushingPipelineExecutor or PushingAsyncPipelineExecutor. + bool pushing() const { return input != nullptr; } + /// Use PipelineExecutor. Call execute() to build one. + bool completed() const { return initialized() && !pulling() && !pushing(); } + + /// Only for pushing. + void complete(Pipe pipe); + /// Only for pulling. + void complete(std::shared_ptr format); + void complete(Chain chain); + void complete(std::shared_ptr sink); + void complete(std::shared_ptr sink); + + /// Only for pushing and pulling. 
+ Block getHeader() const; + + size_t getNumThreads() const { return num_threads; } + void setNumThreads(size_t num_threads_) { num_threads = num_threads_; } + + void setProcessListElement(QueryStatus * elem); + void setProgressCallback(const ProgressCallback & callback); + void setLimitsAndQuota(const StreamLocalLimits & limits, std::shared_ptr quota); + bool tryGetResultRowsAndBytes(UInt64 & result_rows, UInt64 & result_bytes) const; + + void addStorageHolder(StoragePtr storage); + + const Processors & getProcessors() const { return processors; } + void reset(); - bool initialized() { return !pipe.empty(); } - bool isCompleted() { return pipe.isCompleted(); } - - using StreamType = Pipe::StreamType; - - /// Add transform with simple input and simple output for each port. - void addSimpleTransform(const Pipe::ProcessorGetter & getter); - void addSimpleTransform(const Pipe::ProcessorGetterWithStreamKind & getter); - /// Add transform with getNumStreams() input ports. - void addTransform(ProcessorPtr transform); - void addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes); - - using Transformer = std::function; - /// Transform pipeline in general way. - void transform(const Transformer & transformer); - - /// Add TotalsHavingTransform. Resize pipeline to single input. Adds totals port. - void addTotalsHavingTransform(ProcessorPtr transform); - /// Add transform which calculates extremes. This transform adds extremes port and doesn't change inputs number. - void addExtremesTransform(); - /// Resize pipeline to single output and add IOutputFormat. Pipeline will be completed after this transformation. - void setOutputFormat(ProcessorPtr output); - /// Get current OutputFormat. - IOutputFormat * getOutputFormat() const { return output_format; } - /// Sink is a processor with single input port and no output ports. Creates sink for each output port. - /// Pipeline will be completed after this transformation. - void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter); - - /// Add totals which returns one chunk with single row with defaults. - void addDefaultTotals(); - - /// Forget about current totals and extremes. It is needed before aggregation, cause they will be calculated again. - void dropTotalsAndExtremes(); - - /// Will read from this stream after all data was read from other streams. - void addDelayedStream(ProcessorPtr source); - - void addMergingAggregatedMemoryEfficientTransform(AggregatingTransformParamsPtr params, size_t num_merging_processors); - - /// Changes the number of output ports if needed. Adds ResizeTransform. - void resize(size_t num_streams, bool force = false, bool strict = false); - - /// Unite several pipelines together. Result pipeline would have common_header structure. - /// If collector is used, it will collect only newly-added processors, but not processors from pipelines. - static QueryPipeline unitePipelines( - std::vector> pipelines, - size_t max_threads_limit = 0, - Processors * collected_processors = nullptr); - - /// Join two pipelines together using JoinPtr. - /// If collector is used, it will collect only newly-added processors, but not processors from pipelines. - static std::unique_ptr joinPipelines( - std::unique_ptr left, - std::unique_ptr right, - JoinPtr join, - size_t max_block_size, - Processors * collected_processors = nullptr); - - /// Add other pipeline and execute it before current one. - /// Pipeline must have empty header, it should not generate any chunk. - /// This is used for CreatingSets. 
- void addPipelineBefore(QueryPipeline pipeline); - - void addCreatingSetsTransform(const Block & res_header, SubqueryForSet subquery_for_set, const SizeLimits & limits, ContextPtr context); - - PipelineExecutorPtr execute(); - - size_t getNumStreams() const { return pipe.numOutputPorts(); } - - bool hasTotals() const { return pipe.getTotalsPort() != nullptr; } - - const Block & getHeader() const { return pipe.getHeader(); } - - void addTableLock(TableLockHolder lock) { pipe.addTableLock(std::move(lock)); } - void addInterpreterContext(ContextPtr context) { pipe.addInterpreterContext(std::move(context)); } - void addStorageHolder(StoragePtr storage) { pipe.addStorageHolder(std::move(storage)); } - void addQueryPlan(std::unique_ptr plan) { pipe.addQueryPlan(std::move(plan)); } - void setLimits(const StreamLocalLimits & limits) { pipe.setLimits(limits); } - void setLeafLimits(const SizeLimits & limits) { pipe.setLeafLimits(limits); } - void setQuota(const std::shared_ptr & quota) { pipe.setQuota(quota); } - - /// For compatibility with IBlockInputStream. - void setProgressCallback(const ProgressCallback & callback); - void setProcessListElement(QueryStatus * elem); - - /// Recommend number of threads for pipeline execution. - size_t getNumThreads() const - { - auto num_threads = pipe.maxParallelStreams(); - - if (max_threads) //-V1051 - num_threads = std::min(num_threads, max_threads); - - return std::max(1, num_threads); - } - - /// Set upper limit for the recommend number of threads - void setMaxThreads(size_t max_threads_) { max_threads = max_threads_; } - - /// Update upper limit for the recommend number of threads - void limitMaxThreads(size_t max_threads_) - { - if (max_threads == 0 || max_threads_ < max_threads) - max_threads = max_threads_; - } - - /// Convert query pipeline to pipe. - static Pipe getPipe(QueryPipeline pipeline) { return std::move(pipeline.pipe); } - private: + PipelineResourcesHolder resources; + Processors processors; - Pipe pipe; - IOutputFormat * output_format = nullptr; + InputPort * input = nullptr; - /// Limit on the number of threads. Zero means no limit. - /// Sometimes, more streams are created then the number of threads for more optimal execution. - size_t max_threads = 0; + OutputPort * output = nullptr; + OutputPort * totals = nullptr; + OutputPort * extremes = nullptr; QueryStatus * process_list_element = nullptr; - void checkInitialized(); - void checkInitializedAndNotCompleted(); + IOutputFormat * output_format = nullptr; - void initRowsBeforeLimit(); + size_t num_threads = 0; - void setCollectedProcessors(Processors * processors); - - friend class QueryPipelineProcessorsCollector; -}; - -/// This is a small class which collects newly added processors to QueryPipeline. -/// Pipeline must live longer than this class. 
-class QueryPipelineProcessorsCollector -{ -public: - explicit QueryPipelineProcessorsCollector(QueryPipeline & pipeline_, IQueryPlanStep * step_ = nullptr); - ~QueryPipelineProcessorsCollector(); - - Processors detachProcessors(size_t group = 0); - -private: - QueryPipeline & pipeline; - IQueryPlanStep * step; - Processors processors; + friend class PushingPipelineExecutor; + friend class PullingPipelineExecutor; + friend class PushingAsyncPipelineExecutor; + friend class PullingAsyncPipelineExecutor; + friend class CompletedPipelineExecutor; + friend class QueryPipelineBuilder; }; } diff --git a/src/Processors/QueryPipelineBuilder.cpp b/src/Processors/QueryPipelineBuilder.cpp new file mode 100644 index 00000000000..8ed413166da --- /dev/null +++ b/src/Processors/QueryPipelineBuilder.cpp @@ -0,0 +1,601 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +void QueryPipelineBuilder::addQueryPlan(std::unique_ptr plan) +{ + pipe.addQueryPlan(std::move(plan)); +} + +void QueryPipelineBuilder::checkInitialized() +{ + if (!initialized()) + throw Exception("QueryPipeline wasn't initialized.", ErrorCodes::LOGICAL_ERROR); +} + +void QueryPipelineBuilder::checkInitializedAndNotCompleted() +{ + checkInitialized(); + + if (pipe.isCompleted()) + throw Exception("QueryPipeline was already completed.", ErrorCodes::LOGICAL_ERROR); +} + +static void checkSource(const ProcessorPtr & source, bool can_have_totals) +{ + if (!source->getInputs().empty()) + throw Exception("Source for query pipeline shouldn't have any input, but " + source->getName() + " has " + + toString(source->getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR); + + if (source->getOutputs().empty()) + throw Exception("Source for query pipeline should have single output, but it doesn't have any", + ErrorCodes::LOGICAL_ERROR); + + if (!can_have_totals && source->getOutputs().size() != 1) + throw Exception("Source for query pipeline should have single output, but " + source->getName() + " has " + + toString(source->getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR); + + if (source->getOutputs().size() > 2) + throw Exception("Source for query pipeline should have 1 or 2 outputs, but " + source->getName() + " has " + + toString(source->getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR); +} + +void QueryPipelineBuilder::init(Pipe pipe_) +{ + if (initialized()) + throw Exception("Pipeline has already been initialized.", ErrorCodes::LOGICAL_ERROR); + + if (pipe_.empty()) + throw Exception("Can't initialize pipeline with empty pipe.", ErrorCodes::LOGICAL_ERROR); + + pipe = std::move(pipe_); +} + +void QueryPipelineBuilder::init(QueryPipeline pipeline) +{ + if (initialized()) + throw Exception("Pipeline has already been initialized.", ErrorCodes::LOGICAL_ERROR); + + if (pipeline.pushing()) + throw Exception("Can't initialize pushing pipeline.", ErrorCodes::LOGICAL_ERROR); + + pipe.holder = std::move(pipeline.resources); + pipe.processors = std::move(pipeline.processors); + if (pipeline.output) + { + pipe.output_ports = {pipeline.output}; + pipe.header = pipeline.output->getHeader(); + } + else + { + pipe.output_ports.clear(); + pipe.header = {}; + } + + pipe.totals_port = pipeline.totals; + pipe.extremes_port = pipeline.extremes; + pipe.max_parallel_streams = 
pipeline.num_threads; +} + +void QueryPipelineBuilder::reset() +{ + Pipe pipe_to_destroy(std::move(pipe)); + *this = QueryPipelineBuilder(); +} + +void QueryPipelineBuilder::addSimpleTransform(const Pipe::ProcessorGetter & getter) +{ + checkInitializedAndNotCompleted(); + pipe.addSimpleTransform(getter); +} + +void QueryPipelineBuilder::addSimpleTransform(const Pipe::ProcessorGetterWithStreamKind & getter) +{ + checkInitializedAndNotCompleted(); + pipe.addSimpleTransform(getter); +} + +void QueryPipelineBuilder::addTransform(ProcessorPtr transform) +{ + checkInitializedAndNotCompleted(); + pipe.addTransform(std::move(transform)); +} + +void QueryPipelineBuilder::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes) +{ + checkInitializedAndNotCompleted(); + pipe.addTransform(std::move(transform), totals, extremes); +} + +void QueryPipelineBuilder::addChains(std::vector chains) +{ + checkInitializedAndNotCompleted(); + pipe.addChains(std::move(chains)); +} + +void QueryPipelineBuilder::addChain(Chain chain) +{ + checkInitializedAndNotCompleted(); + std::vector chains; + chains.emplace_back(std::move(chain)); + pipe.resize(1); + pipe.addChains(std::move(chains)); +} + +void QueryPipelineBuilder::transform(const Transformer & transformer) +{ + checkInitializedAndNotCompleted(); + pipe.transform(transformer); +} + +void QueryPipelineBuilder::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter) +{ + checkInitializedAndNotCompleted(); + pipe.setSinks(getter); +} + +void QueryPipelineBuilder::addDelayedStream(ProcessorPtr source) +{ + checkInitializedAndNotCompleted(); + + checkSource(source, false); + assertBlocksHaveEqualStructure(getHeader(), source->getOutputs().front().getHeader(), "QueryPipeline"); + + IProcessor::PortNumbers delayed_streams = { pipe.numOutputPorts() }; + pipe.addSource(std::move(source)); + + auto processor = std::make_shared(getHeader(), pipe.numOutputPorts(), delayed_streams); + addTransform(std::move(processor)); +} + +void QueryPipelineBuilder::addMergingAggregatedMemoryEfficientTransform(AggregatingTransformParamsPtr params, size_t num_merging_processors) +{ + DB::addMergingAggregatedMemoryEfficientTransform(pipe, std::move(params), num_merging_processors); +} + +void QueryPipelineBuilder::resize(size_t num_streams, bool force, bool strict) +{ + checkInitializedAndNotCompleted(); + pipe.resize(num_streams, force, strict); +} + +void QueryPipelineBuilder::addTotalsHavingTransform(ProcessorPtr transform) +{ + checkInitializedAndNotCompleted(); + + if (!typeid_cast(transform.get())) + throw Exception("TotalsHavingTransform expected for QueryPipeline::addTotalsHavingTransform.", + ErrorCodes::LOGICAL_ERROR); + + if (pipe.getTotalsPort()) + throw Exception("Totals having transform was already added to pipeline.", ErrorCodes::LOGICAL_ERROR); + + resize(1); + + auto * totals_port = &transform->getOutputs().back(); + pipe.addTransform(std::move(transform), totals_port, nullptr); +} + +void QueryPipelineBuilder::addDefaultTotals() +{ + checkInitializedAndNotCompleted(); + + if (pipe.getTotalsPort()) + throw Exception("Totals having transform was already added to pipeline.", ErrorCodes::LOGICAL_ERROR); + + const auto & current_header = getHeader(); + Columns columns; + columns.reserve(current_header.columns()); + + for (size_t i = 0; i < current_header.columns(); ++i) + { + auto column = current_header.getByPosition(i).type->createColumn(); + column->insertDefault(); + columns.emplace_back(std::move(column)); + } + + auto source = 
std::make_shared<SourceFromSingleChunk>(current_header, Chunk(std::move(columns), 1)); + pipe.addTotalsSource(std::move(source)); +} + +void QueryPipelineBuilder::dropTotalsAndExtremes() +{ + pipe.dropTotals(); + pipe.dropExtremes(); +} + +void QueryPipelineBuilder::addExtremesTransform() +{ + checkInitializedAndNotCompleted(); + + /// It is possible that the pipeline already has extremes. + /// For example, it may be added from VIEW subquery. + /// In this case, recalculate extremes again - they should be calculated for different rows. + if (pipe.getExtremesPort()) + pipe.dropExtremes(); + + resize(1); + auto transform = std::make_shared<ExtremesTransform>(getHeader()); + auto * port = &transform->getExtremesPort(); + pipe.addTransform(std::move(transform), nullptr, port); +} + +void QueryPipelineBuilder::setOutputFormat(ProcessorPtr output) +{ + checkInitializedAndNotCompleted(); + + if (output_format) + throw Exception("QueryPipeline already has output.", ErrorCodes::LOGICAL_ERROR); + + resize(1); + + output_format = dynamic_cast<IOutputFormat *>(output.get()); + pipe.setOutputFormat(std::move(output)); + + initRowsBeforeLimit(); +} + +QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( + std::vector<std::unique_ptr<QueryPipelineBuilder>> pipelines, + size_t max_threads_limit, + Processors * collected_processors) +{ + if (pipelines.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot unite an empty set of pipelines"); + + Block common_header = pipelines.front()->getHeader(); + + /// Should we limit the number of threads for the united pipeline. True if all pipelines have max_threads != 0. + /// If true, result max_threads will be sum(max_threads). + /// Note: it may be greater than settings.max_threads, so we should apply this limit again. + bool will_limit_max_threads = true; + size_t max_threads = 0; + Pipes pipes; + + for (auto & pipeline_ptr : pipelines) + { + auto & pipeline = *pipeline_ptr; + pipeline.checkInitialized(); + pipeline.pipe.collected_processors = collected_processors; + + pipes.emplace_back(std::move(pipeline.pipe)); + + max_threads += pipeline.max_threads; + will_limit_max_threads = will_limit_max_threads && pipeline.max_threads != 0; + + /// If one of the pipelines uses more threads than the current limit, keep it. + /// It may happen if max_distributed_connections > max_threads + if (pipeline.max_threads > max_threads_limit) + max_threads_limit = pipeline.max_threads; + } + + QueryPipelineBuilder pipeline; + pipeline.init(Pipe::unitePipes(std::move(pipes), collected_processors, false)); + + if (will_limit_max_threads) + { + pipeline.setMaxThreads(max_threads); + pipeline.limitMaxThreads(max_threads_limit); + } + + return pipeline; +} + +std::unique_ptr<QueryPipelineBuilder> QueryPipelineBuilder::joinPipelines( + std::unique_ptr<QueryPipelineBuilder> left, + std::unique_ptr<QueryPipelineBuilder> right, + JoinPtr join, + size_t max_block_size, + Processors * collected_processors) +{ + left->checkInitializedAndNotCompleted(); + right->checkInitializedAndNotCompleted(); + + /// Extremes before join are useless. They will be calculated after if needed. + left->pipe.dropExtremes(); + right->pipe.dropExtremes(); + + left->pipe.collected_processors = collected_processors; + right->pipe.collected_processors = collected_processors; + + /// In case the joined subquery has totals and we don't, add a default chunk to totals.
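+    /// (Illustrative case, not from the original change: in `SELECT ... FROM t1 JOIN (SELECT x FROM t2 GROUP BY x WITH TOTALS) ...`
+    /// only the right-hand pipeline carries a totals port, so the left-hand side gets a single default-row totals
+    /// chunk via addDefaultTotals(), keeping both inputs of the totals Joining transform consistent.)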
+ bool default_totals = false; + if (!left->hasTotals() && right->hasTotals()) + { + left->addDefaultTotals(); + default_totals = true; + } + + /// (left) ──────┐ + /// ╞> Joining ─> (joined) + /// (left) ─┐┌───┘ + /// └┼───┐ + /// (right) ┐ (totals) ──┼─┐ ╞> Joining ─> (joined) + /// ╞> Resize ┐ ╓─┘┌┼─┘ + /// (right) ┘ │ ╟──┘└─┐ + /// ╞> FillingJoin ─> Resize ╣ ╞> Joining ─> (totals) + /// (totals) ─────────┘ ╙─────┘ + + size_t num_streams = left->getNumStreams(); + right->resize(1); + + auto adding_joined = std::make_shared(right->getHeader(), join); + InputPort * totals_port = nullptr; + if (right->hasTotals()) + totals_port = adding_joined->addTotalsPort(); + + right->addTransform(std::move(adding_joined), totals_port, nullptr); + + size_t num_streams_including_totals = num_streams + (left->hasTotals() ? 1 : 0); + right->resize(num_streams_including_totals); + + /// This counter is needed for every Joining except totals, to decide which Joining will generate non joined rows. + auto finish_counter = std::make_shared(num_streams); + + auto lit = left->pipe.output_ports.begin(); + auto rit = right->pipe.output_ports.begin(); + + for (size_t i = 0; i < num_streams; ++i) + { + auto joining = std::make_shared(left->getHeader(), join, max_block_size, false, default_totals, finish_counter); + connect(**lit, joining->getInputs().front()); + connect(**rit, joining->getInputs().back()); + *lit = &joining->getOutputs().front(); + + ++lit; + ++rit; + + if (collected_processors) + collected_processors->emplace_back(joining); + + left->pipe.processors.emplace_back(std::move(joining)); + } + + if (left->hasTotals()) + { + auto joining = std::make_shared(left->getHeader(), join, max_block_size, true, default_totals); + connect(*left->pipe.totals_port, joining->getInputs().front()); + connect(**rit, joining->getInputs().back()); + left->pipe.totals_port = &joining->getOutputs().front(); + + ++rit; + + if (collected_processors) + collected_processors->emplace_back(joining); + + left->pipe.processors.emplace_back(std::move(joining)); + } + + left->pipe.processors.insert(left->pipe.processors.end(), right->pipe.processors.begin(), right->pipe.processors.end()); + left->pipe.holder = std::move(right->pipe.holder); + left->pipe.header = left->pipe.output_ports.front()->getHeader(); + left->pipe.max_parallel_streams = std::max(left->pipe.max_parallel_streams, right->pipe.max_parallel_streams); + return left; +} + +void QueryPipelineBuilder::addCreatingSetsTransform(const Block & res_header, SubqueryForSet subquery_for_set, const SizeLimits & limits, ContextPtr context) +{ + resize(1); + + auto transform = std::make_shared( + getHeader(), + res_header, + std::move(subquery_for_set), + limits, + context); + + InputPort * totals_port = nullptr; + + if (pipe.getTotalsPort()) + totals_port = transform->addTotalsPort(); + + pipe.addTransform(std::move(transform), totals_port, nullptr); +} + +void QueryPipelineBuilder::addPipelineBefore(QueryPipelineBuilder pipeline) +{ + checkInitializedAndNotCompleted(); + if (pipeline.getHeader()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for CreatingSets should have empty header. 
Got: {}", + pipeline.getHeader().dumpStructure()); + + IProcessor::PortNumbers delayed_streams(pipe.numOutputPorts()); + for (size_t i = 0; i < delayed_streams.size(); ++i) + delayed_streams[i] = i; + + auto * collected_processors = pipe.collected_processors; + + Pipes pipes; + pipes.emplace_back(std::move(pipe)); + pipes.emplace_back(QueryPipelineBuilder::getPipe(std::move(pipeline))); + pipe = Pipe::unitePipes(std::move(pipes), collected_processors, true); + + auto processor = std::make_shared(getHeader(), pipe.numOutputPorts(), delayed_streams, true); + addTransform(std::move(processor)); +} + +void QueryPipelineBuilder::setProgressCallback(const ProgressCallback & callback) +{ + for (auto & processor : pipe.processors) + { + if (auto * source = dynamic_cast(processor.get())) + source->setProgressCallback(callback); + } +} + +void QueryPipelineBuilder::setProcessListElement(QueryStatus * elem) +{ + process_list_element = elem; + + for (auto & processor : pipe.processors) + { + if (auto * source = dynamic_cast(processor.get())) + source->setProcessListElement(elem); + } +} + +void QueryPipelineBuilder::initRowsBeforeLimit() +{ + RowsBeforeLimitCounterPtr rows_before_limit_at_least; + + /// TODO: add setRowsBeforeLimitCounter as virtual method to IProcessor. + std::vector limits; + std::vector sources; + std::vector remote_sources; + + std::unordered_set visited; + + struct QueuedEntry + { + IProcessor * processor; + bool visited_limit; + }; + + std::queue queue; + + queue.push({ output_format, false }); + visited.emplace(output_format); + + while (!queue.empty()) + { + auto * processor = queue.front().processor; + auto visited_limit = queue.front().visited_limit; + queue.pop(); + + if (!visited_limit) + { + if (auto * limit = typeid_cast(processor)) + { + visited_limit = true; + limits.emplace_back(limit); + } + + if (auto * source = typeid_cast(processor)) + sources.emplace_back(source); + + if (auto * source = typeid_cast(processor)) + remote_sources.emplace_back(source); + } + else if (auto * sorting = typeid_cast(processor)) + { + if (!rows_before_limit_at_least) + rows_before_limit_at_least = std::make_shared(); + + sorting->setRowsBeforeLimitCounter(rows_before_limit_at_least); + + /// Don't go to children. Take rows_before_limit from last PartialSortingTransform. + continue; + } + + /// Skip totals and extremes port for output format. + if (auto * format = dynamic_cast(processor)) + { + auto * child_processor = &format->getPort(IOutputFormat::PortKind::Main).getOutputPort().getProcessor(); + if (visited.emplace(child_processor).second) + queue.push({ child_processor, visited_limit }); + + continue; + } + + for (auto & child_port : processor->getInputs()) + { + auto * child_processor = &child_port.getOutputPort().getProcessor(); + if (visited.emplace(child_processor).second) + queue.push({ child_processor, visited_limit }); + } + } + + if (!rows_before_limit_at_least && (!limits.empty() || !sources.empty() || !remote_sources.empty())) + { + rows_before_limit_at_least = std::make_shared(); + + for (auto & limit : limits) + limit->setRowsBeforeLimitCounter(rows_before_limit_at_least); + + for (auto & source : sources) + source->setRowsBeforeLimitCounter(rows_before_limit_at_least); + + for (auto & source : remote_sources) + source->setRowsBeforeLimitCounter(rows_before_limit_at_least); + } + + /// If there is a limit, then enable rows_before_limit_at_least + /// It is needed when zero rows is read, but we still want rows_before_limit_at_least in result. 
+ if (!limits.empty()) + rows_before_limit_at_least->add(0); + + if (rows_before_limit_at_least) + output_format->setRowsBeforeLimitCounter(rows_before_limit_at_least); +} + +PipelineExecutorPtr QueryPipelineBuilder::execute() +{ + if (!isCompleted()) + throw Exception("Cannot execute pipeline because it is not completed.", ErrorCodes::LOGICAL_ERROR); + + return std::make_shared(pipe.processors, process_list_element); +} + +QueryPipeline QueryPipelineBuilder::getPipeline(QueryPipelineBuilder builder) +{ + QueryPipeline res(std::move(builder.pipe)); + res.setNumThreads(builder.getNumThreads()); + return res; +} + +void QueryPipelineBuilder::setCollectedProcessors(Processors * processors) +{ + pipe.collected_processors = processors; +} + + +QueryPipelineProcessorsCollector::QueryPipelineProcessorsCollector(QueryPipelineBuilder & pipeline_, IQueryPlanStep * step_) + : pipeline(pipeline_), step(step_) +{ + pipeline.setCollectedProcessors(&processors); +} + +QueryPipelineProcessorsCollector::~QueryPipelineProcessorsCollector() +{ + pipeline.setCollectedProcessors(nullptr); +} + +Processors QueryPipelineProcessorsCollector::detachProcessors(size_t group) +{ + for (auto & processor : processors) + processor->setQueryPlanStep(step, group); + + Processors res; + res.swap(processors); + return res; +} + +} diff --git a/src/Processors/QueryPipelineBuilder.h b/src/Processors/QueryPipelineBuilder.h new file mode 100644 index 00000000000..78ae5dd41be --- /dev/null +++ b/src/Processors/QueryPipelineBuilder.h @@ -0,0 +1,199 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class IOutputFormat; + +class QueryPipelineProcessorsCollector; + +struct AggregatingTransformParams; +using AggregatingTransformParamsPtr = std::shared_ptr; + +class QueryPlan; + +struct SubqueryForSet; +using SubqueriesForSets = std::unordered_map; + +struct SizeLimits; + +struct ExpressionActionsSettings; + +class IJoin; +using JoinPtr = std::shared_ptr; + +class QueryPipelineBuilder +{ +public: + QueryPipelineBuilder() = default; + ~QueryPipelineBuilder() = default; + QueryPipelineBuilder(QueryPipelineBuilder &&) = default; + QueryPipelineBuilder(const QueryPipelineBuilder &) = delete; + QueryPipelineBuilder & operator= (QueryPipelineBuilder && rhs) = default; + QueryPipelineBuilder & operator= (const QueryPipelineBuilder & rhs) = delete; + + /// All pipes must have same header. + void init(Pipe pipe); + void init(QueryPipeline pipeline); + /// Clear and release all resources. + void reset(); + + bool initialized() { return !pipe.empty(); } + bool isCompleted() { return pipe.isCompleted(); } + + using StreamType = Pipe::StreamType; + + /// Add transform with simple input and simple output for each port. + void addSimpleTransform(const Pipe::ProcessorGetter & getter); + void addSimpleTransform(const Pipe::ProcessorGetterWithStreamKind & getter); + /// Add transform with getNumStreams() input ports. + void addTransform(ProcessorPtr transform); + void addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes); + + void addChains(std::vector chains); + void addChain(Chain chain); + + using Transformer = std::function; + /// Transform pipeline in general way. + void transform(const Transformer & transformer); + + /// Add TotalsHavingTransform. Resize pipeline to single input. Adds totals port. + void addTotalsHavingTransform(ProcessorPtr transform); + /// Add transform which calculates extremes. 
This transform adds extremes port and doesn't change inputs number. + void addExtremesTransform(); + /// Resize pipeline to single output and add IOutputFormat. Pipeline will be completed after this transformation. + void setOutputFormat(ProcessorPtr output); + /// Get current OutputFormat. + IOutputFormat * getOutputFormat() const { return output_format; } + /// Sink is a processor with single input port and no output ports. Creates sink for each output port. + /// Pipeline will be completed after this transformation. + void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter); + + /// Add totals which returns one chunk with single row with defaults. + void addDefaultTotals(); + + /// Forget about current totals and extremes. It is needed before aggregation, cause they will be calculated again. + void dropTotalsAndExtremes(); + + /// Will read from this stream after all data was read from other streams. + void addDelayedStream(ProcessorPtr source); + + void addMergingAggregatedMemoryEfficientTransform(AggregatingTransformParamsPtr params, size_t num_merging_processors); + + /// Changes the number of output ports if needed. Adds ResizeTransform. + void resize(size_t num_streams, bool force = false, bool strict = false); + + /// Unite several pipelines together. Result pipeline would have common_header structure. + /// If collector is used, it will collect only newly-added processors, but not processors from pipelines. + static QueryPipelineBuilder unitePipelines( + std::vector> pipelines, + size_t max_threads_limit = 0, + Processors * collected_processors = nullptr); + + /// Join two pipelines together using JoinPtr. + /// If collector is used, it will collect only newly-added processors, but not processors from pipelines. + static std::unique_ptr joinPipelines( + std::unique_ptr left, + std::unique_ptr right, + JoinPtr join, + size_t max_block_size, + Processors * collected_processors = nullptr); + + /// Add other pipeline and execute it before current one. + /// Pipeline must have empty header, it should not generate any chunk. + /// This is used for CreatingSets. + void addPipelineBefore(QueryPipelineBuilder pipeline); + + void addCreatingSetsTransform(const Block & res_header, SubqueryForSet subquery_for_set, const SizeLimits & limits, ContextPtr context); + + PipelineExecutorPtr execute(); + + size_t getNumStreams() const { return pipe.numOutputPorts(); } + + bool hasTotals() const { return pipe.getTotalsPort() != nullptr; } + + const Block & getHeader() const { return pipe.getHeader(); } + + void addTableLock(TableLockHolder lock) { pipe.addTableLock(std::move(lock)); } + void addInterpreterContext(std::shared_ptr context) { pipe.addInterpreterContext(std::move(context)); } + void addStorageHolder(StoragePtr storage) { pipe.addStorageHolder(std::move(storage)); } + void addQueryPlan(std::unique_ptr plan); + void setLimits(const StreamLocalLimits & limits) { pipe.setLimits(limits); } + void setLeafLimits(const SizeLimits & limits) { pipe.setLeafLimits(limits); } + void setQuota(const std::shared_ptr & quota) { pipe.setQuota(quota); } + + /// For compatibility with IBlockInputStream. + void setProgressCallback(const ProgressCallback & callback); + void setProcessListElement(QueryStatus * elem); + + /// Recommend number of threads for pipeline execution. 
+ size_t getNumThreads() const + { + auto num_threads = pipe.maxParallelStreams(); + + if (max_threads) //-V1051 + num_threads = std::min(num_threads, max_threads); + + return std::max(1, num_threads); + } + + /// Set upper limit for the recommend number of threads + void setMaxThreads(size_t max_threads_) { max_threads = max_threads_; } + + /// Update upper limit for the recommend number of threads + void limitMaxThreads(size_t max_threads_) + { + if (max_threads == 0 || max_threads_ < max_threads) + max_threads = max_threads_; + } + + /// Convert query pipeline to pipe. + static Pipe getPipe(QueryPipelineBuilder pipeline) { return std::move(pipeline.pipe); } + static QueryPipeline getPipeline(QueryPipelineBuilder builder); + +private: + + Pipe pipe; + IOutputFormat * output_format = nullptr; + + /// Limit on the number of threads. Zero means no limit. + /// Sometimes, more streams are created then the number of threads for more optimal execution. + size_t max_threads = 0; + + QueryStatus * process_list_element = nullptr; + + void checkInitialized(); + void checkInitializedAndNotCompleted(); + + void initRowsBeforeLimit(); + + void setCollectedProcessors(Processors * processors); + + friend class QueryPipelineProcessorsCollector; +}; + +/// This is a small class which collects newly added processors to QueryPipeline. +/// Pipeline must live longer than this class. +class QueryPipelineProcessorsCollector +{ +public: + explicit QueryPipelineProcessorsCollector(QueryPipelineBuilder & pipeline_, IQueryPlanStep * step_ = nullptr); + ~QueryPipelineProcessorsCollector(); + + Processors detachProcessors(size_t group = 0); + +private: + QueryPipelineBuilder & pipeline; + IQueryPlanStep * step; + Processors processors; +}; + +} diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 772390acb32..023f9016cc4 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -46,7 +46,7 @@ AggregatingStep::AggregatingStep( { } -void AggregatingStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { QueryPipelineProcessorsCollector collector(pipeline, this); diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index 696aabd4de7..8583e5be485 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -27,7 +27,7 @@ public: String getName() const override { return "Aggregating"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void describeActions(JSONBuilder::JSONMap & map) const override; @@ -57,4 +57,3 @@ private: }; } - diff --git a/src/Processors/QueryPlan/ArrayJoinStep.cpp b/src/Processors/QueryPlan/ArrayJoinStep.cpp index fa9ea298319..35b974baa83 100644 --- a/src/Processors/QueryPlan/ArrayJoinStep.cpp +++ b/src/Processors/QueryPlan/ArrayJoinStep.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -46,11 +46,11 @@ void ArrayJoinStep::updateInputStream(DataStream input_stream, Block result_head res_header = std::move(result_header); } -void ArrayJoinStep::transformPipeline(QueryPipeline & pipeline, 
const BuildQueryPipelineSettings & settings) +void ArrayJoinStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) { - bool on_totals = stream_type == QueryPipeline::StreamType::Totals; + bool on_totals = stream_type == QueryPipelineBuilder::StreamType::Totals; return std::make_shared(header, array_join, on_totals); }); diff --git a/src/Processors/QueryPlan/ArrayJoinStep.h b/src/Processors/QueryPlan/ArrayJoinStep.h index b3e08c2023c..83df4d021e8 100644 --- a/src/Processors/QueryPlan/ArrayJoinStep.h +++ b/src/Processors/QueryPlan/ArrayJoinStep.h @@ -13,7 +13,7 @@ public: explicit ArrayJoinStep(const DataStream & input_stream_, ArrayJoinActionPtr array_join_); String getName() const override { return "ArrayJoin"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings & settings) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index 811e5885219..83a4c291bf2 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -1,5 +1,6 @@ #include -#include +#include +#include #include #include #include @@ -43,7 +44,7 @@ CreatingSetStep::CreatingSetStep( { } -void CreatingSetStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void CreatingSetStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { pipeline.addCreatingSetsTransform(getOutputStream().header, std::move(subquery_for_set), network_transfer_limits, getContext()); } @@ -84,7 +85,7 @@ CreatingSetsStep::CreatingSetsStep(DataStreams input_streams_) input_streams[i].header.dumpStructure()); } -QueryPipelinePtr CreatingSetsStep::updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings &) +QueryPipelineBuilderPtr CreatingSetsStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) { if (pipelines.empty()) throw Exception("CreatingSetsStep cannot be created with no inputs", ErrorCodes::LOGICAL_ERROR); @@ -95,11 +96,11 @@ QueryPipelinePtr CreatingSetsStep::updatePipeline(QueryPipelines pipelines, cons pipelines.erase(pipelines.begin()); - QueryPipeline delayed_pipeline; + QueryPipelineBuilder delayed_pipeline; if (pipelines.size() > 1) { QueryPipelineProcessorsCollector collector(delayed_pipeline, this); - delayed_pipeline = QueryPipeline::unitePipelines(std::move(pipelines)); + delayed_pipeline = QueryPipelineBuilder::unitePipelines(std::move(pipelines)); processors = collector.detachProcessors(); } else diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h index fa6d34ef667..8d20c764e8a 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.h +++ b/src/Processors/QueryPlan/CreatingSetsStep.h @@ -21,7 +21,7 @@ public: String getName() const override { return "CreatingSet"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const 
BuildQueryPipelineSettings &) override; void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; @@ -39,7 +39,7 @@ public: String getName() const override { return "CreatingSets"; } - QueryPipelinePtr updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings &) override; + QueryPipelineBuilderPtr updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) override; void describePipeline(FormatSettings & settings) const override; diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index 1a3016b7106..3d61d3ef36b 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { @@ -30,13 +30,13 @@ CubeStep::CubeStep(const DataStream & input_stream_, AggregatingTransformParamsP output_stream->distinct_columns.insert(params->params.src_header.getByPosition(key).name); } -void CubeStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void CubeStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { pipeline.resize(1); - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { - if (stream_type == QueryPipeline::StreamType::Totals) + if (stream_type == QueryPipelineBuilder::StreamType::Totals) return nullptr; return std::make_shared(header, std::move(params)); diff --git a/src/Processors/QueryPlan/CubeStep.h b/src/Processors/QueryPlan/CubeStep.h index 0e06ffc598a..45077d78a90 100644 --- a/src/Processors/QueryPlan/CubeStep.h +++ b/src/Processors/QueryPlan/CubeStep.h @@ -17,7 +17,7 @@ public: String getName() const override { return "Cube"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; const Aggregator::Params & getParams() const; private: diff --git a/src/Processors/QueryPlan/DistinctStep.cpp b/src/Processors/QueryPlan/DistinctStep.cpp index 5edd2f52f47..d53d1fa9310 100644 --- a/src/Processors/QueryPlan/DistinctStep.cpp +++ b/src/Processors/QueryPlan/DistinctStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -63,7 +63,7 @@ DistinctStep::DistinctStep( } } -void DistinctStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void DistinctStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { if (checkColumnsAlreadyDistinct(columns, input_streams.front().distinct_columns)) return; @@ -71,9 +71,9 @@ void DistinctStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryP if (!pre_distinct) pipeline.resize(1); - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { - if (stream_type != QueryPipeline::StreamType::Main) + if (stream_type != QueryPipelineBuilder::StreamType::Main) return nullptr; return std::make_shared(header, set_size_limits, limit_hint, columns); diff --git a/src/Processors/QueryPlan/DistinctStep.h b/src/Processors/QueryPlan/DistinctStep.h index 
815601d6253..b08e93dffa9 100644 --- a/src/Processors/QueryPlan/DistinctStep.h +++ b/src/Processors/QueryPlan/DistinctStep.h @@ -18,7 +18,7 @@ public: String getName() const override { return "Distinct"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; diff --git a/src/Processors/QueryPlan/ExpressionStep.cpp b/src/Processors/QueryPlan/ExpressionStep.cpp index 656dcd46fe9..b4ff1a1281c 100644 --- a/src/Processors/QueryPlan/ExpressionStep.cpp +++ b/src/Processors/QueryPlan/ExpressionStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -52,7 +52,7 @@ void ExpressionStep::updateInputStream(DataStream input_stream, bool keep_header input_streams.emplace_back(std::move(input_stream)); } -void ExpressionStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings & settings) +void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { auto expression = std::make_shared(actions_dag, settings.getActionsSettings()); diff --git a/src/Processors/QueryPlan/ExpressionStep.h b/src/Processors/QueryPlan/ExpressionStep.h index 43272e19536..94c2ba21bc1 100644 --- a/src/Processors/QueryPlan/ExpressionStep.h +++ b/src/Processors/QueryPlan/ExpressionStep.h @@ -18,7 +18,7 @@ public: explicit ExpressionStep(const DataStream & input_stream_, ActionsDAGPtr actions_dag_); String getName() const override { return "Expression"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings & settings) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; void updateInputStream(DataStream input_stream, bool keep_header); diff --git a/src/Processors/QueryPlan/ExtremesStep.cpp b/src/Processors/QueryPlan/ExtremesStep.cpp index d3ec403f37e..117ccd414ca 100644 --- a/src/Processors/QueryPlan/ExtremesStep.cpp +++ b/src/Processors/QueryPlan/ExtremesStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { @@ -25,7 +25,7 @@ ExtremesStep::ExtremesStep(const DataStream & input_stream_) { } -void ExtremesStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void ExtremesStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { pipeline.addExtremesTransform(); } diff --git a/src/Processors/QueryPlan/ExtremesStep.h b/src/Processors/QueryPlan/ExtremesStep.h index 960b046b955..7898796306c 100644 --- a/src/Processors/QueryPlan/ExtremesStep.h +++ b/src/Processors/QueryPlan/ExtremesStep.h @@ -11,7 +11,7 @@ public: String getName() const override { return "Extremes"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; }; } diff --git a/src/Processors/QueryPlan/FillingStep.cpp b/src/Processors/QueryPlan/FillingStep.cpp index ba3588efa72..204559ecc3b 100644 --- a/src/Processors/QueryPlan/FillingStep.cpp +++ b/src/Processors/QueryPlan/FillingStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -36,11 +36,11 @@ FillingStep::FillingStep(const DataStream & input_stream_, SortDescription sort_ throw 
Exception("FillingStep expects single input", ErrorCodes::LOGICAL_ERROR); } -void FillingStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void FillingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { - bool on_totals = stream_type == QueryPipeline::StreamType::Totals; + bool on_totals = stream_type == QueryPipelineBuilder::StreamType::Totals; return std::make_shared(header, sort_description, on_totals); }); } diff --git a/src/Processors/QueryPlan/FillingStep.h b/src/Processors/QueryPlan/FillingStep.h index f4c6782e9df..6a5bca1890d 100644 --- a/src/Processors/QueryPlan/FillingStep.h +++ b/src/Processors/QueryPlan/FillingStep.h @@ -13,7 +13,7 @@ public: String getName() const override { return "Filling"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index 15fd5c7b673..483055810cf 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -65,13 +65,13 @@ void FilterStep::updateInputStream(DataStream input_stream, bool keep_header) input_streams.emplace_back(std::move(input_stream)); } -void FilterStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings & settings) +void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { auto expression = std::make_shared(actions_dag, settings.getActionsSettings()); - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) { - bool on_totals = stream_type == QueryPipeline::StreamType::Totals; + bool on_totals = stream_type == QueryPipelineBuilder::StreamType::Totals; return std::make_shared(header, expression, filter_column_name, remove_filter_column, on_totals); }); diff --git a/src/Processors/QueryPlan/FilterStep.h b/src/Processors/QueryPlan/FilterStep.h index d01d128a08c..7ac5bc036e0 100644 --- a/src/Processors/QueryPlan/FilterStep.h +++ b/src/Processors/QueryPlan/FilterStep.h @@ -18,7 +18,7 @@ public: bool remove_filter_column_); String getName() const override { return "Filter"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings & settings) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; void updateInputStream(DataStream input_stream, bool keep_header); diff --git a/src/Processors/QueryPlan/FinishSortingStep.cpp b/src/Processors/QueryPlan/FinishSortingStep.cpp index 718eeb96cd8..c219c09f3bd 100644 --- a/src/Processors/QueryPlan/FinishSortingStep.cpp +++ b/src/Processors/QueryPlan/FinishSortingStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -54,7 +54,7 @@ void 
FinishSortingStep::updateLimit(size_t limit_) } } -void FinishSortingStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void FinishSortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { bool need_finish_sorting = (prefix_description.size() < result_description.size()); if (pipeline.getNumStreams() > 1) @@ -74,9 +74,9 @@ void FinishSortingStep::transformPipeline(QueryPipeline & pipeline, const BuildQ if (need_finish_sorting) { - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { - if (stream_type != QueryPipeline::StreamType::Main) + if (stream_type != QueryPipelineBuilder::StreamType::Main) return nullptr; return std::make_shared(header, result_description, limit); diff --git a/src/Processors/QueryPlan/FinishSortingStep.h b/src/Processors/QueryPlan/FinishSortingStep.h index 5ea3a6d91b5..ac34aea9df4 100644 --- a/src/Processors/QueryPlan/FinishSortingStep.h +++ b/src/Processors/QueryPlan/FinishSortingStep.h @@ -19,7 +19,7 @@ public: String getName() const override { return "FinishSorting"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; diff --git a/src/Processors/QueryPlan/IQueryPlanStep.h b/src/Processors/QueryPlan/IQueryPlanStep.h index 9ff2b22e5b8..17bf6a4054e 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.h +++ b/src/Processors/QueryPlan/IQueryPlanStep.h @@ -8,9 +8,9 @@ namespace JSONBuilder { class JSONMap; } namespace DB { -class QueryPipeline; -using QueryPipelinePtr = std::unique_ptr; -using QueryPipelines = std::vector; +class QueryPipelineBuilder; +using QueryPipelineBuilderPtr = std::unique_ptr; +using QueryPipelineBuilders = std::vector; class IProcessor; using ProcessorPtr = std::shared_ptr; @@ -80,7 +80,7 @@ public: /// * header from each pipeline is the same as header from corresponding input_streams /// Result pipeline must contain any number of streams with compatible output header is hasOutputStream(), /// or pipeline should be completed otherwise. 
- virtual QueryPipelinePtr updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings & settings) = 0; + virtual QueryPipelineBuilderPtr updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings & settings) = 0; const DataStreams & getInputStreams() const { return input_streams; } diff --git a/src/Processors/QueryPlan/ISourceStep.cpp b/src/Processors/QueryPlan/ISourceStep.cpp index ec82e42fa34..61c0a9254cd 100644 --- a/src/Processors/QueryPlan/ISourceStep.cpp +++ b/src/Processors/QueryPlan/ISourceStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { @@ -9,9 +9,9 @@ ISourceStep::ISourceStep(DataStream output_stream_) output_stream = std::move(output_stream_); } -QueryPipelinePtr ISourceStep::updatePipeline(QueryPipelines, const BuildQueryPipelineSettings & settings) +QueryPipelineBuilderPtr ISourceStep::updatePipeline(QueryPipelineBuilders, const BuildQueryPipelineSettings & settings) { - auto pipeline = std::make_unique(); + auto pipeline = std::make_unique(); QueryPipelineProcessorsCollector collector(*pipeline, this); initializePipeline(*pipeline, settings); auto added_processors = collector.detachProcessors(); diff --git a/src/Processors/QueryPlan/ISourceStep.h b/src/Processors/QueryPlan/ISourceStep.h index fbef0fcce38..08c939b626d 100644 --- a/src/Processors/QueryPlan/ISourceStep.h +++ b/src/Processors/QueryPlan/ISourceStep.h @@ -10,9 +10,9 @@ class ISourceStep : public IQueryPlanStep public: explicit ISourceStep(DataStream output_stream_); - QueryPipelinePtr updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings & settings) override; + QueryPipelineBuilderPtr updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings & settings) override; - virtual void initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings & settings) = 0; + virtual void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) = 0; void describePipeline(FormatSettings & settings) const override; diff --git a/src/Processors/QueryPlan/ITransformingStep.cpp b/src/Processors/QueryPlan/ITransformingStep.cpp index e71afd94c46..1c7f836378f 100644 --- a/src/Processors/QueryPlan/ITransformingStep.cpp +++ b/src/Processors/QueryPlan/ITransformingStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { @@ -36,7 +36,7 @@ DataStream ITransformingStep::createOutputStream( } -QueryPipelinePtr ITransformingStep::updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings & settings) +QueryPipelineBuilderPtr ITransformingStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings & settings) { if (collect_processors) { diff --git a/src/Processors/QueryPlan/ITransformingStep.h b/src/Processors/QueryPlan/ITransformingStep.h index 9abe025729d..d87ca05d4bc 100644 --- a/src/Processors/QueryPlan/ITransformingStep.h +++ b/src/Processors/QueryPlan/ITransformingStep.h @@ -48,9 +48,9 @@ public: ITransformingStep(DataStream input_stream, Block output_header, Traits traits, bool collect_processors_ = true); - QueryPipelinePtr updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings & settings) override; + QueryPipelineBuilderPtr updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings & settings) override; - virtual void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings & settings) = 0; + virtual void transformPipeline(QueryPipelineBuilder & pipeline, const 
BuildQueryPipelineSettings & settings) = 0; const TransformTraits & getTransformTraits() const { return transform_traits; } const DataStreamTraits & getDataStreamTraits() const { return data_stream_traits; } diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index d1bb1eb41e9..a4d81e69fe0 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -39,9 +39,9 @@ IntersectOrExceptStep::IntersectOrExceptStep( output_stream = DataStream{.header = header}; } -QueryPipelinePtr IntersectOrExceptStep::updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings &) +QueryPipelineBuilderPtr IntersectOrExceptStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) { - auto pipeline = std::make_unique(); + auto pipeline = std::make_unique(); QueryPipelineProcessorsCollector collector(*pipeline, this); if (pipelines.empty()) @@ -72,7 +72,7 @@ QueryPipelinePtr IntersectOrExceptStep::updatePipeline(QueryPipelines pipelines, cur_pipeline->addTransform(std::make_shared(header, cur_pipeline->getNumStreams(), 1)); } - *pipeline = QueryPipeline::unitePipelines(std::move(pipelines), max_threads); + *pipeline = QueryPipelineBuilder::unitePipelines(std::move(pipelines), max_threads); pipeline->addTransform(std::make_shared(header, current_operator)); processors = collector.detachProcessors(); diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.h b/src/Processors/QueryPlan/IntersectOrExceptStep.h index 9e87c921ab2..b2738cb297f 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.h +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.h @@ -16,7 +16,7 @@ public: String getName() const override { return "IntersectOrExcept"; } - QueryPipelinePtr updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings & settings) override; + QueryPipelineBuilderPtr updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings & settings) override; void describePipeline(FormatSettings & settings) const override; diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 736d7eb37c1..9c5f8ae2e5f 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include @@ -26,12 +26,12 @@ JoinStep::JoinStep( }; } -QueryPipelinePtr JoinStep::updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings &) +QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) { if (pipelines.size() != 2) throw Exception(ErrorCodes::LOGICAL_ERROR, "JoinStep expect two input steps"); - return QueryPipeline::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, &processors); + return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, &processors); } void JoinStep::describePipeline(FormatSettings & settings) const @@ -67,7 +67,7 @@ FilledJoinStep::FilledJoinStep(const DataStream & input_stream_, JoinPtr join_, throw Exception(ErrorCodes::LOGICAL_ERROR, "FilledJoinStep expects Join to be filled"); } -void FilledJoinStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void FilledJoinStep::transformPipeline(QueryPipelineBuilder & pipeline, 
const BuildQueryPipelineSettings &) { bool default_totals = false; if (!pipeline.hasTotals() && join->getTotals()) @@ -78,9 +78,9 @@ void FilledJoinStep::transformPipeline(QueryPipeline & pipeline, const BuildQuer auto finish_counter = std::make_shared(pipeline.getNumStreams()); - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) { - bool on_totals = stream_type == QueryPipeline::StreamType::Totals; + bool on_totals = stream_type == QueryPipelineBuilder::StreamType::Totals; auto counter = on_totals ? nullptr : finish_counter; return std::make_shared(header, join, max_block_size, on_totals, default_totals, counter); }); diff --git a/src/Processors/QueryPlan/JoinStep.h b/src/Processors/QueryPlan/JoinStep.h index 6430f7cbd59..71537f29a8e 100644 --- a/src/Processors/QueryPlan/JoinStep.h +++ b/src/Processors/QueryPlan/JoinStep.h @@ -20,7 +20,7 @@ public: String getName() const override { return "Join"; } - QueryPipelinePtr updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings &) override; + QueryPipelineBuilderPtr updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) override; void describePipeline(FormatSettings & settings) const override; @@ -40,7 +40,7 @@ public: FilledJoinStep(const DataStream & input_stream_, JoinPtr join_, size_t max_block_size_); String getName() const override { return "FilledJoin"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; private: JoinPtr join; diff --git a/src/Processors/QueryPlan/LimitByStep.cpp b/src/Processors/QueryPlan/LimitByStep.cpp index 8ded0784b41..12ad933a159 100644 --- a/src/Processors/QueryPlan/LimitByStep.cpp +++ b/src/Processors/QueryPlan/LimitByStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -34,13 +34,13 @@ LimitByStep::LimitByStep( } -void LimitByStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void LimitByStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { pipeline.resize(1); - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { - if (stream_type != QueryPipeline::StreamType::Main) + if (stream_type != QueryPipelineBuilder::StreamType::Main) return nullptr; return std::make_shared(header, group_length, group_offset, columns); diff --git a/src/Processors/QueryPlan/LimitByStep.h b/src/Processors/QueryPlan/LimitByStep.h index 1b574cd02a1..eb91be8a814 100644 --- a/src/Processors/QueryPlan/LimitByStep.h +++ b/src/Processors/QueryPlan/LimitByStep.h @@ -14,7 +14,7 @@ public: String getName() const override { return "LimitBy"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; diff --git a/src/Processors/QueryPlan/LimitStep.cpp b/src/Processors/QueryPlan/LimitStep.cpp index 5f5a0bd0d64..3db59e0684a 100644 --- 
a/src/Processors/QueryPlan/LimitStep.cpp +++ b/src/Processors/QueryPlan/LimitStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -43,7 +43,7 @@ void LimitStep::updateInputStream(DataStream input_stream) output_stream = createOutputStream(input_streams.front(), output_stream->header, getDataStreamTraits()); } -void LimitStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void LimitStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { auto transform = std::make_shared( pipeline.getHeader(), limit, offset, pipeline.getNumStreams(), always_read_till_end, with_ties, description); diff --git a/src/Processors/QueryPlan/LimitStep.h b/src/Processors/QueryPlan/LimitStep.h index 772ba0722a7..f5bceeb29c7 100644 --- a/src/Processors/QueryPlan/LimitStep.h +++ b/src/Processors/QueryPlan/LimitStep.h @@ -18,7 +18,7 @@ public: String getName() const override { return "Limit"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; diff --git a/src/Processors/QueryPlan/MergeSortingStep.cpp b/src/Processors/QueryPlan/MergeSortingStep.cpp index c9e141281f4..820bbc31b74 100644 --- a/src/Processors/QueryPlan/MergeSortingStep.cpp +++ b/src/Processors/QueryPlan/MergeSortingStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -57,11 +57,11 @@ void MergeSortingStep::updateLimit(size_t limit_) } } -void MergeSortingStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void MergeSortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { - if (stream_type == QueryPipeline::StreamType::Totals) + if (stream_type == QueryPipelineBuilder::StreamType::Totals) return nullptr; return std::make_shared( diff --git a/src/Processors/QueryPlan/MergeSortingStep.h b/src/Processors/QueryPlan/MergeSortingStep.h index dcecdffd122..d5daa041256 100644 --- a/src/Processors/QueryPlan/MergeSortingStep.h +++ b/src/Processors/QueryPlan/MergeSortingStep.h @@ -24,7 +24,7 @@ public: String getName() const override { return "MergeSorting"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; @@ -45,4 +45,3 @@ private: }; } - diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.cpp b/src/Processors/QueryPlan/MergingAggregatedStep.cpp index 71efb37b363..d02be59ae84 100644 --- a/src/Processors/QueryPlan/MergingAggregatedStep.cpp +++ b/src/Processors/QueryPlan/MergingAggregatedStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -40,7 +40,7 @@ MergingAggregatedStep::MergingAggregatedStep( output_stream->distinct_columns.insert(params->params.intermediate_header.getByPosition(key).name); } -void 
MergingAggregatedStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void MergingAggregatedStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { if (!memory_efficient_aggregation) { diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.h b/src/Processors/QueryPlan/MergingAggregatedStep.h index 2e94d536a8c..9171512571a 100644 --- a/src/Processors/QueryPlan/MergingAggregatedStep.h +++ b/src/Processors/QueryPlan/MergingAggregatedStep.h @@ -21,7 +21,7 @@ public: String getName() const override { return "MergingAggregated"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; diff --git a/src/Processors/QueryPlan/MergingSortedStep.cpp b/src/Processors/QueryPlan/MergingSortedStep.cpp index 7e866f4ccd2..87d1af4d2bd 100644 --- a/src/Processors/QueryPlan/MergingSortedStep.cpp +++ b/src/Processors/QueryPlan/MergingSortedStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -47,7 +47,7 @@ void MergingSortedStep::updateLimit(size_t limit_) } } -void MergingSortedStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void MergingSortedStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { /// If there are several streams, then we merge them into one if (pipeline.getNumStreams() > 1) diff --git a/src/Processors/QueryPlan/MergingSortedStep.h b/src/Processors/QueryPlan/MergingSortedStep.h index 4f82e3830d0..5d27e59ab76 100644 --- a/src/Processors/QueryPlan/MergingSortedStep.h +++ b/src/Processors/QueryPlan/MergingSortedStep.h @@ -19,7 +19,7 @@ public: String getName() const override { return "MergingSorted"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; @@ -34,5 +34,3 @@ private: }; } - - diff --git a/src/Processors/QueryPlan/OffsetStep.cpp b/src/Processors/QueryPlan/OffsetStep.cpp index 34ddb687ddd..b48327eb36c 100644 --- a/src/Processors/QueryPlan/OffsetStep.cpp +++ b/src/Processors/QueryPlan/OffsetStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -29,7 +29,7 @@ OffsetStep::OffsetStep(const DataStream & input_stream_, size_t offset_) { } -void OffsetStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void OffsetStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { auto transform = std::make_shared( pipeline.getHeader(), offset, pipeline.getNumStreams()); diff --git a/src/Processors/QueryPlan/OffsetStep.h b/src/Processors/QueryPlan/OffsetStep.h index a10fcc7baec..488c55b6460 100644 --- a/src/Processors/QueryPlan/OffsetStep.h +++ b/src/Processors/QueryPlan/OffsetStep.h @@ -13,7 +13,7 @@ public: String getName() const override { return "Offset"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void 
describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; diff --git a/src/Processors/QueryPlan/PartialSortingStep.cpp b/src/Processors/QueryPlan/PartialSortingStep.cpp index d713a63028a..cf7cb157e4c 100644 --- a/src/Processors/QueryPlan/PartialSortingStep.cpp +++ b/src/Processors/QueryPlan/PartialSortingStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -47,11 +47,11 @@ void PartialSortingStep::updateLimit(size_t limit_) } } -void PartialSortingStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void PartialSortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { - if (stream_type != QueryPipeline::StreamType::Main) + if (stream_type != QueryPipelineBuilder::StreamType::Main) return nullptr; return std::make_shared(header, sort_description, limit); @@ -61,9 +61,9 @@ void PartialSortingStep::transformPipeline(QueryPipeline & pipeline, const Build limits.mode = LimitsMode::LIMITS_CURRENT; //-V1048 limits.size_limits = size_limits; - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { - if (stream_type != QueryPipeline::StreamType::Main) + if (stream_type != QueryPipelineBuilder::StreamType::Main) return nullptr; auto transform = std::make_shared(header, limits); diff --git a/src/Processors/QueryPlan/PartialSortingStep.h b/src/Processors/QueryPlan/PartialSortingStep.h index aeca42f7096..bd8fd30ce02 100644 --- a/src/Processors/QueryPlan/PartialSortingStep.h +++ b/src/Processors/QueryPlan/PartialSortingStep.h @@ -18,7 +18,7 @@ public: String getName() const override { return "PartialSorting"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 0b1b78b6c34..6fb6a24f65b 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -134,7 +134,7 @@ void QueryPlan::addStep(QueryPlanStepPtr step) " input expected", ErrorCodes::LOGICAL_ERROR); } -QueryPipelinePtr QueryPlan::buildQueryPipeline( +QueryPipelineBuilderPtr QueryPlan::buildQueryPipeline( const QueryPlanOptimizationSettings & optimization_settings, const BuildQueryPipelineSettings & build_pipeline_settings) { @@ -144,10 +144,10 @@ QueryPipelinePtr QueryPlan::buildQueryPipeline( struct Frame { Node * node = {}; - QueryPipelines pipelines = {}; + QueryPipelineBuilders pipelines = {}; }; - QueryPipelinePtr last_pipeline; + QueryPipelineBuilderPtr last_pipeline; std::stack stack; stack.push(Frame{.node = root}); @@ -193,7 +193,7 @@ Pipe QueryPlan::convertToPipe( if (isCompleted()) throw Exception("Cannot convert completed QueryPlan to Pipe", ErrorCodes::LOGICAL_ERROR); - return 
QueryPipeline::getPipe(std::move(*buildQueryPipeline(optimization_settings, build_pipeline_settings))); + return QueryPipelineBuilder::getPipe(std::move(*buildQueryPipeline(optimization_settings, build_pipeline_settings))); } void QueryPlan::addInterpreterContext(ContextPtr context) diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h index 06639ebfd38..4e342d746d1 100644 --- a/src/Processors/QueryPlan/QueryPlan.h +++ b/src/Processors/QueryPlan/QueryPlan.h @@ -17,8 +17,8 @@ class DataStream; class IQueryPlanStep; using QueryPlanStepPtr = std::unique_ptr; -class QueryPipeline; -using QueryPipelinePtr = std::unique_ptr; +class QueryPipelineBuilder; +using QueryPipelineBuilderPtr = std::unique_ptr; class WriteBuffer; @@ -56,7 +56,7 @@ public: void optimize(const QueryPlanOptimizationSettings & optimization_settings); - QueryPipelinePtr buildQueryPipeline( + QueryPipelineBuilderPtr buildQueryPipeline( const QueryPlanOptimizationSettings & optimization_settings, const BuildQueryPipelineSettings & build_pipeline_settings); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 86d4a7c93b1..dac9e5798b8 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -875,7 +875,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( log, num_streams, result.index_stats, - true /* use_skip_indexes */); + context->getSettings().use_skip_indexes); } catch (...) { @@ -927,7 +927,7 @@ ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const return std::get(result_ptr->result); } -void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { auto result = getAnalysisResult(); LOG_DEBUG( diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index fc06314ee0c..46b62467ae0 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -102,7 +102,7 @@ public: String getName() const override { return "ReadFromMergeTree"; } - void initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void describeActions(FormatSettings & format_settings) const override; void describeIndexes(FormatSettings & format_settings) const override; diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp index 0d1a0fdc619..c8213d58db6 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { @@ -11,7 +11,7 @@ ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, std::shared_ptr +#include #include #include #include @@ -163,7 +164,7 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFacto if (try_results.empty() || local_delay < max_remote_delay) { auto plan = createLocalPlan(query, header, context, stage, shard_num, shard_count); - return QueryPipeline::getPipe(std::move(*plan->buildQueryPipeline( + return 
QueryPipelineBuilder::getPipe(std::move(*plan->buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)))); } @@ -219,7 +220,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory:: addConvertingActions(pipes.back(), output_stream->header); } -void ReadFromRemote::initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void ReadFromRemote::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { Pipes pipes; for (const auto & shard : shards) diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index ba0060d5470..f963164dd3f 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -34,7 +34,7 @@ public: String getName() const override { return "ReadFromRemote"; } - void initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; private: ClusterProxy::IStreamFactory::Shards shards; diff --git a/src/Processors/QueryPlan/ReadNothingStep.cpp b/src/Processors/QueryPlan/ReadNothingStep.cpp index c0c97e5d523..7019b88f0b2 100644 --- a/src/Processors/QueryPlan/ReadNothingStep.cpp +++ b/src/Processors/QueryPlan/ReadNothingStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include namespace DB @@ -10,7 +10,7 @@ ReadNothingStep::ReadNothingStep(Block output_header) { } -void ReadNothingStep::initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void ReadNothingStep::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { pipeline.init(Pipe(std::make_shared(getOutputStream().header))); } diff --git a/src/Processors/QueryPlan/ReadNothingStep.h b/src/Processors/QueryPlan/ReadNothingStep.h index 4c5b4adb7ce..dad554e3d15 100644 --- a/src/Processors/QueryPlan/ReadNothingStep.h +++ b/src/Processors/QueryPlan/ReadNothingStep.h @@ -12,7 +12,7 @@ public: String getName() const override { return "ReadNothing"; } - void initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; }; } diff --git a/src/Processors/QueryPlan/RollupStep.cpp b/src/Processors/QueryPlan/RollupStep.cpp index 45573b352d6..114fe661c70 100644 --- a/src/Processors/QueryPlan/RollupStep.cpp +++ b/src/Processors/QueryPlan/RollupStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { @@ -30,13 +30,13 @@ RollupStep::RollupStep(const DataStream & input_stream_, AggregatingTransformPar output_stream->distinct_columns.insert(params->params.src_header.getByPosition(key).name); } -void RollupStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void RollupStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { pipeline.resize(1); - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { - if (stream_type == QueryPipeline::StreamType::Totals) + if (stream_type == QueryPipelineBuilder::StreamType::Totals) return nullptr; return std::make_shared(header, std::move(params)); diff --git 
a/src/Processors/QueryPlan/RollupStep.h b/src/Processors/QueryPlan/RollupStep.h index 21faf539990..2ff3040d7a7 100644 --- a/src/Processors/QueryPlan/RollupStep.h +++ b/src/Processors/QueryPlan/RollupStep.h @@ -16,7 +16,7 @@ public: String getName() const override { return "Rollup"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; private: AggregatingTransformParamsPtr params; diff --git a/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.cpp b/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.cpp index 734e6db318d..47f8187c3aa 100644 --- a/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.cpp +++ b/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include namespace DB @@ -39,7 +39,7 @@ SettingQuotaAndLimitsStep::SettingQuotaAndLimitsStep( { } -void SettingQuotaAndLimitsStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void SettingQuotaAndLimitsStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { /// Table lock is stored inside pipeline here. pipeline.setLimits(limits); diff --git a/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.h b/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.h index 3c73c208b70..b36ddfb3768 100644 --- a/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.h +++ b/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.h @@ -32,7 +32,7 @@ public: String getName() const override { return "SettingQuotaAndLimits"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; private: ContextPtr context; diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index db82538d5a0..e0388ed480e 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -49,7 +49,7 @@ TotalsHavingStep::TotalsHavingStep( { } -void TotalsHavingStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings & settings) +void TotalsHavingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { auto expression_actions = actions_dag ? 
std::make_shared(actions_dag, settings.getActionsSettings()) : nullptr; diff --git a/src/Processors/QueryPlan/TotalsHavingStep.h b/src/Processors/QueryPlan/TotalsHavingStep.h index bc053c96970..3d79d47ad22 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.h +++ b/src/Processors/QueryPlan/TotalsHavingStep.h @@ -24,7 +24,7 @@ public: String getName() const override { return "TotalsHaving"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings & settings) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; @@ -41,4 +41,3 @@ private: }; } - diff --git a/src/Processors/QueryPlan/UnionStep.cpp b/src/Processors/QueryPlan/UnionStep.cpp index 418e9b4e2e7..6d4036b1272 100644 --- a/src/Processors/QueryPlan/UnionStep.cpp +++ b/src/Processors/QueryPlan/UnionStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -37,9 +37,9 @@ UnionStep::UnionStep(DataStreams input_streams_, size_t max_threads_) output_stream = DataStream{.header = header}; } -QueryPipelinePtr UnionStep::updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings &) +QueryPipelineBuilderPtr UnionStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) { - auto pipeline = std::make_unique(); + auto pipeline = std::make_unique(); QueryPipelineProcessorsCollector collector(*pipeline, this); if (pipelines.empty()) @@ -71,7 +71,7 @@ QueryPipelinePtr UnionStep::updatePipeline(QueryPipelines pipelines, const Build } } - *pipeline = QueryPipeline::unitePipelines(std::move(pipelines), max_threads); + *pipeline = QueryPipelineBuilder::unitePipelines(std::move(pipelines), max_threads); processors = collector.detachProcessors(); return pipeline; diff --git a/src/Processors/QueryPlan/UnionStep.h b/src/Processors/QueryPlan/UnionStep.h index 81bd033d045..c23223bc6fa 100644 --- a/src/Processors/QueryPlan/UnionStep.h +++ b/src/Processors/QueryPlan/UnionStep.h @@ -13,7 +13,7 @@ public: String getName() const override { return "Union"; } - QueryPipelinePtr updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings &) override; + QueryPipelineBuilderPtr updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) override; void describePipeline(FormatSettings & settings) const override; diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index 29f2999ec83..ca09f4a9474 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -63,7 +63,7 @@ WindowStep::WindowStep(const DataStream & input_stream_, } -void WindowStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +void WindowStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { // This resize is needed for cases such as `over ()` when we don't have a // sort node, and the input might have multiple streams. 
The sort node would diff --git a/src/Processors/QueryPlan/WindowStep.h b/src/Processors/QueryPlan/WindowStep.h index b5018b1d5a7..a65b157f481 100644 --- a/src/Processors/QueryPlan/WindowStep.h +++ b/src/Processors/QueryPlan/WindowStep.h @@ -20,7 +20,7 @@ public: String getName() const override { return "Window"; } - void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp new file mode 100644 index 00000000000..9ec0939f3a8 --- /dev/null +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -0,0 +1,23 @@ +#include +#include + +namespace DB +{ + +SinkToStorage::SinkToStorage(const Block & header) : ExceptionKeepingTransform(header, header, false) {} + +void SinkToStorage::transform(Chunk & chunk) +{ + /** Throw an exception if the sizes of arrays - elements of nested data structures doesn't match. + * We have to make this assertion before writing to table, because storage engine may assume that they have equal sizes. + * NOTE It'd better to do this check in serialization of nested structures (in place when this assumption is required), + * but currently we don't have methods for serialization of nested structures "as a whole". + */ + Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); + + consume(chunk.clone()); + if (lastBlockIsDuplicate()) + chunk.clear(); +} + +} diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index 3e0ac470be7..76920777bdc 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -1,21 +1,27 @@ #pragma once -#include #include +#include namespace DB { /// Sink which is returned from Storage::write. -/// The same as ISink, but also can hold table lock. 
-class SinkToStorage : public ISink +class SinkToStorage : public ExceptionKeepingTransform { public: - using ISink::ISink; + explicit SinkToStorage(const Block & header); + const Block & getHeader() const { return inputs.front().getHeader(); } void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } +protected: + virtual void consume(Chunk chunk) = 0; + virtual bool lastBlockIsDuplicate() const { return false; } + private: std::vector table_locks; + + void transform(Chunk & chunk) override; }; using SinkToStoragePtr = std::shared_ptr; diff --git a/src/Processors/Sources/SinkToOutputStream.cpp b/src/Processors/Sources/SinkToOutputStream.cpp deleted file mode 100644 index f8a890a0d9d..00000000000 --- a/src/Processors/Sources/SinkToOutputStream.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include - - -namespace DB -{ - -SinkToOutputStream::SinkToOutputStream(BlockOutputStreamPtr stream_) - : SinkToStorage(stream_->getHeader()) - , stream(std::move(stream_)) -{ - stream->writePrefix(); -} - -void SinkToOutputStream::consume(Chunk chunk) -{ - stream->write(getPort().getHeader().cloneWithColumns(chunk.detachColumns())); -} - -void SinkToOutputStream::onFinish() -{ - stream->writeSuffix(); -} - -} diff --git a/src/Processors/Sources/SinkToOutputStream.h b/src/Processors/Sources/SinkToOutputStream.h deleted file mode 100644 index 946a53b685e..00000000000 --- a/src/Processors/Sources/SinkToOutputStream.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -class IBlockOutputStream; -using BlockOutputStreamPtr = std::shared_ptr; - -/// Sink which writes data to IBlockOutputStream. -/// It's a temporary wrapper. -class SinkToOutputStream : public SinkToStorage -{ -public: - explicit SinkToOutputStream(BlockOutputStreamPtr stream); - - String getName() const override { return "SinkToOutputStream"; } - -protected: - void consume(Chunk chunk) override; - void onFinish() override; - -private: - BlockOutputStreamPtr stream; -}; - -} diff --git a/src/Processors/Sources/SourceFromSingleChunk.h b/src/Processors/Sources/SourceFromSingleChunk.h index d304bdbab93..8268fa5b0a6 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.h +++ b/src/Processors/Sources/SourceFromSingleChunk.h @@ -9,6 +9,7 @@ class SourceFromSingleChunk : public SourceWithProgress { public: explicit SourceFromSingleChunk(Block header, Chunk chunk_) : SourceWithProgress(std::move(header)), chunk(std::move(chunk_)) {} + explicit SourceFromSingleChunk(Block data) : SourceWithProgress(data.cloneEmpty()), chunk(data.getColumns(), data.rows()) {} String getName() const override { return "SourceFromSingleChunk"; } protected: diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp index d9b383030d3..6776caae9bf 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.cpp +++ b/src/Processors/Transforms/CreatingSetsTransform.cpp @@ -1,6 +1,6 @@ #include -#include - +#include +#include #include #include @@ -20,6 +20,7 @@ namespace ErrorCodes extern const int SET_SIZE_LIMIT_EXCEEDED; } +CreatingSetsTransform::~CreatingSetsTransform() = default; CreatingSetsTransform::CreatingSetsTransform( Block in_header_, @@ -50,7 +51,8 @@ void CreatingSetsTransform::startSubquery() LOG_TRACE(log, "Filling temporary table."); if (subquery.table) - table_out = std::make_shared(subquery.table->write({}, subquery.table->getInMemoryMetadataPtr(), getContext())); + /// TODO: make via port + table_out = QueryPipeline(subquery.table->write({}, 
subquery.table->getInMemoryMetadataPtr(), getContext())); done_with_set = !subquery.set; done_with_table = !subquery.table; @@ -58,8 +60,11 @@ void CreatingSetsTransform::startSubquery() if (done_with_set /*&& done_with_join*/ && done_with_table) throw Exception("Logical error: nothing to do with subquery", ErrorCodes::LOGICAL_ERROR); - if (table_out) - table_out->writePrefix(); + if (table_out.initialized()) + { + executor = std::make_unique(table_out); + executor->start(); + } } void CreatingSetsTransform::finishSubquery() @@ -104,7 +109,7 @@ void CreatingSetsTransform::consume(Chunk chunk) if (!done_with_table) { block = materializeBlock(block); - table_out->write(block); + executor->push(block); rows_to_transfer += block.rows(); bytes_to_transfer += block.bytes(); @@ -123,8 +128,12 @@ Chunk CreatingSetsTransform::generate() if (subquery.set) subquery.set->finishInsert(); - if (table_out) - table_out->writeSuffix(); + if (table_out.initialized()) + { + executor->finish(); + executor.reset(); + table_out.reset(); + } finishSubquery(); return {}; diff --git a/src/Processors/Transforms/CreatingSetsTransform.h b/src/Processors/Transforms/CreatingSetsTransform.h index a847582a988..eca12c33f54 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.h +++ b/src/Processors/Transforms/CreatingSetsTransform.h @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include @@ -16,6 +18,8 @@ class QueryStatus; struct Progress; using ProgressCallback = std::function; +class PushingPipelineExecutor; + /// This processor creates set during execution. /// Don't return any data. Sets are created when Finish status is returned. /// In general, several work() methods need to be called to finish. @@ -30,6 +34,8 @@ public: SizeLimits network_transfer_limits_, ContextPtr context_); + ~CreatingSetsTransform() override; + String getName() const override { return "CreatingSetsTransform"; } void work() override; @@ -39,7 +45,8 @@ public: private: SubqueryForSet subquery; - BlockOutputStreamPtr table_out; + std::unique_ptr executor; + QueryPipeline table_out; UInt64 read_rows = 0; Stopwatch watch; diff --git a/src/Processors/Transforms/ExceptionKeepingTransform.cpp b/src/Processors/Transforms/ExceptionKeepingTransform.cpp new file mode 100644 index 00000000000..41fc2b7d17c --- /dev/null +++ b/src/Processors/Transforms/ExceptionKeepingTransform.cpp @@ -0,0 +1,164 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +ExceptionKeepingTransform::ExceptionKeepingTransform(const Block & in_header, const Block & out_header, bool ignore_on_start_and_finish_) + : IProcessor({in_header}, {out_header}) + , input(inputs.front()), output(outputs.front()) + , ignore_on_start_and_finish(ignore_on_start_and_finish_) +{ +} + +IProcessor::Status ExceptionKeepingTransform::prepare() +{ + if (!ignore_on_start_and_finish && !was_on_start_called) + return Status::Ready; + + /// Check can output. + + if (output.isFinished()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Output port is finished for {}", getName()); + + if (!output.canPush()) + { + input.setNotNeeded(); + return Status::PortFull; + } + + /// Output if has data. 
+    if (ready_output)
+    {
+        output.pushData(std::move(data));
+        ready_output = false;
+        return Status::PortFull;
+    }
+
+    if (!ready_input)
+    {
+        if (input.isFinished())
+        {
+            if (!ignore_on_start_and_finish && !was_on_finish_called && !has_exception)
+                return Status::Ready;
+
+            output.finish();
+            return Status::Finished;
+        }
+
+        input.setNeeded();
+
+        if (!input.hasData())
+            return Status::NeedData;
+
+        data = input.pullData(true);
+
+        if (data.exception)
+        {
+            has_exception = true;
+            output.pushData(std::move(data));
+            return Status::PortFull;
+        }
+
+        if (has_exception)
+            /// In case of exception, just drop all other data.
+            /// If transform is stateful, it's state may be broken after exception from transform()
+            data.chunk.clear();
+        else
+            ready_input = true;
+    }
+
+    return Status::Ready;
+}
+
+static std::exception_ptr runStep(std::function step, ThreadStatus * thread_status, std::atomic_uint64_t * elapsed_ms)
+{
+    std::exception_ptr res;
+    std::optional watch;
+
+    auto * original_thread = current_thread;
+    SCOPE_EXIT({ current_thread = original_thread; });
+
+    if (thread_status)
+    {
+        /// Change thread context to store individual metrics. Once the work in done, go back to the original thread
+        thread_status->resetPerformanceCountersLastUsage();
+        current_thread = thread_status;
+    }
+
+    if (elapsed_ms)
+        watch.emplace();
+
+    try
+    {
+        step();
+    }
+    catch (...)
+    {
+        res = std::current_exception();
+    }
+
+    if (thread_status)
+        thread_status->updatePerformanceCounters();
+
+    if (elapsed_ms && watch)
+        *elapsed_ms += watch->elapsedMilliseconds();
+
+    return res;
+}
+
+void ExceptionKeepingTransform::work()
+{
+    if (!ignore_on_start_and_finish && !was_on_start_called)
+    {
+        was_on_start_called = true;
+
+        if (auto exception = runStep([this] { onStart(); }, thread_status, elapsed_counter_ms))
+        {
+            has_exception = true;
+            ready_output = true;
+            data.exception = std::move(exception);
+        }
+    }
+    else if (ready_input)
+    {
+        ready_input = false;
+
+        if (auto exception = runStep([this] { transform(data.chunk); }, thread_status, elapsed_counter_ms))
+        {
+            has_exception = true;
+            data.chunk.clear();
+            data.exception = std::move(exception);
+        }
+
+        if (data.chunk || data.exception)
+            ready_output = true;
+    }
+    else if (!ignore_on_start_and_finish && !was_on_finish_called)
+    {
+        was_on_finish_called = true;
+
+        if (auto exception = runStep([this] { onFinish(); }, thread_status, elapsed_counter_ms))
+        {
+            has_exception = true;
+            ready_output = true;
+            data.exception = std::move(exception);
+        }
+    }
+}
+
+void ExceptionKeepingTransform::setRuntimeData(ThreadStatus * thread_status_, std::atomic_uint64_t * elapsed_counter_ms_)
+{
+    thread_status = thread_status_;
+    elapsed_counter_ms = elapsed_counter_ms_;
+}
+
+}
diff --git a/src/Processors/Transforms/ExceptionKeepingTransform.h b/src/Processors/Transforms/ExceptionKeepingTransform.h
new file mode 100644
index 00000000000..867f13bf53a
--- /dev/null
+++ b/src/Processors/Transforms/ExceptionKeepingTransform.h
@@ -0,0 +1,60 @@
+#pragma once
+#include
+
+namespace DB
+{
+
+
+class ThreadStatus;
+
+/// Has one input and one output.
+/// Works similarly to ISimpleTransform, but with much care about exceptions.
+///
+/// If input contain exception, this exception is pushed directly to output port.
+/// If input contain data chunk, transform() is called for it.
+/// When transform throws exception itself, data chunk is replaced by caught exception.
+/// Transformed chunk or newly caught exception is pushed to output.
+///
+/// There may be any number of exceptions read from input, transform keeps the order.
+/// It is expected that output port won't be closed from the other side before all data is processed.
+///
+/// Method onStart() is called before reading any data.
+/// Method onFinish() is called after all data from input is processed, if no exception happened.
+/// In case of exception, it is additionally pushed into pipeline.
+class ExceptionKeepingTransform : public IProcessor
+{
+protected:
+    InputPort & input;
+    OutputPort & output;
+    Port::Data data;
+
+    bool ready_input = false;
+    bool ready_output = false;
+    bool has_exception = false;
+
+    const bool ignore_on_start_and_finish = true;
+    bool was_on_start_called = false;
+    bool was_on_finish_called = false;
+
+//protected:
+    virtual void transform(Chunk & chunk) = 0;
+    virtual void onStart() {}
+    virtual void onFinish() {}
+
+public:
+    ExceptionKeepingTransform(const Block & in_header, const Block & out_header, bool ignore_on_start_and_finish_ = true);
+
+    Status prepare() override;
+    void work() override;
+
+    InputPort & getInputPort() { return input; }
+    OutputPort & getOutputPort() { return output; }
+
+    void setRuntimeData(ThreadStatus * thread_status_, std::atomic_uint64_t * elapsed_counter_ms_);
+
+private:
+    ThreadStatus * thread_status = nullptr;
+    std::atomic_uint64_t * elapsed_counter_ms = nullptr;
+};
+
+}
diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp
index efdff086efa..ca788f1dd9f 100644
--- a/src/Processors/Transforms/ExpressionTransform.cpp
+++ b/src/Processors/Transforms/ExpressionTransform.cpp
@@ -25,4 +25,20 @@ void ExpressionTransform::transform(Chunk & chunk)
     chunk.setColumns(block.getColumns(), num_rows);
 }
 
+ConvertingTransform::ConvertingTransform(const Block & header_, ExpressionActionsPtr expression_)
+    : ExceptionKeepingTransform(header_, ExpressionTransform::transformHeader(header_, expression_->getActionsDAG()))
+    , expression(std::move(expression_))
+{
+}
+
+void ConvertingTransform::transform(Chunk & chunk)
+{
+    size_t num_rows = chunk.getNumRows();
+    auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns());
+
+    expression->execute(block, num_rows);
+
+    chunk.setColumns(block.getColumns(), num_rows);
+}
+
 }
diff --git a/src/Processors/Transforms/ExpressionTransform.h b/src/Processors/Transforms/ExpressionTransform.h
index d4cc1c8f78a..a76dc733e14 100644
--- a/src/Processors/Transforms/ExpressionTransform.h
+++ b/src/Processors/Transforms/ExpressionTransform.h
@@ -1,4 +1,5 @@
 #pragma once
+#include
 #include
 
 namespace DB
@@ -14,7 +15,7 @@ class ActionsDAG;
  * For example: hits * 2 + 3, url LIKE '%yandex%'
  * The expression processes each row independently of the others.
*/ -class ExpressionTransform : public ISimpleTransform +class ExpressionTransform final : public ISimpleTransform { public: ExpressionTransform( @@ -32,4 +33,20 @@ private: ExpressionActionsPtr expression; }; +class ConvertingTransform final : public ExceptionKeepingTransform +{ +public: + ConvertingTransform( + const Block & header_, + ExpressionActionsPtr expression_); + + String getName() const override { return "ConvertingTransform"; } + +protected: + void transform(Chunk & chunk) override; + +private: + ExpressionActionsPtr expression; +}; + } diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 398ce9eb9fb..f5aef01463a 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -1,27 +1,45 @@ #include +#include namespace DB { SquashingChunksTransform::SquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, bool reserve_memory) - : IAccumulatingTransform(header, header) + : ExceptionKeepingTransform(header, header, false) , squashing(min_block_size_rows, min_block_size_bytes, reserve_memory) { } -void SquashingChunksTransform::consume(Chunk chunk) +void SquashingChunksTransform::transform(Chunk & chunk) { if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) { - setReadyChunk(Chunk(block.getColumns(), block.rows())); + chunk.setColumns(block.getColumns(), block.rows()); } } -Chunk SquashingChunksTransform::generate() +void SquashingChunksTransform::onFinish() { auto block = squashing.add({}); - return Chunk(block.getColumns(), block.rows()); + finish_chunk.setColumns(block.getColumns(), block.rows()); +} + +void SquashingChunksTransform::work() +{ + if (has_exception) + { + data.chunk.clear(); + ready_input = false; + return; + } + + ExceptionKeepingTransform::work(); + if (finish_chunk) + { + data.chunk = std::move(finish_chunk); + ready_output = true; + } } } diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index bcacf5abcda..75a799e5af1 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -1,11 +1,11 @@ #pragma once -#include +#include #include namespace DB { -class SquashingChunksTransform : public IAccumulatingTransform +class SquashingChunksTransform : public ExceptionKeepingTransform { public: explicit SquashingChunksTransform( @@ -13,12 +13,16 @@ public: String getName() const override { return "SquashingTransform"; } + void work() override; + protected: - void consume(Chunk chunk) override; - Chunk generate() override; + void transform(Chunk & chunk) override; + void onFinish() override; + private: SquashingTransform squashing; + Chunk finish_chunk; }; } diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 50a627311e3..0fb9d82aca6 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -24,8 +23,12 @@ #include #include #include +#include +#include #include -#include +#include +#include +#include #include #include #include @@ -577,7 +580,6 @@ namespace void createExternalTables(); void generateOutput(); - void generateOutputWithProcessors(); void finishQuery(); void onException(const Exception & exception); @@ -850,7 +852,7 @@ namespace void Call::processInput() { - if 
(!io.out) + if (!io.pipeline.pushing()) return; bool has_data_to_insert = (insert_query && insert_query->data) @@ -865,18 +867,19 @@ namespace /// This is significant, because parallel parsing may be used. /// So we mustn't touch the input stream from other thread. - initializeBlockInputStream(io.out->getHeader()); + initializeBlockInputStream(io.pipeline.getHeader()); - io.out->writePrefix(); + PushingPipelineExecutor executor(io.pipeline); + executor.start(); Block block; while (pipeline_executor->pull(block)) { if (block) - io.out->write(block); + executor.push(block); } - io.out->writeSuffix(); + executor.finish(); } void Call::initializeBlockInputStream(const Block & header) @@ -939,10 +942,10 @@ namespace }); assert(!pipeline); - pipeline = std::make_unique(); auto source = FormatFactory::instance().getInput( input_format, *read_buffer, header, query_context, query_context->getSettings().max_insert_block_size); - pipeline->init(Pipe(source)); + QueryPipelineBuilder builder; + builder.init(Pipe(source)); /// Add default values if necessary. if (ast) @@ -956,7 +959,7 @@ namespace const auto & columns = storage->getInMemoryMetadataPtr()->getColumns(); if (!columns.empty()) { - pipeline->addSimpleTransform([&](const Block & cur_header) + builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header, columns, *source, query_context); }); @@ -965,6 +968,7 @@ namespace } } + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); pipeline_executor = std::make_unique(*pipeline); } @@ -1008,7 +1012,7 @@ namespace { /// The data will be written directly to the table. auto metadata_snapshot = storage->getInMemoryMetadataPtr(); - auto out_stream = std::make_shared(storage->write(ASTPtr(), metadata_snapshot, query_context)); + auto sink = storage->write(ASTPtr(), metadata_snapshot, query_context); ReadBufferFromMemory data(external_table.data().data(), external_table.data().size()); String format = external_table.format(); if (format.empty()) @@ -1025,14 +1029,20 @@ namespace external_table_context->checkSettingsConstraints(settings_changes); external_table_context->applySettingsChanges(settings_changes); } - auto in_stream = external_table_context->getInputFormat( - format, data, metadata_snapshot->getSampleBlock(), external_table_context->getSettings().max_insert_block_size); - in_stream->readPrefix(); - out_stream->writePrefix(); - while (auto block = in_stream->read()) - out_stream->write(block); - in_stream->readSuffix(); - out_stream->writeSuffix(); + auto in = FormatFactory::instance().getInput( + format, data, metadata_snapshot->getSampleBlock(), + external_table_context, external_table_context->getSettings().max_insert_block_size); + + QueryPipelineBuilder cur_pipeline; + cur_pipeline.init(Pipe(std::move(in))); + cur_pipeline.addTransform(std::move(sink)); + cur_pipeline.setSinks([&](const Block & header, Pipe::StreamType) + { + return std::make_shared(header); + }); + + auto executor = cur_pipeline.execute(); + executor->execute(1); } } @@ -1067,145 +1077,95 @@ namespace void Call::generateOutput() { - if (io.pipeline.initialized()) - { - generateOutputWithProcessors(); - return; - } - - if (!io.in) + if (!io.pipeline.initialized() || io.pipeline.pushing()) return; - AsynchronousBlockInputStream async_in(io.in); + Block header; + if (io.pipeline.pulling()) + header = io.pipeline.getHeader(); + write_buffer.emplace(*result.mutable_output()); - block_output_stream = query_context->getOutputStream(output_format, *write_buffer, 
async_in.getHeader()); + block_output_stream = query_context->getOutputStream(output_format, *write_buffer, header); + block_output_stream->writePrefix(); Stopwatch after_send_progress; /// Unless the input() function is used we are not going to receive input data anymore. if (!input_function_is_used) check_query_info_contains_cancel_only = true; - auto check_for_cancel = [&] + if (io.pipeline.pulling()) { - if (isQueryCancelled()) + auto executor = std::make_shared(io.pipeline); + auto check_for_cancel = [&] { - async_in.cancel(false); - return false; - } - return true; - }; + if (isQueryCancelled()) + { + executor->cancel(); + return false; + } + return true; + }; - async_in.readPrefix(); - block_output_stream->writePrefix(); - - while (check_for_cancel()) - { Block block; - if (async_in.poll(interactive_delay / 1000)) + while (check_for_cancel()) { - block = async_in.read(); - if (!block) + if (!executor->pull(block, interactive_delay / 1000)) + break; + + throwIfFailedToSendResult(); + if (!check_for_cancel()) + break; + + if (block && !io.null_format) + block_output_stream->write(block); + + if (after_send_progress.elapsedMicroseconds() >= interactive_delay) + { + addProgressToResult(); + after_send_progress.restart(); + } + + addLogsToResult(); + + bool has_output = write_buffer->offset(); + if (has_output || result.has_progress() || result.logs_size()) + sendResult(); + + throwIfFailedToSendResult(); + if (!check_for_cancel()) break; } - throwIfFailedToSendResult(); - if (!check_for_cancel()) - break; - - if (block && !io.null_format) - block_output_stream->write(block); - - if (after_send_progress.elapsedMicroseconds() >= interactive_delay) + if (!isQueryCancelled()) { - addProgressToResult(); - after_send_progress.restart(); + addTotalsToResult(executor->getTotalsBlock()); + addExtremesToResult(executor->getExtremesBlock()); + addProfileInfoToResult(executor->getProfileInfo()); } - - addLogsToResult(); - - bool has_output = write_buffer->offset(); - if (has_output || result.has_progress() || result.logs_size()) - sendResult(); - - throwIfFailedToSendResult(); - if (!check_for_cancel()) - break; } - - async_in.readSuffix(); - block_output_stream->writeSuffix(); - - if (!isQueryCancelled()) + else { - addTotalsToResult(io.in->getTotals()); - addExtremesToResult(io.in->getExtremes()); - addProfileInfoToResult(io.in->getProfileInfo()); - } - } - - void Call::generateOutputWithProcessors() - { - if (!io.pipeline.initialized()) - return; - - auto executor = std::make_shared(io.pipeline); - write_buffer.emplace(*result.mutable_output()); - block_output_stream = query_context->getOutputStream(output_format, *write_buffer, executor->getHeader()); - block_output_stream->writePrefix(); - Stopwatch after_send_progress; - - /// Unless the input() function is used we are not going to receive input data anymore. 
- if (!input_function_is_used) - check_query_info_contains_cancel_only = true; - - auto check_for_cancel = [&] - { - if (isQueryCancelled()) + auto executor = std::make_shared(io.pipeline); + auto callback = [&]() -> bool { - executor->cancel(); - return false; - } - return true; - }; - Block block; - while (check_for_cancel()) - { - if (!executor->pull(block, interactive_delay / 1000)) - break; - - throwIfFailedToSendResult(); - if (!check_for_cancel()) - break; - - if (block && !io.null_format) - block_output_stream->write(block); - - if (after_send_progress.elapsedMicroseconds() >= interactive_delay) - { + throwIfFailedToSendResult(); addProgressToResult(); - after_send_progress.restart(); - } + addLogsToResult(); - addLogsToResult(); + bool has_output = write_buffer->offset(); + if (has_output || result.has_progress() || result.logs_size()) + sendResult(); - bool has_output = write_buffer->offset(); - if (has_output || result.has_progress() || result.logs_size()) - sendResult(); + throwIfFailedToSendResult(); - throwIfFailedToSendResult(); - if (!check_for_cancel()) - break; + return isQueryCancelled(); + }; + executor->setCancelCallback(std::move(callback), interactive_delay / 1000); + executor->execute(); } block_output_stream->writeSuffix(); - - if (!isQueryCancelled()) - { - addTotalsToResult(executor->getTotalsBlock()); - addExtremesToResult(executor->getExtremesBlock()); - addProfileInfoToResult(executor->getProfileInfo()); - } } void Call::finishQuery() diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index 0716d828520..226f1fea324 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -7,6 +7,7 @@ #include "PostgreSQLHandler.h" #include #include +#include #include #if !defined(ARCADIA_BUILD) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index eb756a47156..5415bf96443 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -37,6 +36,10 @@ #include #include +#include +#include +#include +#include #include "Core/Protocol.h" #include "TCPHandler.h" @@ -115,6 +118,20 @@ void TCPHandler::runImpl() try { receiveHello(); + sendHello(); + + if (!is_interserver_mode) /// In interserver mode queries are executed without a session context. + { + session->makeSessionContext(); + + /// If session created, then settings in session context has been updated. + /// So it's better to update the connection settings for flexibility. + extractConnectionSettingsFromContext(session->sessionContext()); + + /// When connecting, the default database could be specified. + if (!default_database.empty()) + session->sessionContext()->setCurrentDatabase(default_database); + } } catch (const Exception & e) /// Typical for an incorrect username, password, or address. { @@ -140,21 +157,6 @@ void TCPHandler::runImpl() throw; } - sendHello(); - - if (!is_interserver_mode) /// In interserver mode queries are executed without a session context. - { - session->makeSessionContext(); - - /// If session created, then settings in session context has been updated. - /// So it's better to update the connection settings for flexibility. - extractConnectionSettingsFromContext(session->sessionContext()); - - /// When connecting, the default database could be specified. - if (!default_database.empty()) - session->sessionContext()->setCurrentDatabase(default_database); - } - while (true) { /// We are waiting for a packet from the client. 
Thus, every `poll_interval` seconds check whether we need to shut down. @@ -292,24 +294,37 @@ void TCPHandler::runImpl() after_check_cancelled.restart(); after_send_progress.restart(); + if (state.io.pipeline.pushing()) /// FIXME: check explicitly that insert query suggests to receive data via native protocol, - /// and don't check implicitly via existence of |state.io.in|. - if (state.io.out && !state.io.in) { state.need_receive_data_for_insert = true; processInsertQuery(); } - else if (state.need_receive_data_for_input) // It implies pipeline execution + else if (state.io.pipeline.pulling()) { - /// It is special case for input(), all works for reading data from client will be done in callbacks. - auto executor = state.io.pipeline.execute(); - executor->execute(state.io.pipeline.getNumThreads()); - } - else if (state.io.pipeline.initialized()) processOrdinaryQueryWithProcessors(); - else if (state.io.in) - /// TODO: check that this branch works well for insert query with embedded data. - processOrdinaryQuery(); + } + else if (state.io.pipeline.completed()) + { + CompletedPipelineExecutor executor(state.io.pipeline); + /// Should not check for cancel in case of input. + if (!state.need_receive_data_for_input) + { + auto callback = [this]() + { + if (isQueryCancelled()) + return true; + + sendProgress(); + sendLogs(); + + return false; + }; + + executor.setCancelCallback(callback, interactive_delay / 1000); + } + executor.execute(); + } state.io.onFinish(); @@ -544,110 +559,61 @@ void TCPHandler::skipData() void TCPHandler::processInsertQuery() { - /** Made above the rest of the lines, so that in case of `writePrefix` function throws an exception, - * client receive exception before sending data. - */ - state.io.out->writePrefix(); + size_t num_threads = state.io.pipeline.getNumThreads(); - /// Send ColumnsDescription for insertion table - if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA) + auto send_table_columns = [&]() { - const auto & table_id = query_context->getInsertionTable(); - if (query_context->getSettingsRef().input_format_defaults_for_omitted_fields) + /// Send ColumnsDescription for insertion table + if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA) { - if (!table_id.empty()) + const auto & table_id = query_context->getInsertionTable(); + if (query_context->getSettingsRef().input_format_defaults_for_omitted_fields) { - auto storage_ptr = DatabaseCatalog::instance().getTable(table_id, query_context); - sendTableColumns(storage_ptr->getInMemoryMetadataPtr()->getColumns()); + if (!table_id.empty()) + { + auto storage_ptr = DatabaseCatalog::instance().getTable(table_id, query_context); + sendTableColumns(storage_ptr->getInMemoryMetadataPtr()->getColumns()); + } } } - } + }; - /// Send block to the client - table structure. - sendData(state.io.out->getHeader()); - - try + if (num_threads > 1) { - readData(); + PushingAsyncPipelineExecutor executor(state.io.pipeline); + /** Made above the rest of the lines, so that in case of `writePrefix` function throws an exception, + * client receive exception before sending data. + */ + executor.start(); + + send_table_columns(); + + /// Send block to the client - table structure. + sendData(executor.getHeader()); + + sendLogs(); + + while (readDataNext()) + executor.push(std::move(state.block_for_insert)); + + executor.finish(); } - catch (...) + else { - /// To avoid flushing from the destructor, that may lead to uncaught exception. 
- state.io.out->writeSuffix(); - throw; + PushingPipelineExecutor executor(state.io.pipeline); + executor.start(); + + send_table_columns(); + + sendData(executor.getHeader()); + + sendLogs(); + + while (readDataNext()) + executor.push(std::move(state.block_for_insert)); + + executor.finish(); } - state.io.out->writeSuffix(); -} - - -void TCPHandler::processOrdinaryQuery() -{ - OpenTelemetrySpanHolder span(__PRETTY_FUNCTION__); - - /// Pull query execution result, if exists, and send it to network. - if (state.io.in) - { - - if (query_context->getSettingsRef().allow_experimental_query_deduplication) - sendPartUUIDs(); - - /// This allows the client to prepare output format - if (Block header = state.io.in->getHeader()) - sendData(header); - - /// Use of async mode here enables reporting progress and monitoring client cancelling the query - AsynchronousBlockInputStream async_in(state.io.in); - - async_in.readPrefix(); - while (true) - { - if (isQueryCancelled()) - { - async_in.cancel(false); - break; - } - - if (after_send_progress.elapsed() / 1000 >= interactive_delay) - { - /// Some time passed. - after_send_progress.restart(); - sendProgress(); - } - - sendLogs(); - - if (async_in.poll(interactive_delay / 1000)) - { - const auto block = async_in.read(); - if (!block) - break; - - if (!state.io.null_format) - sendData(block); - } - } - async_in.readSuffix(); - - /** When the data has run out, we send the profiling data and totals up to the terminating empty block, - * so that this information can be used in the suffix output of stream. - * If the request has been interrupted, then sendTotals and other methods should not be called, - * because we have not read all the data. - */ - if (!isQueryCancelled()) - { - sendTotals(state.io.in->getTotals()); - sendExtremes(state.io.in->getExtremes()); - sendProfileInfo(state.io.in->getProfileInfo()); - sendProgress(); - } - - if (state.is_connection_closed) - return; - - sendData({}); - } - - sendProgress(); } @@ -1347,10 +1313,11 @@ bool TCPHandler::receiveData(bool scalar) } auto metadata_snapshot = storage->getInMemoryMetadataPtr(); /// The data will be written directly to the table. - auto temporary_table_out = std::make_shared(storage->write(ASTPtr(), metadata_snapshot, query_context)); - temporary_table_out->write(block); - temporary_table_out->writeSuffix(); - + QueryPipeline temporary_table_out(storage->write(ASTPtr(), metadata_snapshot, query_context)); + PushingPipelineExecutor executor(temporary_table_out); + executor.start(); + executor.push(block); + executor.finish(); } else if (state.need_receive_data_for_input) { @@ -1360,7 +1327,7 @@ bool TCPHandler::receiveData(bool scalar) else { /// INSERT query. - state.io.out->write(block); + state.block_for_insert = block; } return true; } @@ -1402,8 +1369,8 @@ void TCPHandler::initBlockInput() state.maybe_compressed_in = in; Block header; - if (state.io.out) - header = state.io.out->getHeader(); + if (state.io.pipeline.pushing()) + header = state.io.pipeline.getHeader(); else if (state.need_receive_data_for_input) header = state.input_header; diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 7f75d0ac04b..f8860c16b2d 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -53,6 +53,7 @@ struct QueryState /// Where to write result data. std::shared_ptr maybe_compressed_out; BlockOutputStreamPtr block_out; + Block block_for_insert; /// Query text. 
String query; diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index b0254da7a44..1d057d1bb10 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -180,6 +180,15 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.if_exists = command_ast->if_exists; return command; } + else if (command_ast->type == ASTAlterCommand::MODIFY_COMMENT) + { + AlterCommand command; + command.ast = command_ast->clone(); + command.type = COMMENT_TABLE; + const auto & ast_comment = command_ast->comment->as(); + command.comment = ast_comment.value.get(); + return command; + } else if (command_ast->type == ASTAlterCommand::MODIFY_ORDER_BY) { AlterCommand command; @@ -459,6 +468,10 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) metadata.columns.modify(column_name, [&](ColumnDescription & column) { column.comment = *comment; }); } + else if (type == COMMENT_TABLE) + { + metadata.comment = *comment; + } else if (type == ADD_INDEX) { if (std::any_of( @@ -751,7 +764,7 @@ bool AlterCommand::isRequireMutationStage(const StorageInMemoryMetadata & metada bool AlterCommand::isCommentAlter() const { - if (type == COMMENT_COLUMN) + if (type == COMMENT_COLUMN || type == COMMENT_TABLE) { return true; } diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 41b48b4b034..046238bd5f5 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -45,6 +45,7 @@ struct AlterCommand RENAME_COLUMN, REMOVE_TTL, MODIFY_DATABASE_SETTING, + COMMENT_TABLE }; /// Which property user wants to remove from column @@ -75,7 +76,7 @@ struct AlterCommand ColumnDefaultKind default_kind{}; ASTPtr default_expression{}; - /// For COMMENT column + /// For COMMENT column or table std::optional comment; /// For ADD or MODIFY - after which column to add a new one. If an empty string, add to the end. 
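The `AlterCommands` changes above add a table-level `COMMENT_TABLE` command: it is parsed from `ASTAlterCommand::MODIFY_COMMENT`, applied by writing into `metadata.comment`, and treated as a metadata-only alter (`isCommentAlter()` now returns true for it, as it already did for `COMMENT_COLUMN`). A minimal sketch of the SQL this enables, assuming the `MODIFY COMMENT` statement shape implied by the AST type; the table name `hits` and the comment text are only illustrations, not taken from this diff:

```sql
-- Hypothetical example: set or replace the comment stored in the table's metadata.
ALTER TABLE hits MODIFY COMMENT 'Web analytics events, kept for 90 days';

-- The comment lives in table metadata only, so no data parts are rewritten;
-- it should then be visible through system tables, e.g.:
SELECT name, comment FROM system.tables WHERE name = 'hits';
```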
diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index c674a705de1..935db1c912f 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -266,7 +266,7 @@ namespace return nullptr; } - void writeAndConvert(RemoteBlockOutputStream & remote, ReadBufferFromFile & in) + void writeAndConvert(RemoteInserter & remote, ReadBufferFromFile & in) { CompressedReadBuffer decompressing_in(in); NativeBlockInputStream block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION); @@ -287,7 +287,7 @@ namespace void writeRemoteConvert( const DistributedHeader & distributed_header, - RemoteBlockOutputStream & remote, + RemoteInserter & remote, bool compression_expected, ReadBufferFromFile & in, Poco::Logger * log) @@ -619,14 +619,13 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa formatReadableSizeWithBinarySuffix(distributed_header.bytes)); auto connection = pool->get(timeouts, &distributed_header.insert_settings); - RemoteBlockOutputStream remote{*connection, timeouts, + RemoteInserter remote{*connection, timeouts, distributed_header.insert_query, distributed_header.insert_settings, distributed_header.client_info}; - remote.writePrefix(); bool compression_expected = connection->getCompression() == Protocol::Compression::Enable; writeRemoteConvert(distributed_header, remote, compression_expected, in, log); - remote.writeSuffix(); + remote.onFinish(); } catch (Exception & e) { @@ -833,7 +832,7 @@ struct StorageDistributedDirectoryMonitor::Batch private: void sendBatch(Connection & connection, const ConnectionTimeouts & timeouts) { - std::unique_ptr remote; + std::unique_ptr remote; for (UInt64 file_idx : file_indices) { @@ -847,18 +846,17 @@ private: if (!remote) { - remote = std::make_unique(connection, timeouts, + remote = std::make_unique(connection, timeouts, distributed_header.insert_query, distributed_header.insert_settings, distributed_header.client_info); - remote->writePrefix(); } bool compression_expected = connection.getCompression() == Protocol::Compression::Enable; writeRemoteConvert(distributed_header, *remote, compression_expected, in, parent.log); } if (remote) - remote->writeSuffix(); + remote->onFinish(); } void sendSeparateFiles(Connection & connection, const ConnectionTimeouts & timeouts) @@ -880,14 +878,13 @@ private: ReadBufferFromFile in(file_path->second); const auto & distributed_header = readDistributedHeader(in, parent.log); - RemoteBlockOutputStream remote(connection, timeouts, + RemoteInserter remote(connection, timeouts, distributed_header.insert_query, distributed_header.insert_settings, distributed_header.client_info); - remote.writePrefix(); bool compression_expected = connection.getCompression() == Protocol::Compression::Enable; writeRemoteConvert(distributed_header, remote, compression_expected, in, parent.log); - remote.writeSuffix(); + remote.onFinish(); } catch (Exception & e) { @@ -972,7 +969,7 @@ private: Data data; }; -ProcessorPtr StorageDistributedDirectoryMonitor::createSourceFromFile(const String & file_name) +std::shared_ptr StorageDistributedDirectoryMonitor::createSourceFromFile(const String & file_name) { return std::make_shared(file_name); } diff --git a/src/Storages/Distributed/DirectoryMonitor.h b/src/Storages/Distributed/DirectoryMonitor.h index cd1d25179f3..307b57a5668 100644 --- a/src/Storages/Distributed/DirectoryMonitor.h +++ b/src/Storages/Distributed/DirectoryMonitor.h @@ -24,6 +24,8 @@ class 
BackgroundSchedulePool; class IProcessor; using ProcessorPtr = std::shared_ptr; +class ISource; + /** Details of StorageDistributed. * This type is not designed for standalone use. */ @@ -48,7 +50,7 @@ public: void shutdownAndDropAllData(); - static ProcessorPtr createSourceFromFile(const String & file_name); + static std::shared_ptr createSourceFromFile(const String & file_name); /// For scheduling via DistributedBlockOutputStream bool addAndSchedule(size_t file_size, size_t ms); diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index ec3f82d914c..6596598476d 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -78,11 +79,11 @@ static Block adoptBlock(const Block & header, const Block & block, Poco::Logger } -static void writeBlockConvert(const BlockOutputStreamPtr & out, const Block & block, size_t repeats, Poco::Logger * log) +static void writeBlockConvert(PushingPipelineExecutor & executor, const Block & block, size_t repeats, Poco::Logger * log) { - Block adopted_block = adoptBlock(out->getHeader(), block, log); + Block adopted_block = adoptBlock(executor.getHeader(), block, log); for (size_t i = 0; i < repeats; ++i) - out->write(adopted_block); + executor.push(adopted_block); } @@ -124,7 +125,7 @@ void DistributedSink::consume(Chunk chunk) is_first_chunk = false; } - auto ordinary_block = getPort().getHeader().cloneWithColumns(chunk.detachColumns()); + auto ordinary_block = getHeader().cloneWithColumns(chunk.detachColumns()); if (!allow_materialized) { @@ -322,7 +323,7 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si if (!job.is_local_job || !settings.prefer_localhost_replica) { - if (!job.stream) + if (!job.executor) { auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings); if (shard_info.hasInternalReplication()) @@ -354,19 +355,20 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si if (throttler) job.connection_entry->setThrottler(throttler); - job.stream = std::make_shared( - *job.connection_entry, timeouts, query_string, settings, context->getClientInfo()); - job.stream->writePrefix(); + job.pipeline = QueryPipeline(std::make_shared( + *job.connection_entry, timeouts, query_string, settings, context->getClientInfo())); + job.executor = std::make_unique(job.pipeline); + job.executor->start(); } CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend}; - Block adopted_shard_block = adoptBlock(job.stream->getHeader(), shard_block, log); - job.stream->write(adopted_shard_block); + Block adopted_shard_block = adoptBlock(job.executor->getHeader(), shard_block, log); + job.executor->push(adopted_shard_block); } else // local { - if (!job.stream) + if (!job.executor) { /// Forward user settings job.local_context = Context::createCopy(context); @@ -382,11 +384,12 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized); auto block_io = interp.execute(); - job.stream = block_io.out; - job.stream->writePrefix(); + job.pipeline = std::move(block_io.pipeline); + job.executor = std::make_unique(job.pipeline); + job.executor->start(); } - writeBlockConvert(job.stream, shard_block, shard_info.getLocalNodeCount(), log); + writeBlockConvert(*job.executor, shard_block, 
shard_info.getLocalNodeCount(), log); } job.blocks_written += 1; @@ -498,11 +501,11 @@ void DistributedSink::onFinish() { for (JobReplica & job : shard_jobs.replicas_jobs) { - if (job.stream) + if (job.executor) { pool->scheduleOrThrowOnError([&job]() { - job.stream->writeSuffix(); + job.executor->finish(); }); } } @@ -618,10 +621,11 @@ void DistributedSink::writeToLocal(const Block & block, size_t repeats) InterpreterInsertQuery interp(query_ast, context, allow_materialized); auto block_io = interp.execute(); + PushingPipelineExecutor executor(block_io.pipeline); - block_io.out->writePrefix(); - writeBlockConvert(block_io.out, block, repeats, log); - block_io.out->writeSuffix(); + executor.start(); + writeBlockConvert(executor, block, repeats, log); + executor.finish(); } diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index af04f8c8aac..1fdf5c0291f 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -24,6 +25,7 @@ namespace DB class Context; class StorageDistributed; +class PushingPipelineExecutor; /** If insert_sync_ is true, the write is synchronous. Uses insert_timeout_ if it is not zero. * Otherwise, the write is asynchronous - the data is first written to the local filesystem, and then sent to the remote servers. @@ -119,7 +121,8 @@ private: ConnectionPool::Entry connection_entry; ContextPtr local_context; - BlockOutputStreamPtr stream; + QueryPipeline pipeline; + std::unique_ptr executor; UInt64 blocks_written = 0; UInt64 rows_written = 0; diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index d241bd07294..96d67ad0e08 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -85,10 +85,15 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemoryposition() = impl->buffer().begin() + offset(); auto result = impl->next(); if (result) - { - working_buffer = internal_buffer = impl->buffer(); - pos = working_buffer.begin(); - } - else - return false; - return true; + BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset); /// use the buffer returned by `impl` + + return result; } diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 9600eb975b4..ae66f258d5b 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -195,7 +195,7 @@ public: writer->writePrefix(); is_first_chunk = false; } - writer->write(getPort().getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void onFinish() override diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index fe5da1ce8db..8effb5ed9bf 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index baf57c40185..f64437e1889 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -54,8 +54,8 @@ using QueryPlanPtr = std::unique_ptr; class SinkToStorage; using SinkToStoragePtr = std::shared_ptr; -class QueryPipeline; -using QueryPipelinePtr = std::unique_ptr; +class QueryPipelineBuilder; +using QueryPipelineBuilderPtr = std::unique_ptr; class 
IStoragePolicy; using StoragePolicyPtr = std::shared_ptr; @@ -359,7 +359,7 @@ public: * * Returns query pipeline if distributed writing is possible, and nullptr otherwise. */ - virtual QueryPipelinePtr distributedWrite( + virtual QueryPipelineBuilderPtr distributedWrite( const ASTInsertQuery & /*query*/, ContextPtr /*context*/) { diff --git a/src/Storages/Kafka/KafkaBlockOutputStream.cpp b/src/Storages/Kafka/KafkaBlockOutputStream.cpp index c7fe71f42c1..395065de084 100644 --- a/src/Storages/Kafka/KafkaBlockOutputStream.cpp +++ b/src/Storages/Kafka/KafkaBlockOutputStream.cpp @@ -19,13 +19,13 @@ KafkaSink::KafkaSink( void KafkaSink::onStart() { - buffer = storage.createWriteBuffer(getPort().getHeader()); + buffer = storage.createWriteBuffer(getHeader()); auto format_settings = getFormatSettings(context); format_settings.protobuf.allow_multiple_rows_without_delimiter = true; child = FormatFactory::instance().getOutputStream(storage.getFormatName(), *buffer, - getPort().getHeader(), context, + getHeader(), context, [this](const Columns & columns, size_t row) { buffer->countRow(columns, row); @@ -35,7 +35,7 @@ void KafkaSink::onStart() void KafkaSink::consume(Chunk chunk) { - child->write(getPort().getHeader().cloneWithColumns(chunk.detachColumns())); + child->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void KafkaSink::onFinish() diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index cba67bc3bcb..8ab9127bf13 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -616,7 +617,7 @@ bool StorageKafka::streamToViews() streams.reserve(stream_count); for (size_t i = 0; i < stream_count; ++i) { - auto stream = std::make_shared(*this, metadata_snapshot, kafka_context, block_io.out->getHeader().getNames(), log, block_size, false); + auto stream = std::make_shared(*this, metadata_snapshot, kafka_context, block_io.pipeline.getHeader().getNames(), log, block_size, false); streams.emplace_back(stream); // Limit read batch to maximum block size to allow DDL @@ -639,12 +640,23 @@ bool StorageKafka::streamToViews() // We can't cancel during copyData, as it's not aware of commits and other kafka-related stuff. 
// It will be cancelled on underlying layer (kafka buffer) - std::atomic stub = {false}; + size_t rows = 0; - copyData(*in, *block_io.out, [&rows](const Block & block) { - rows += block.rows(); - }, &stub); + PushingPipelineExecutor executor(block_io.pipeline); + + in->readPrefix(); + executor.start(); + + while (auto block = in->read()) + { + rows += block.rows(); + executor.push(std::move(block)); + } + + in->readSuffix(); + executor.finish(); + } bool some_stream_is_stalled = false; for (auto & stream : streams) diff --git a/src/Storages/LiveView/LiveViewEventsSource.h b/src/Storages/LiveView/LiveViewEventsSource.h index daf9edfef95..77ee06c702c 100644 --- a/src/Storages/LiveView/LiveViewEventsSource.h +++ b/src/Storages/LiveView/LiveViewEventsSource.h @@ -56,7 +56,7 @@ public: void onCancel() override { - if (isCancelled() || storage->shutdown_called) + if (storage->shutdown_called) return; std::lock_guard lock(storage->mutex); diff --git a/src/Storages/LiveView/LiveViewSink.h b/src/Storages/LiveView/LiveViewSink.h index 433a5554152..bbb8bf02c45 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -76,7 +76,7 @@ public: void consume(Chunk chunk) override { - auto block = getPort().getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); block.updateHash(*new_hash); new_blocks->push_back(std::move(block)); } diff --git a/src/Storages/LiveView/LiveViewSource.h b/src/Storages/LiveView/LiveViewSource.h index af07d8558ad..ec726359581 100644 --- a/src/Storages/LiveView/LiveViewSource.h +++ b/src/Storages/LiveView/LiveViewSource.h @@ -34,11 +34,11 @@ public: active = active_ptr.lock(); } - String getName() const override { return "LiveViewBlockInputStream"; } + String getName() const override { return "LiveViewSource"; } void onCancel() override { - if (isCancelled() || storage->shutdown_called) + if (storage->shutdown_called) return; std::lock_guard lock(storage->mutex); @@ -145,7 +145,6 @@ protected: /// Or spurious wakeup. bool signaled = std::cv_status::no_timeout == storage->condition.wait_for(lock, std::chrono::microseconds(std::max(UInt64(0), heartbeat_interval_usec - (timestamp_usec - last_event_timestamp_usec)))); - if (isCancelled() || storage->shutdown_called) { return { Block(), true }; diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 69390850ccc..335765405dd 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include #include #include +#include #include #include #include @@ -111,15 +112,16 @@ MergeableBlocksPtr StorageLiveView::collectMergeableBlocks(ContextPtr local_cont InterpreterSelectQuery interpreter(mergeable_query->clone(), local_context, SelectQueryOptions(QueryProcessingStage::WithMergeableState), Names()); - auto io = interpreter.execute(); - io.pipeline.addSimpleTransform([&](const Block & cur_header) + auto builder = interpreter.buildQueryPipeline(); + builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header); }); - new_mergeable_blocks->sample_block = io.pipeline.getHeader(); + new_mergeable_blocks->sample_block = builder.getHeader(); - PullingPipelineExecutor executor(io.pipeline); + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); + PullingPipelineExecutor executor(pipeline); Block this_block; while (executor.pull(this_block)) @@ -142,7 +144,7 @@ Pipes StorageLiveView::blocksToPipes(BlocksPtrs blocks, Block & sample_block) } /// Complete query using input streams from mergeable blocks -QueryPipeline StorageLiveView::completeQuery(Pipes pipes) +QueryPipelineBuilder StorageLiveView::completeQuery(Pipes pipes) { //FIXME it's dangerous to create Context on stack auto block_context = Context::createCopy(getContext()); @@ -157,8 +159,8 @@ QueryPipeline StorageLiveView::completeQuery(Pipes pipes) }; block_context->addExternalTable(getBlocksTableName(), TemporaryTableHolder(getContext(), creator)); InterpreterSelectQuery select(getInnerBlocksQuery(), block_context, StoragePtr(), nullptr, SelectQueryOptions(QueryProcessingStage::Complete)); - auto io = select.execute(); - io.pipeline.addSimpleTransform([&](const Block & cur_header) + auto builder = select.buildQueryPipeline(); + builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header); }); @@ -166,7 +168,7 @@ QueryPipeline StorageLiveView::completeQuery(Pipes pipes) /// Squashing is needed here because the view query can generate a lot of blocks /// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY /// and two-level aggregation is triggered). 
- io.pipeline.addSimpleTransform([&](const Block & cur_header) + builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared( cur_header, @@ -174,7 +176,7 @@ QueryPipeline StorageLiveView::completeQuery(Pipes pipes) getContext()->getSettingsRef().min_insert_block_size_bytes); }); - return std::move(io.pipeline); + return builder; } void StorageLiveView::writeIntoLiveView( @@ -236,13 +238,14 @@ void StorageLiveView::writeIntoLiveView( InterpreterSelectQuery select_block(mergeable_query, local_context, blocks_storage.getTable(), blocks_storage.getTable()->getInMemoryMetadataPtr(), QueryProcessingStage::WithMergeableState); - auto io = select_block.execute(); - io.pipeline.addSimpleTransform([&](const Block & cur_header) + auto builder = select_block.buildQueryPipeline(); + builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header); }); - PullingPipelineExecutor executor(io.pipeline); + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); + PullingPipelineExecutor executor(pipeline); Block this_block; while (executor.pull(this_block)) @@ -261,10 +264,10 @@ void StorageLiveView::writeIntoLiveView( } auto pipeline = live_view.completeQuery(std::move(from)); - pipeline.resize(1); - pipeline.setSinks([&](const Block &, Pipe::StreamType) + pipeline.addChain(Chain(std::move(output))); + pipeline.setSinks([&](const Block & cur_header, Pipe::StreamType) { - return std::move(output); + return std::make_shared(cur_header); }); auto executor = pipeline.execute(); @@ -380,7 +383,8 @@ bool StorageLiveView::getNewBlocks() /// inserted data to be duplicated auto new_mergeable_blocks = collectMergeableBlocks(live_view_context); Pipes from = blocksToPipes(new_mergeable_blocks->blocks, new_mergeable_blocks->sample_block); - auto pipeline = completeQuery(std::move(from)); + auto builder = completeQuery(std::move(from)); + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); PullingPipelineExecutor executor(pipeline); Block block; diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 15afc642989..b08b034ec3f 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -167,7 +167,7 @@ public: /// Collect mergeable blocks and their sample. 
Must be called holding mutex MergeableBlocksPtr collectMergeableBlocks(ContextPtr context); /// Complete query using input streams from mergeable blocks - QueryPipeline completeQuery(Pipes pipes); + QueryPipelineBuilder completeQuery(Pipes pipes); void setMergeableBlocks(MergeableBlocksPtr blocks) { mergeable_blocks = blocks; } std::shared_ptr getActivePtr() { return active_ptr; } diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index d9d691ab2e7..3a495fccc0e 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -194,6 +194,7 @@ std::pair MergeFromLogEntryT future_merged_part, metadata_snapshot, merge_mutate_entry.get(), + {} /* projection_merge_list_element */, table_lock_holder, entry.create_time, storage.getContext(), diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index bab4420de83..24df4ba5e42 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -114,6 +114,8 @@ struct MergeListElement : boost::noncopyable MergeInfo getInfo() const; + MergeListElement * ptr() { return this; } + ~MergeListElement(); }; diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index e8770fdc76e..d52ffe32f7f 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -95,6 +95,7 @@ void MergePlainMergeTreeTask::prepare() future_part, metadata_snapshot, merge_list_entry.get(), + {} /* projection_merge_list_element */, table_lock_holder, time(nullptr), storage.getContext(), diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.h b/src/Storages/MergeTree/MergePlainMergeTreeTask.h index f199557684c..9a95a404135 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.h @@ -23,12 +23,12 @@ public: TableLockHolder table_lock_holder_, Callback && task_result_callback_) : storage(storage_) - , metadata_snapshot(metadata_snapshot_) + , metadata_snapshot(std::move(metadata_snapshot_)) , deduplicate(deduplicate_) - , deduplicate_by_columns(deduplicate_by_columns_) - , merge_mutate_entry(merge_mutate_entry_) - , table_lock_holder(table_lock_holder_) - , task_result_callback(task_result_callback_) {} + , deduplicate_by_columns(std::move(deduplicate_by_columns_)) + , merge_mutate_entry(std::move(merge_mutate_entry_)) + , table_lock_holder(std::move(table_lock_holder_)) + , task_result_callback(std::forward(task_result_callback_)) {} bool executeStep() override; diff --git a/src/Storages/MergeTree/MergeProgress.h b/src/Storages/MergeTree/MergeProgress.h index 2862a934411..d21edac76df 100644 --- a/src/Storages/MergeTree/MergeProgress.h +++ b/src/Storages/MergeTree/MergeProgress.h @@ -47,21 +47,21 @@ class MergeProgressCallback { public: MergeProgressCallback( - MergeList::Entry & merge_entry_, UInt64 & watch_prev_elapsed_, MergeStageProgress & stage_) - : merge_entry(merge_entry_) + MergeListElement * merge_list_element_ptr_, UInt64 & watch_prev_elapsed_, MergeStageProgress & stage_) + : merge_list_element_ptr(merge_list_element_ptr_) , watch_prev_elapsed(watch_prev_elapsed_) , stage(stage_) { updateWatch(); } - MergeList::Entry & merge_entry; + MergeListElement * merge_list_element_ptr; UInt64 & watch_prev_elapsed; MergeStageProgress & stage; void updateWatch() { - UInt64 watch_curr_elapsed = merge_entry->watch.elapsed(); + UInt64 
watch_curr_elapsed = merge_list_element_ptr->watch.elapsed(); ProfileEvents::increment(ProfileEvents::MergesTimeMilliseconds, (watch_curr_elapsed - watch_prev_elapsed) / 1000000); watch_prev_elapsed = watch_curr_elapsed; } @@ -76,15 +76,15 @@ public: } updateWatch(); - merge_entry->bytes_read_uncompressed += value.read_bytes; + merge_list_element_ptr->bytes_read_uncompressed += value.read_bytes; if (stage.is_first) - merge_entry->rows_read += value.read_rows; + merge_list_element_ptr->rows_read += value.read_rows; stage.total_rows += value.total_rows_to_read; stage.rows_read += value.read_rows; if (stage.total_rows > 0) { - merge_entry->progress.store( + merge_list_element_ptr->progress.store( stage.initial_progress + stage.weight * stage.rows_read / stage.total_rows, std::memory_order_relaxed); } diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 855198f697e..e6b37d0657c 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -141,7 +141,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->future_part->part_info, local_single_disk_volume, local_tmp_prefix + global_ctx->future_part->name + (global_ctx->parent_part ? ".proj" : ""), - global_ctx->parent_part.get()); + global_ctx->parent_part); global_ctx->new_data_part->uuid = global_ctx->future_part->uuid; global_ctx->new_data_part->setColumns(global_ctx->storage_columns); @@ -171,10 +171,10 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() ctx->need_remove_expired_values = false; } - ctx->sum_input_rows_upper_bound = (*global_ctx->merge_entry)->total_rows_count; - ctx->sum_compressed_bytes_upper_bound = (*global_ctx->merge_entry)->total_size_bytes_compressed; + ctx->sum_input_rows_upper_bound = global_ctx->merge_list_element_ptr->total_rows_count; + ctx->sum_compressed_bytes_upper_bound = global_ctx->merge_list_element_ptr->total_size_bytes_compressed; global_ctx->chosen_merge_algorithm = chooseMergeAlgorithm(); - (*global_ctx->merge_entry)->merge_algorithm.store(global_ctx->chosen_merge_algorithm, std::memory_order_relaxed); + global_ctx->merge_list_element_ptr->merge_algorithm.store(global_ctx->chosen_merge_algorithm, std::memory_order_relaxed); LOG_DEBUG(ctx->log, "Selected MergeAlgorithm: {}", toString(global_ctx->chosen_merge_algorithm)); @@ -184,7 +184,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() /// the order is reverse. This annoys TSan even though one lock is locked in shared mode and thus /// deadlock is impossible. 
ctx->compression_codec = global_ctx->data->getCompressionCodecForPart( - (*global_ctx->merge_entry)->total_size_bytes_compressed, global_ctx->new_data_part->ttl_infos, global_ctx->time_of_merge); + global_ctx->merge_list_element_ptr->total_size_bytes_compressed, global_ctx->new_data_part->ttl_infos, global_ctx->time_of_merge); ctx->tmp_disk = global_ctx->context->getTemporaryVolume()->getDisk(); @@ -307,8 +307,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl() const_cast(*global_ctx->to).write(block); - (*global_ctx->merge_entry)->rows_written = global_ctx->merged_stream->getProfileInfo().rows; - (*global_ctx->merge_entry)->bytes_written_uncompressed = global_ctx->merged_stream->getProfileInfo().bytes; + global_ctx->merge_list_element_ptr->rows_written = global_ctx->merged_stream->getProfileInfo().rows; + global_ctx->merge_list_element_ptr->bytes_written_uncompressed = global_ctx->merged_stream->getProfileInfo().bytes; /// Reservation updates is not performed yet, during the merge it may lead to higher free space requirements if (global_ctx->space_reservation && ctx->sum_input_rows_upper_bound) @@ -317,7 +317,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl() /// But now we are using inaccurate row-based estimation in Horizontal case for backward compatibility Float64 progress = (global_ctx->chosen_merge_algorithm == MergeAlgorithm::Horizontal) ? std::min(1., 1. * global_ctx->rows_written / ctx->sum_input_rows_upper_bound) - : std::min(1., (*global_ctx->merge_entry)->progress.load(std::memory_order_relaxed)); + : std::min(1., global_ctx->merge_list_element_ptr->progress.load(std::memory_order_relaxed)); global_ctx->space_reservation->update(static_cast((1. - progress) * ctx->initial_reservation)); } @@ -336,7 +336,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl() throw Exception("Cancelled merging parts with expired TTL", ErrorCodes::ABORTED); const auto data_settings = global_ctx->data->getSettings(); - const size_t sum_compressed_bytes_upper_bound = (*global_ctx->merge_entry)->total_size_bytes_compressed; + const size_t sum_compressed_bytes_upper_bound = global_ctx->merge_list_element_ptr->total_size_bytes_compressed; ctx->need_sync = needSyncPart(ctx->sum_input_rows_upper_bound, sum_compressed_bytes_upper_bound, *data_settings); return false; @@ -349,9 +349,9 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const if (global_ctx->chosen_merge_algorithm != MergeAlgorithm::Vertical) return false; - size_t sum_input_rows_exact = (*global_ctx->merge_entry)->rows_read; - (*global_ctx->merge_entry)->columns_written = global_ctx->merging_column_names.size(); - (*global_ctx->merge_entry)->progress.store(ctx->column_sizes->keyColumnsWeight(), std::memory_order_relaxed); + size_t sum_input_rows_exact = global_ctx->merge_list_element_ptr->rows_read; + global_ctx->merge_list_element_ptr->columns_written = global_ctx->merging_column_names.size(); + global_ctx->merge_list_element_ptr->progress.store(ctx->column_sizes->keyColumnsWeight(), std::memory_order_relaxed); ctx->column_part_streams = BlockInputStreams(global_ctx->future_part->parts.size()); @@ -385,7 +385,7 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const const String & column_name = ctx->it_name_and_type->name; Names column_names{column_name}; - ctx->progress_before = (*global_ctx->merge_entry)->progress.load(std::memory_order_relaxed); + ctx->progress_before = 
global_ctx->merge_list_element_ptr->progress.load(std::memory_order_relaxed); global_ctx->column_progress = std::make_unique(ctx->progress_before, ctx->column_sizes->columnWeight(column_name)); @@ -396,11 +396,10 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const /// Dereference unique_ptr column_part_source->setProgressCallback( - MergeProgressCallback(*global_ctx->merge_entry, global_ctx->watch_prev_elapsed, *global_ctx->column_progress)); + MergeProgressCallback(global_ctx->merge_list_element_ptr, global_ctx->watch_prev_elapsed, *global_ctx->column_progress)); - QueryPipeline column_part_pipeline; - column_part_pipeline.init(Pipe(std::move(column_part_source))); - column_part_pipeline.setMaxThreads(1); + QueryPipeline column_part_pipeline(Pipe(std::move(column_part_source))); + column_part_pipeline.setNumThreads(1); ctx->column_part_streams[part_num] = std::make_shared(std::move(column_part_pipeline)); @@ -460,9 +459,9 @@ void MergeTask::VerticalMergeStage::finalizeVerticalMergeForOneColumn() const /// NOTE: 'progress' is modified by single thread, but it may be concurrently read from MergeListElement::getInfo() (StorageSystemMerges). - (*global_ctx->merge_entry)->columns_written += 1; - (*global_ctx->merge_entry)->bytes_written_uncompressed += ctx->column_gathered_stream->getProfileInfo().bytes; - (*global_ctx->merge_entry)->progress.store(ctx->progress_before + ctx->column_sizes->columnWeight(column_name), std::memory_order_relaxed); + global_ctx->merge_list_element_ptr->columns_written += 1; + global_ctx->merge_list_element_ptr->bytes_written_uncompressed += ctx->column_gathered_stream->getProfileInfo().bytes; + global_ctx->merge_list_element_ptr->progress.store(ctx->progress_before + ctx->column_sizes->columnWeight(column_name), std::memory_order_relaxed); /// This is the external cycle increment. ++ctx->column_num_for_vertical_merge; @@ -487,16 +486,16 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c /// Print overall profiling info. NOTE: it may duplicates previous messages { - double elapsed_seconds = (*global_ctx->merge_entry)->watch.elapsedSeconds(); + double elapsed_seconds = global_ctx->merge_list_element_ptr->watch.elapsedSeconds(); LOG_DEBUG(ctx->log, "Merge sorted {} rows, containing {} columns ({} merged, {} gathered) in {} sec., {} rows/sec., {}/sec.", - (*global_ctx->merge_entry)->rows_read, + global_ctx->merge_list_element_ptr->rows_read, global_ctx->all_column_names.size(), global_ctx->merging_column_names.size(), global_ctx->gathering_column_names.size(), elapsed_seconds, - (*global_ctx->merge_entry)->rows_read / elapsed_seconds, - ReadableSize((*global_ctx->merge_entry)->bytes_read_uncompressed / elapsed_seconds)); + global_ctx->merge_list_element_ptr->rows_read / elapsed_seconds, + ReadableSize(global_ctx->merge_list_element_ptr->bytes_read_uncompressed / elapsed_seconds)); } @@ -536,18 +535,18 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c if (projection.type == ProjectionDescription::Type::Aggregate) projection_merging_params.mode = MergeTreeData::MergingParams::Aggregating; - // TODO Should we use a new merge_entry for projection? 
ctx->tasks_for_projections.emplace_back(std::make_shared( projection_future_part, projection.metadata, global_ctx->merge_entry, + std::make_unique((*global_ctx->merge_entry)->table_id, projection_future_part), global_ctx->time_of_merge, global_ctx->context, global_ctx->space_reservation, global_ctx->deduplicate, global_ctx->deduplicate_by_columns, projection_merging_params, - global_ctx->new_data_part, + global_ctx->new_data_part.get(), "", // empty string for projection global_ctx->data, global_ctx->merges_blocker, @@ -576,21 +575,17 @@ bool MergeTask::MergeProjectionsStage::executeProjections() const bool MergeTask::MergeProjectionsStage::finalizeProjectionsAndWholeMerge() const { - const auto & projections = global_ctx->metadata_snapshot->getProjections(); - - size_t iter = 0; - - for (const auto & projection : projections) + for (const auto & task : ctx->tasks_for_projections) { - auto future = ctx->tasks_for_projections[iter]->getFuture(); - ++iter; - global_ctx->new_data_part->addProjectionPart(projection.name, future.get()); + auto part = task->getFuture().get(); + global_ctx->new_data_part->addProjectionPart(part->name, std::move(part)); } if (global_ctx->chosen_merge_algorithm != MergeAlgorithm::Vertical) global_ctx->to->writeSuffixAndFinalizePart(global_ctx->new_data_part, ctx->need_sync); else - global_ctx->to->writeSuffixAndFinalizePart(global_ctx->new_data_part, ctx->need_sync, &global_ctx->storage_columns, &global_ctx->checksums_gathered_columns); + global_ctx->to->writeSuffixAndFinalizePart( + global_ctx->new_data_part, ctx->need_sync, &global_ctx->storage_columns, &global_ctx->checksums_gathered_columns); global_ctx->promise.set_value(global_ctx->new_data_part); @@ -717,7 +712,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() /// Dereference unique_ptr and pass horizontal_stage_progress by reference input->setProgressCallback( - MergeProgressCallback(*global_ctx->merge_entry, global_ctx->watch_prev_elapsed, *global_ctx->horizontal_stage_progress)); + MergeProgressCallback(global_ctx->merge_list_element_ptr, global_ctx->watch_prev_elapsed, *global_ctx->horizontal_stage_progress)); Pipe pipe(std::move(input)); @@ -795,10 +790,10 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() break; } - QueryPipeline pipeline; - pipeline.init(Pipe::unitePipes(std::move(pipes))); - pipeline.addTransform(std::move(merged_transform)); - pipeline.setMaxThreads(1); + auto res_pipe = Pipe::unitePipes(std::move(pipes)); + res_pipe.addTransform(std::move(merged_transform)); + QueryPipeline pipeline(std::move(res_pipe)); + pipeline.setNumThreads(1); global_ctx->merged_stream = std::make_shared(std::move(pipeline)); @@ -822,7 +817,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() MergeAlgorithm MergeTask::ExecuteAndFinalizeHorizontalPart::chooseMergeAlgorithm() const { - const size_t sum_rows_upper_bound = (*global_ctx->merge_entry)->total_rows_count; + const size_t sum_rows_upper_bound = global_ctx->merge_list_element_ptr->total_rows_count; const auto data_settings = global_ctx->data->getSettings(); if (global_ctx->deduplicate) diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index e6bee9a16a3..54b0255fd5c 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -49,13 +49,14 @@ public: FutureMergedMutatedPartPtr future_part_, StorageMetadataPtr metadata_snapshot_, MergeList::Entry * merge_entry_, + std::unique_ptr projection_merge_list_element_, time_t 
time_of_merge_, ContextPtr context_, ReservationSharedPtr space_reservation_, bool deduplicate_, Names deduplicate_by_columns_, MergeTreeData::MergingParams merging_params_, - MergeTreeDataPartPtr parent_part_, + const IMergeTreeDataPart * parent_part_, String prefix_, MergeTreeData * data_, ActionBlocker * merges_blocker_, @@ -66,6 +67,9 @@ public: global_ctx->future_part = std::move(future_part_); global_ctx->metadata_snapshot = std::move(metadata_snapshot_); global_ctx->merge_entry = std::move(merge_entry_); + global_ctx->projection_merge_list_element = std::move(projection_merge_list_element_); + global_ctx->merge_list_element_ptr + = global_ctx->projection_merge_list_element ? global_ctx->projection_merge_list_element.get() : (*global_ctx->merge_entry)->ptr(); global_ctx->time_of_merge = std::move(time_of_merge_); global_ctx->context = std::move(context_); global_ctx->space_reservation = std::move(space_reservation_); @@ -112,12 +116,16 @@ private: struct GlobalRuntimeContext : public IStageRuntimeContext //-V730 { MergeList::Entry * merge_entry{nullptr}; + /// If not null, use this instead of the global MergeList::Entry. This is for merging projections. + std::unique_ptr projection_merge_list_element; + MergeListElement * merge_list_element_ptr{nullptr}; MergeTreeData * data{nullptr}; ActionBlocker * merges_blocker{nullptr}; ActionBlocker * ttl_merges_blocker{nullptr}; StorageMetadataPtr metadata_snapshot{nullptr}; FutureMergedMutatedPartPtr future_part{nullptr}; - MergeTreeDataPartPtr parent_part{nullptr}; + /// This will be either nullptr or new_data_part, so raw pointer is ok. + const IMergeTreeDataPart * parent_part{nullptr}; ContextPtr context{nullptr}; time_t time_of_merge{0}; ReservationSharedPtr space_reservation{nullptr}; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index bf3d98e3fcf..0c97598dc37 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -416,29 +416,30 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( FutureMergedMutatedPartPtr future_part, const StorageMetadataPtr & metadata_snapshot, MergeList::Entry * merge_entry, - TableLockHolder holder, + std::unique_ptr projection_merge_list_element, + TableLockHolder, time_t time_of_merge, ContextPtr context, ReservationSharedPtr space_reservation, bool deduplicate, const Names & deduplicate_by_columns, const MergeTreeData::MergingParams & merging_params, - const IMergeTreeDataPart * /*parent_part*/, - const String & /*prefix*/) + const IMergeTreeDataPart * parent_part, + const String & prefix) { - (void)holder; return std::make_shared( future_part, const_cast(metadata_snapshot), merge_entry, + std::move(projection_merge_list_element), time_of_merge, context, space_reservation, deduplicate, deduplicate_by_columns, merging_params, - nullptr, - "", + parent_part, + prefix, &data, &merges_blocker, &ttl_merges_blocker); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index cda8cfd2c57..82a7b541369 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -99,6 +99,7 @@ public: FutureMergedMutatedPartPtr future_part, const StorageMetadataPtr & metadata_snapshot, MergeListEntry * merge_entry, + std::unique_ptr projection_merge_list_element, TableLockHolder table_lock_holder, time_t time_of_merge, ContextPtr context, 
diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index cb617b0ef22..a11eaa9a4be 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -622,7 +622,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( return match_with_subtype; } - if (function->name == "arrayElement") + if (function->name == "arrayElement" && (function_name == "equals" || function_name == "notEquals")) { /** Try to parse arrayElement for mapKeys index. * It is important to ignore keys like column_map['Key'] = '' because if key does not exists in map @@ -637,25 +637,38 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( const auto & col_name = assert_cast(function->arguments.get()->children[0].get())->name(); auto map_keys_index_column_name = fmt::format("mapKeys({})", col_name); + auto map_values_index_column_name = fmt::format("mapValues({})", col_name); - if (!header.has(map_keys_index_column_name)) - return false; + size_t position = 0; + Field const_value = value_field; - size_t position = header.getPositionByName(map_keys_index_column_name); - const DataTypePtr & index_type = header.getByPosition(position).type; - out.function = function_name == "equals" ? RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS; - - auto & argument = function->arguments.get()->children[1]; - - if (const auto * literal = argument->as()) + if (header.has(map_keys_index_column_name)) { - auto element_key = literal->value; - const DataTypePtr actual_type = BloomFilter::getPrimitiveType(index_type); - out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), element_key))); - return true; + position = header.getPositionByName(map_keys_index_column_name); + + auto & argument = function->arguments.get()->children[1]; + + if (const auto * literal = argument->as()) + const_value = literal->value; + else + return false; + } + else if (header.has(map_values_index_column_name)) + { + position = header.getPositionByName(map_values_index_column_name); + } + else + { + return false; } - return false; + out.function = function_name == "equals" ?
RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS; + + const auto & index_type = header.getByPosition(position).type; + const auto actual_type = BloomFilter::getPrimitiveType(index_type); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), const_value))); + + return true; } } diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index c3193ac68b5..5e97f80d849 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -17,7 +17,7 @@ void MergeTreeSink::onStart() void MergeTreeSink::consume(Chunk chunk) { - auto block = getPort().getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context); for (auto & current_block : part_blocks) diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.h b/src/Storages/MergeTree/MutatePlainMergeTreeTask.h index 0b65c202eb6..e8f210f4175 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.h @@ -27,13 +27,13 @@ public: StorageMergeTree & storage_, StorageMetadataPtr metadata_snapshot_, MergeMutateSelectedEntryPtr merge_mutate_entry_, - TableLockHolder & table_lock_holder_, + TableLockHolder table_lock_holder_, Callback && task_result_callback_) : storage(storage_) - , metadata_snapshot(metadata_snapshot_) - , merge_mutate_entry(merge_mutate_entry_) - , table_lock_holder(table_lock_holder_) - , task_result_callback(task_result_callback_) {} + , metadata_snapshot(std::move(metadata_snapshot_)) + , merge_mutate_entry(std::move(merge_mutate_entry_)) + , table_lock_holder(std::move(table_lock_holder_)) + , task_result_callback(std::forward(task_result_callback_)) {} bool executeStep() override; @@ -60,7 +60,7 @@ private: StorageMetadataPtr metadata_snapshot; MergeMutateSelectedEntryPtr merge_mutate_entry{nullptr}; - TableLockHolder & table_lock_holder; + TableLockHolder table_lock_holder; FutureMergedMutatedPartPtr future_part{nullptr}; std::unique_ptr stopwatch; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f48a9285c53..0655806bf0e 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -631,8 +631,9 @@ public: LOG_DEBUG(log, "Merged {} parts in level {} to {}", selected_parts.size(), current_level, projection_future_part->name); auto tmp_part_merge_task = ctx->mutator->mergePartsToTemporaryPart( projection_future_part, - ctx->metadata_snapshot, + projection.metadata, ctx->mutate_entry, + std::make_unique((*ctx->mutate_entry)->table_id, projection_future_part), *ctx->holder, ctx->time_of_mutation, ctx->context, @@ -1261,7 +1262,7 @@ bool MutateTask::prepare() ctx->mutation_kind = ctx->interpreter->getMutationKind(); ctx->mutating_stream = ctx->interpreter->execute(); ctx->updated_header = ctx->interpreter->getUpdatedHeader(); - ctx->mutating_stream->setProgressCallback(MergeProgressCallback(*ctx->mutate_entry, ctx->watch_prev_elapsed, *ctx->stage_progress)); + ctx->mutating_stream->setProgressCallback(MergeProgressCallback((*ctx->mutate_entry)->ptr(), ctx->watch_prev_elapsed, *ctx->stage_progress)); } ctx->single_disk_volume = std::make_shared("volume_" + ctx->future_part->name, ctx->space_reservation->getDisk(), 0); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp 
b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index c81f587cbbc..75308f872dc 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -118,7 +118,7 @@ void ReplicatedMergeTreeSink::checkQuorumPrecondition(zkutil::ZooKeeperPtr & zoo void ReplicatedMergeTreeSink::consume(Chunk chunk) { - auto block = getPort().getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); last_block_is_duplicate = false; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 2a6702736df..75f002c6b42 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -44,8 +44,12 @@ public: void writeExistingPart(MergeTreeData::MutableDataPartPtr & part); /// For proper deduplication in MaterializedViews - bool lastBlockIsDuplicate() const + bool lastBlockIsDuplicate() const override { + /// If MV is responsible for deduplication, block is not considered duplicating. + if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) + return false; + return last_block_is_duplicate; } diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 997e6e8bb74..88e27cef7bf 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index e7b5fa8256c..fdc30919ee7 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include @@ -487,11 +489,15 @@ void MaterializedPostgreSQLConsumer::syncTables() insert->columns = buffer.columnsAST; InterpreterInsertQuery interpreter(insert, insert_context, true); - auto block_io = interpreter.execute(); - OneBlockInputStream input(result_rows); + auto io = interpreter.execute(); + auto input = std::make_shared( + result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); - assertBlocksHaveEqualStructure(input.getHeader(), block_io.out->getHeader(), "postgresql replica table sync"); - copyData(input, *block_io.out); + assertBlocksHaveEqualStructure(input->getPort().getHeader(), io.pipeline.getHeader(), "postgresql replica table sync"); + io.pipeline.complete(Pipe(std::move(input))); + + CompletedPipelineExecutor executor(io.pipeline); + executor.execute(); buffer.columns = buffer.description.sample_block.cloneEmptyColumns(); } diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 456ca2c514e..7fda31b33bd 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -1,8 +1,7 @@ #include "PostgreSQLReplicationHandler.h" #include -#include -#include +#include #include #include #include @@ -263,16 +262,11 @@ StoragePtr PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection & auto sample_block = storage_metadata.getSampleBlockNonMaterialized(); auto input = std::make_unique>(tx, query_str, sample_block, DEFAULT_BLOCK_SIZE); - QueryPipeline pipeline; - 
pipeline.init(Pipe(std::move(input))); - assertBlocksHaveEqualStructure(pipeline.getHeader(), block_io.out->getHeader(), "postgresql replica load from snapshot"); + assertBlocksHaveEqualStructure(input->getPort().getHeader(), block_io.pipeline.getHeader(), "postgresql replica load from snapshot"); + block_io.pipeline.complete(Pipe(std::move(input))); - PullingPipelineExecutor executor(pipeline); - Block block; - block_io.out->writePrefix(); - while (executor.pull(block)) - block_io.out->write(block); - block_io.out->writeSuffix(); + CompletedPipelineExecutor executor(block_io.pipeline); + executor.execute(); nested_storage = materialized_storage->prepare(); auto nested_table_id = nested_storage->getStorageID(); diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index b07fd5183c9..c0b96bd9f54 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -13,8 +13,9 @@ #include #include #include +#include +#include -#include namespace DB { @@ -218,21 +219,23 @@ void ProjectionDescription::recalculateWithNewColumns(const ColumnsDescription & Block ProjectionDescription::calculate(const Block & block, ContextPtr context) const { - auto in = InterpreterSelectQuery( + auto builder = InterpreterSelectQuery( query_ast, context, Pipe(std::make_shared(block, Chunk(block.getColumns(), block.rows()))), SelectQueryOptions{ type == ProjectionDescription::Type::Normal ? QueryProcessingStage::FetchColumns : QueryProcessingStage::WithMergeableState}) - .execute() - .getInputStream(); - in = std::make_shared(in, block.rows(), 0); - in->readPrefix(); - auto ret = in->read(); - if (in->read()) + .buildQueryPipeline(); + builder.resize(1); + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); + + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); + PullingPipelineExecutor executor(pipeline); + Block ret; + executor.pull(ret); + if (executor.pull(ret)) throw Exception("Projection cannot increase the number of rows in a block", ErrorCodes::LOGICAL_ERROR); - in->readSuffix(); return ret; } diff --git a/src/Storages/RabbitMQ/RabbitMQSink.cpp b/src/Storages/RabbitMQ/RabbitMQSink.cpp index 9c556ee0832..b71f3ce86b7 100644 --- a/src/Storages/RabbitMQ/RabbitMQSink.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSink.cpp @@ -31,8 +31,7 @@ void RabbitMQSink::onStart() auto format_settings = getFormatSettings(context); format_settings.protobuf.allow_multiple_rows_without_delimiter = true; - child = FormatFactory::instance().getOutputStream(storage.getFormatName(), *buffer, - getPort().getHeader(), context, + child = FormatFactory::instance().getOutputStream(storage.getFormatName(), *buffer, getHeader(), context, [this](const Columns & /* columns */, size_t /* rows */) { buffer->countRow(); @@ -43,7 +42,7 @@ void RabbitMQSink::onStart() void RabbitMQSink::consume(Chunk chunk) { - child->write(getPort().getHeader().cloneWithColumns(chunk.detachColumns())); + child->write(getHeader().cloneWithColumns(chunk.detachColumns())); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index c0853dd0ad2..ba851b2f92c 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include namespace DB @@ -956,7 +957,7 @@ bool StorageRabbitMQ::streamToViews() auto block_io = interpreter.execute(); auto metadata_snapshot = getInMemoryMetadataPtr(); - auto column_names = 
block_io.out->getHeader().getNames(); + auto column_names = block_io.pipeline.getHeader().getNames(); auto sample_block = metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()); auto block_size = getMaxBlockSize(); @@ -990,15 +991,21 @@ bool StorageRabbitMQ::streamToViews() else in = streams[0]; - std::atomic stub = {false}; - if (!connection->getHandler().loopRunning()) { connection->getHandler().updateLoopState(Loop::RUN); looping_task->activateAndSchedule(); } - copyData(*in, *block_io.out, &stub); + { + PushingPipelineExecutor executor(block_io.pipeline); + in->readPrefix(); + executor.start(); + while (auto block = in->read()) + executor.push(std::move(block)); + in->readSuffix(); + executor.finish(); + } /* Note: sending ack() with loop running in another thread will lead to a lot of data races inside the library, but only in case * error occurs or connection is lost while ack is being sent diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index ddf839b6427..1c918c15775 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -20,7 +20,7 @@ EmbeddedRocksDBSink::EmbeddedRocksDBSink( , storage(storage_) , metadata_snapshot(metadata_snapshot_) { - for (const auto & elem : getPort().getHeader()) + for (const auto & elem : getHeader()) { if (elem.name == storage.primary_key) break; @@ -31,7 +31,7 @@ EmbeddedRocksDBSink::EmbeddedRocksDBSink( void EmbeddedRocksDBSink::consume(Chunk chunk) { auto rows = chunk.getNumRows(); - auto block = getPort().getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 459c0879cda..0b09a1f94d5 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -36,6 +36,7 @@ #include #include +#include namespace fs = std::filesystem; @@ -213,9 +214,9 @@ public: std::vector slices_keys(num_keys); const auto & sample_block = metadata_snapshot->getSampleBlock(); - const auto & key_column = sample_block.getByName(storage.primary_key); + const auto & key_column = sample_block.getByName(storage.getPrimaryKey()); auto columns = sample_block.cloneEmptyColumns(); - size_t primary_key_pos = sample_block.getPositionByName(storage.primary_key); + size_t primary_key_pos = sample_block.getPositionByName(storage.getPrimaryKey()); size_t rows_processed = 0; while (it < end && rows_processed < max_block_size) @@ -230,8 +231,7 @@ public: } std::vector values; - auto statuses = storage.rocksdb_ptr->MultiGet(rocksdb::ReadOptions(), slices_keys, &values); - + auto statuses = storage.multiGet(slices_keys, values); for (size_t i = 0; i < statuses.size(); ++i) { if (statuses[i].ok()) @@ -280,18 +280,21 @@ StorageEmbeddedRocksDB::StorageEmbeddedRocksDB(const StorageID & table_id_, { fs::create_directories(rocksdb_dir); } - initDb(); + initDB(); } void StorageEmbeddedRocksDB::truncate(const ASTPtr &, const StorageMetadataPtr & , ContextPtr, TableExclusiveLockHolder &) { + std::unique_lock lock(rocksdb_ptr_mx); rocksdb_ptr->Close(); + rocksdb_ptr = nullptr; + fs::remove_all(rocksdb_dir); fs::create_directories(rocksdb_dir); - initDb(); + initDB(); } -void StorageEmbeddedRocksDB::initDb() +void StorageEmbeddedRocksDB::initDB() { rocksdb::Status 
status; rocksdb::Options base; @@ -460,9 +463,20 @@ static StoragePtr create(const StorageFactory::Arguments & args) std::shared_ptr StorageEmbeddedRocksDB::getRocksDBStatistics() const { + std::shared_lock lock(rocksdb_ptr_mx); + if (!rocksdb_ptr) + return nullptr; return rocksdb_ptr->GetOptions().statistics; } +std::vector StorageEmbeddedRocksDB::multiGet(const std::vector & slices_keys, std::vector & values) const +{ + std::shared_lock lock(rocksdb_ptr_mx); + if (!rocksdb_ptr) + return {}; + return rocksdb_ptr->MultiGet(rocksdb::ReadOptions(), slices_keys, &values); +} + void registerStorageEmbeddedRocksDB(StorageFactory & factory) { StorageFactory::StorageFeatures features{ diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 3f1b3b49492..b095673a6f5 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -1,8 +1,10 @@ #pragma once #include +#include #include #include +#include namespace rocksdb @@ -20,7 +22,6 @@ class Context; class StorageEmbeddedRocksDB final : public shared_ptr_helper, public IStorage, WithContext { friend struct shared_ptr_helper; - friend class EmbeddedRocksDBSource; friend class EmbeddedRocksDBSink; friend class EmbeddedRocksDBBlockInputStream; public: @@ -50,6 +51,8 @@ public: Strings getDataPaths() const override { return {rocksdb_dir}; } std::shared_ptr getRocksDBStatistics() const; + std::vector multiGet(const std::vector & slices_keys, std::vector & values) const; + const String & getPrimaryKey() const { return primary_key; } protected: StorageEmbeddedRocksDB(const StorageID & table_id_, @@ -63,8 +66,9 @@ private: const String primary_key; using RocksDBPtr = std::unique_ptr; RocksDBPtr rocksdb_ptr; + mutable std::shared_mutex rocksdb_ptr_mx; String rocksdb_dir; - void initDb(); + void initDB(); }; } diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.cpp b/src/Storages/RocksDB/StorageSystemRocksDB.cpp index 7d31d5ddc21..cbb96ed4001 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.cpp +++ b/src/Storages/RocksDB/StorageSystemRocksDB.cpp @@ -43,7 +43,8 @@ void StorageSystemRocksDB::fillData(MutableColumns & res_columns, ContextPtr con const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); - std::map> tables; + using RocksDBStoragePtr = std::shared_ptr; + std::map> tables; for (const auto & db : DatabaseCatalog::instance().getDatabases()) { const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, db.first); @@ -51,18 +52,17 @@ void StorageSystemRocksDB::fillData(MutableColumns & res_columns, ContextPtr con for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) { StoragePtr table = iterator->table(); - if (!table) + RocksDBStoragePtr rocksdb_table = table ? 
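The EmbeddedRocksDB changes above close a race between TRUNCATE and concurrent readers (`system.rocksdb` statistics, key lookups) by putting the database handle behind a reader-writer lock: `getRocksDBStatistics()` and `multiGet()` take a shared lock and return early when the handle is gone, while `truncate()` holds the exclusive lock across `Close()`, directory recreation and `initDB()`. A minimal standalone sketch of the same pattern in plain standard C++ follows; `FakeDB` and `GuardedStore` are illustrative stand-ins, not ClickHouse classes.

```cpp
#include <iostream>
#include <memory>
#include <mutex>
#include <optional>
#include <shared_mutex>
#include <string>

/// Stand-in for the RocksDB handle guarded by the storage object.
struct FakeDB
{
    std::string get(const std::string & key) const { return "value_for_" + key; }
};

class GuardedStore
{
public:
    GuardedStore() : db(std::make_unique<FakeDB>()) {}

    /// Readers take a shared lock and must tolerate a missing handle,
    /// mirroring getRocksDBStatistics()/multiGet() returning early.
    std::optional<std::string> read(const std::string & key) const
    {
        std::shared_lock lock(mutex);
        if (!db)
            return std::nullopt;
        return db->get(key);
    }

    /// TRUNCATE-like operation: exclusive lock, drop and recreate the handle.
    void truncate()
    {
        std::unique_lock lock(mutex);
        db.reset();                         /// close the old database
        db = std::make_unique<FakeDB>();    /// re-open, as initDB() does
    }

private:
    mutable std::shared_mutex mutex;
    std::unique_ptr<FakeDB> db;
};

int main()
{
    GuardedStore store;
    store.truncate();
    if (auto value = store.read("key"))
        std::cout << *value << '\n';
}
```

The new `02030_rocksdb_race_long.sh` test further down exercises this interleaving by repeatedly truncating the table while another client polls `system.rocksdb`.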
std::dynamic_pointer_cast(table) : nullptr; + if (!rocksdb_table) continue; - if (!dynamic_cast(table.get())) - continue; if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, db.first, iterator->name())) continue; - tables[db.first][iterator->name()] = table; + + tables[db.first][iterator->name()] = rocksdb_table; } } - MutableColumnPtr col_database_mut = ColumnString::create(); MutableColumnPtr col_table_mut = ColumnString::create(); @@ -101,10 +101,9 @@ void StorageSystemRocksDB::fillData(MutableColumns & res_columns, ContextPtr con String database = (*col_database_to_filter)[i].safeGet(); String table = (*col_table_to_filter)[i].safeGet(); - auto & rocksdb_table = dynamic_cast(*tables[database][table]); - auto statistics = rocksdb_table.getRocksDBStatistics(); + auto statistics = tables[database][table]->getRocksDBStatistics(); if (!statistics) - throw Exception(ErrorCodes::LOGICAL_ERROR, "rocksdb statistics is not enabled"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "RocksDB statistics are not available"); for (auto [tick, name] : rocksdb::TickersNameMap) { diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index d7844c28def..c71335f8478 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -26,11 +26,13 @@ #include #include #include +#include #include #include #include #include #include +#include namespace ProfileEvents @@ -519,7 +521,7 @@ public: , metadata_snapshot(metadata_snapshot_) { // Check table structure. - metadata_snapshot->check(getPort().getHeader(), true); + metadata_snapshot->check(getHeader(), true); } String getName() const override { return "BufferSink"; } @@ -530,7 +532,7 @@ public: if (!rows) return; - auto block = getPort().getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); StoragePtr destination; if (storage.destination_id) @@ -954,9 +956,10 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl InterpreterInsertQuery interpreter{insert, insert_context, allow_materialized}; auto block_io = interpreter.execute(); - block_io.out->writePrefix(); - block_io.out->write(block_to_write); - block_io.out->writeSuffix(); + PushingPipelineExecutor executor(block_io.pipeline); + executor.start(); + executor.push(std::move(block_to_write)); + executor.finish(); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index c8df0e89e29..dd304065a14 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -53,6 +53,7 @@ #include #include +#include #include #include #include @@ -695,7 +696,7 @@ SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadata } -QueryPipelinePtr StorageDistributed::distributedWrite(const ASTInsertQuery & query, ContextPtr local_context) +QueryPipelineBuilderPtr StorageDistributed::distributedWrite(const ASTInsertQuery & query, ContextPtr local_context) { const Settings & settings = local_context->getSettingsRef(); std::shared_ptr storage_src; @@ -739,7 +740,7 @@ QueryPipelinePtr StorageDistributed::distributedWrite(const ASTInsertQuery & que const auto & cluster = getCluster(); const auto & shards_info = cluster->getShardsInfo(); - std::vector> pipelines; + std::vector> pipelines; String new_query_str = queryToString(new_query); for (size_t shard_index : collections::range(0, shards_info.size())) @@ -748,7 +749,8 @@ QueryPipelinePtr StorageDistributed::distributedWrite(const ASTInsertQuery & 
que if (shard_info.isLocal()) { InterpreterInsertQuery interpreter(new_query, local_context); - pipelines.emplace_back(std::make_unique(interpreter.execute().pipeline)); + pipelines.emplace_back(std::make_unique()); + pipelines.back()->init(interpreter.execute().pipeline); } else { @@ -761,16 +763,16 @@ QueryPipelinePtr StorageDistributed::distributedWrite(const ASTInsertQuery & que /// INSERT SELECT query returns empty block auto remote_query_executor = std::make_shared(shard_info.pool, std::move(connections), new_query_str, Block{}, local_context); - pipelines.emplace_back(std::make_unique()); + pipelines.emplace_back(std::make_unique()); pipelines.back()->init(Pipe(std::make_shared(remote_query_executor, false, settings.async_socket_for_remote))); - pipelines.back()->setSinks([](const Block & header, QueryPipeline::StreamType) -> ProcessorPtr + pipelines.back()->setSinks([](const Block & header, QueryPipelineBuilder::StreamType) -> ProcessorPtr { return std::make_shared(header); }); } } - return std::make_unique(QueryPipeline::unitePipelines(std::move(pipelines))); + return std::make_unique(QueryPipelineBuilder::unitePipelines(std::move(pipelines))); } diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 80800e50059..3f785d54422 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -83,7 +83,7 @@ public: SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; - QueryPipelinePtr distributedWrite(const ASTInsertQuery & query, ContextPtr context) override; + QueryPipelineBuilderPtr distributedWrite(const ASTInsertQuery & query, ContextPtr context) override; /// Removes temporary data in local filesystem. void truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) override; diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 56b410a3ea4..989cc3a1f91 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -12,6 +13,10 @@ #include #include +#include +#include +#include +#include #include #include #include @@ -75,6 +80,29 @@ StorageExecutable::StorageExecutable( setInMemoryMetadata(storage_metadata); } +class SendingChunkHeaderTransform final : public ISimpleTransform +{ +public: + SendingChunkHeaderTransform(const Block & header, WriteBuffer & buffer_) + : ISimpleTransform(header, header, false) + , buffer(buffer_) + { + } + + String getName() const override { return "SendingChunkHeaderTransform"; } + +protected: + + void transform(Chunk & chunk) override + { + writeText(chunk.getNumRows(), buffer); + writeChar('\n', buffer); + } + +private: + WriteBuffer & buffer; +}; + Pipe StorageExecutable::read( const Names & /*column_names*/, const StorageMetadataPtr & metadata_snapshot, @@ -92,14 +120,13 @@ Pipe StorageExecutable::read( script_name, user_scripts_path); - std::vector inputs; + std::vector inputs; inputs.reserve(input_queries.size()); for (auto & input_query : input_queries) { InterpreterSelectWithUnionQuery interpreter(input_query, context, {}); - auto input = interpreter.execute().getInputStream(); - inputs.emplace_back(std::move(input)); + inputs.emplace_back(interpreter.buildQueryPipeline()); } ShellCommand::Config config(script_path); @@ -134,11 +161,8 @@ Pipe StorageExecutable::read( for (size_t i = 0; i < inputs.size(); ++i) { - BlockInputStreamPtr input_stream = 
inputs[i]; WriteBufferFromFile * write_buffer = nullptr; - bool send_chunk_header = settings.send_chunk_header; - if (i == 0) { write_buffer = &process->in; @@ -153,27 +177,23 @@ Pipe StorageExecutable::read( write_buffer = &it->second; } - ShellCommandSource::SendDataTask task = [input_stream, write_buffer, context, is_executable_pool, send_chunk_header, this]() + inputs[i].resize(1); + if (settings.send_chunk_header) { - auto output_stream = context->getOutputStream(format, *write_buffer, input_stream->getHeader().cloneEmpty()); - input_stream->readPrefix(); - output_stream->writePrefix(); + auto transform = std::make_shared(inputs[i].getHeader(), *write_buffer); + inputs[i].addTransform(std::move(transform)); + } - while (auto block = input_stream->read()) - { - if (send_chunk_header) - { - writeText(block.rows(), *write_buffer); - writeChar('\n', *write_buffer); - } + auto pipeline = std::make_shared(QueryPipelineBuilder::getPipeline(std::move(inputs[i]))); - output_stream->write(block); - } + auto out = FormatFactory::instance().getOutputFormat(format, *write_buffer, materializeBlock(pipeline->getHeader()), context); + out->setAutoFlush(); + pipeline->complete(std::move(out)); - input_stream->readSuffix(); - output_stream->writeSuffix(); - - output_stream->flush(); + ShellCommandSource::SendDataTask task = [pipeline, write_buffer, is_executable_pool]() + { + CompletedPipelineExecutor executor(*pipeline); + executor.execute(); if (!is_executable_pool) write_buffer->close(); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 8501a941ac9..c6c40453874 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -336,8 +336,7 @@ public: /// Special case for distributed format. Defaults are not needed here. 
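The change repeated throughout this patch is the retirement of the `BlockInputStream`/`BlockOutputStream` lifecycle (`readPrefix`/`read`/`readSuffix`, `writePrefix`/`write`/`writeSuffix`, `copyData`) in favour of the pipeline executors. As a rough summary of the three call patterns used in the hunks above and below, schematic rather than a standalone program (the corresponding `#include` lines are added in the respective files):

```cpp
/// Schematic only: block_io, pipeline, source and the blocks come from the surrounding hunks.

/// 1. Pushing blocks into an INSERT pipeline
///    (StorageBuffer::writeBlockToDestination, StorageRabbitMQ::streamToViews, gtest_storage_log):
PushingPipelineExecutor pushing(block_io.pipeline);
pushing.start();
pushing.push(std::move(block_to_write));   /// repeated in a loop when there are many blocks
pushing.finish();

/// 2. Pulling result blocks out of a query pipeline
///    (ProjectionDescription::calculate, StorageSystemZooKeeper, readData in the Log test):
PullingPipelineExecutor pulling(pipeline);
Block block;
while (pulling.pull(block))
{
    /// consume the block
}

/// 3. Completing a pipeline with its missing source or sink and running it to the end
///    (MaterializedPostgreSQLConsumer, PostgreSQLReplicationHandler, StorageExecutable):
block_io.pipeline.complete(Pipe(std::move(source)));
CompletedPipelineExecutor completed(block_io.pipeline);
completed.execute();
```

The other mechanical change repeated across the sinks is that the header is now obtained with `getHeader()` instead of `getPort().getHeader()`.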
if (storage->format_name == "Distributed") { - pipeline = std::make_unique(); - pipeline->init(Pipe(StorageDistributedDirectoryMonitor::createSourceFromFile(current_path))); + pipeline = std::make_unique(StorageDistributedDirectoryMonitor::createSourceFromFile(current_path)); reader = std::make_unique(*pipeline); continue; } @@ -394,17 +393,19 @@ public: auto format = FormatFactory::instance().getInput( storage->format_name, *read_buf, get_block_for_format(), context, max_block_size, storage->format_settings); - pipeline = std::make_unique(); - pipeline->init(Pipe(format)); + QueryPipelineBuilder builder; + builder.init(Pipe(format)); if (columns_description.hasDefaults()) { - pipeline->addSimpleTransform([&](const Block & header) + builder.addSimpleTransform([&](const Block & header) { return std::make_shared(header, columns_description, *format, context); }); } + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + reader = std::make_unique(*pipeline); } @@ -589,7 +590,7 @@ public: void consume(Chunk chunk) override { - writer->write(getPort().getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void onFinish() override diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 0dfebb6195a..0fd94bac95a 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -306,7 +306,7 @@ private: void LogSink::consume(Chunk chunk) { - auto block = getPort().getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); metadata_snapshot->check(block, true); /// The set of written offset columns so that you do not write shared offsets of columns for nested structures multiple times @@ -332,7 +332,7 @@ void LogSink::onFinish() WrittenStreams written_streams; ISerialization::SerializeBinaryBulkSettings settings; - for (const auto & column : getPort().getHeader()) + for (const auto & column : getHeader()) { auto it = serialize_states.find(column.name); if (it != serialize_states.end()) @@ -463,6 +463,8 @@ void LogSink::writeMarks(MarksForColumns && marks) } } +StorageLog::~StorageLog() = default; + StorageLog::StorageLog( DiskPtr disk_, const String & relative_path_, diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 116bdc31520..b4cd15ef258 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -23,6 +23,7 @@ class StorageLog final : public shared_ptr_helper, public IStorage friend struct shared_ptr_helper; public: + ~StorageLog() override; String getName() const override { return "Log"; } Pipe read( diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 907e06fb4f6..d2493ff7c43 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 4eafaa5c5c5..3fe6083ab13 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -107,7 +107,7 @@ public: void consume(Chunk chunk) override { - auto block = getPort().getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); metadata_snapshot->check(block, true); if (storage.compress) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index c1066329e6f..b5958e234d1 100644 
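StorageFile above, and StorageS3 and StorageURL further down, all move their read path to the same two-step construction: the input format and any extra transforms are assembled on a `QueryPipelineBuilder`, and only the finished builder is converted into the `QueryPipeline` that the `PullingPipelineExecutor` reads from. A sketch of the shape, with `input_format`, `columns_description` and `context` taken from the hunks and `makeAddingDefaultsTransform` as a hypothetical placeholder for the defaults-filling transform:

```cpp
/// Schematic only; not a standalone program.
QueryPipelineBuilder builder;
builder.init(Pipe(input_format));

if (columns_description.hasDefaults())
{
    builder.addSimpleTransform([&](const Block & header)
    {
        /// Placeholder for the transform that fills column defaults, as in the hunk above.
        return makeAddingDefaultsTransform(header, columns_description, *input_format, context);
    });
}

/// Only now is the builder sealed into a pipeline and handed to the reader.
pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
```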
--- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -35,6 +35,7 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; extern const int ILLEGAL_PREWHERE; @@ -49,7 +50,7 @@ StorageMerge::StorageMerge( const String & comment, const String & source_database_name_or_regexp_, bool database_is_regexp_, - const DbToTableSetMap & source_databases_and_tables_, + const DBToTableSetMap & source_databases_and_tables_, ContextPtr context_) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) @@ -380,10 +381,10 @@ Pipe StorageMerge::createSources( if (!storage) { - pipe = QueryPipeline::getPipe(InterpreterSelectQuery( + pipe = QueryPipelineBuilder::getPipe(InterpreterSelectQuery( modified_query_info.query, modified_context, std::make_shared(header), - SelectQueryOptions(processed_stage).analyze()).execute().pipeline); + SelectQueryOptions(processed_stage).analyze()).buildQueryPipeline()); pipe.addInterpreterContext(modified_context); return pipe; @@ -424,7 +425,7 @@ Pipe StorageMerge::createSources( InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, SelectQueryOptions(processed_stage)}; - pipe = QueryPipeline::getPipe(interpreter.execute().pipeline); + pipe = QueryPipelineBuilder::getPipe(interpreter.buildQueryPipeline()); /** Materialization is needed, since from distributed storage the constants come materialized. * If you do not do this, different types (Const and non-Const) columns will be produced in different threads, @@ -573,11 +574,14 @@ DatabaseTablesIteratorPtr StorageMerge::getDatabaseIterator(const String & datab { auto database = DatabaseCatalog::instance().getDatabase(database_name); - auto table_name_match = [this, &database_name](const String & table_name_) -> bool { + auto table_name_match = [this, database_name](const String & table_name_) -> bool + { if (source_databases_and_tables) { - const auto & source_tables = (*source_databases_and_tables).at(database_name); - return source_tables.count(table_name_); + if (auto it = source_databases_and_tables->find(database_name); it != source_databases_and_tables->end()) + return it->second.count(table_name_); + else + return false; } else return source_table_regexp->match(table_name_); @@ -742,6 +746,26 @@ IStorage::ColumnSizeByName StorageMerge::getColumnSizes() const return first_materialized_mysql->getColumnSizes(); } + +std::tuple StorageMerge::evaluateDatabaseName(const ASTPtr & node, ContextPtr context_) +{ + if (const auto * func = node->as(); func && func->name == "REGEXP") + { + if (func->arguments->children.size() != 1) + throw Exception("REGEXP in Merge ENGINE takes only one argument", ErrorCodes::BAD_ARGUMENTS); + + auto * literal = func->arguments->children[0]->as(); + if (!literal || literal->value.safeGet().empty()) + throw Exception("Argument for REGEXP in Merge ENGINE should be a non empty String Literal", ErrorCodes::BAD_ARGUMENTS); + + return {true, func->arguments->children[0]}; + } + + auto ast = evaluateConstantExpressionForDatabaseName(node, context_); + return {false, ast}; +} + + void registerStorageMerge(StorageFactory & factory) { factory.registerStorage("Merge", [](const StorageFactory::Arguments & args) @@ -757,10 +781,11 @@ void registerStorageMerge(StorageFactory & factory) " - name of source database and regexp for table names.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - auto [is_regexp, database_ast] = evaluateDatabaseNameForMergeEngine(engine_args[0], 
args.getLocalContext()); + auto [is_regexp, database_ast] = StorageMerge::evaluateDatabaseName(engine_args[0], args.getLocalContext()); if (!is_regexp) engine_args[0] = database_ast; + String source_database_name_or_regexp = database_ast->as().value.safeGet(); engine_args[1] = evaluateConstantExpressionAsLiteral(engine_args[1], args.getLocalContext()); diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 20460e95156..b6001815f85 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -48,12 +48,15 @@ public: bool mayBenefitFromIndexForIn( const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override; + /// Evaluate database name or regexp for StorageMerge and TableFunction merge + static std::tuple evaluateDatabaseName(const ASTPtr & node, ContextPtr context); + private: - using DbToTableSetMap = std::map>; + using DBToTableSetMap = std::map>; std::optional source_database_regexp; std::optional source_table_regexp; - std::optional source_databases_and_tables; + std::optional source_databases_and_tables; String source_database_name_or_regexp; bool database_is_regexp = false; @@ -86,7 +89,7 @@ protected: const String & comment, const String & source_database_name_or_regexp_, bool database_is_regexp_, - const DbToTableSetMap & source_databases_and_tables_, + const DBToTableSetMap & source_databases_and_tables_, ContextPtr context_); StorageMerge( diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index e2a0c36b463..faa16ac875b 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -1035,7 +1036,8 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign if (merge_entry) { - auto task = std::make_shared(*this, metadata_snapshot, false, Names{}, merge_entry, share_lock, common_assignee_trigger); + auto task = std::make_shared( + *this, metadata_snapshot, false, Names{}, merge_entry, share_lock, common_assignee_trigger); assignee.scheduleMergeMutateTask(task); return true; } @@ -1054,7 +1056,8 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign } bool scheduled = false; - if (time_after_previous_cleanup_temporary_directories.compareAndRestartDeferred(getContext()->getSettingsRef().merge_tree_clear_old_temporary_directories_interval_seconds)) + if (time_after_previous_cleanup_temporary_directories.compareAndRestartDeferred( + getContext()->getSettingsRef().merge_tree_clear_old_temporary_directories_interval_seconds)) { assignee.scheduleMergeMutateTask(ExecutableLambdaAdapter::create( [this, share_lock] () @@ -1064,7 +1067,8 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign }, common_assignee_trigger, getStorageID())); scheduled = true; } - if (auto lock = time_after_previous_cleanup_parts.compareAndRestartDeferred(getContext()->getSettingsRef().merge_tree_clear_old_parts_interval_seconds)) + if (auto lock = time_after_previous_cleanup_parts.compareAndRestartDeferred( + getContext()->getSettingsRef().merge_tree_clear_old_parts_interval_seconds)) { assignee.scheduleMergeMutateTask(ExecutableLambdaAdapter::create( [this, share_lock] () @@ -1078,7 +1082,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign return true; }, common_assignee_trigger, getStorageID())); scheduled = true; - } + } return scheduled; } diff --git 
a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 7f08dfbfe99..34a4dffc69d 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -132,7 +132,7 @@ public: void consume(Chunk chunk) override { - auto block = getPort().getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); auto blocks = splitBlocks(block, max_batch_rows); mysqlxx::Transaction trans(entry); try diff --git a/src/Storages/StorageNull.cpp b/src/Storages/StorageNull.cpp index 3076ec733b7..2b3585e360e 100644 --- a/src/Storages/StorageNull.cpp +++ b/src/Storages/StorageNull.cpp @@ -44,7 +44,8 @@ void StorageNull::checkAlterIsPossible(const AlterCommands & commands, ContextPt if (command.type != AlterCommand::Type::ADD_COLUMN && command.type != AlterCommand::Type::MODIFY_COLUMN && command.type != AlterCommand::Type::DROP_COLUMN - && command.type != AlterCommand::Type::COMMENT_COLUMN) + && command.type != AlterCommand::Type::COMMENT_COLUMN + && command.type != AlterCommand::Type::COMMENT_TABLE) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by storage {}", command.type, getName()); diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index d4fedaf78c8..c4c2f40ded4 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -129,8 +129,7 @@ public: void consume(Chunk chunk) override { - auto block = getPort().getHeader().cloneWithColumns(chunk.detachColumns()); - + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); if (!inserter) { if (on_conflict.empty()) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e67947ae811..3b84c4c4a03 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -45,6 +45,7 @@ #include #include +#include #include #include diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index ae4e523d2d8..ce1b049758e 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include @@ -233,17 +233,18 @@ bool StorageS3Source::initialize() std::make_unique(client, bucket, current_key, max_single_read_retries, DBMS_DEFAULT_BUFFER_SIZE), chooseCompressionMethod(current_key, compression_hint)); auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, getContext(), max_block_size, format_settings); - pipeline = std::make_unique(); - pipeline->init(Pipe(input_format)); + QueryPipelineBuilder builder; + builder.init(Pipe(input_format)); if (columns_desc.hasDefaults()) { - pipeline->addSimpleTransform([&](const Block & header) + builder.addSimpleTransform([&](const Block & header) { return std::make_shared(header, columns_desc, *input_format, getContext()); }); } + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); initialized = false; @@ -320,7 +321,7 @@ public: writer->writePrefix(); is_first_chunk = false; } - writer->write(getPort().getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void onFinish() override diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 758284e8d50..e4dcc1af8d5 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -102,7 +102,7 @@ public: void 
consume(Chunk chunk) override { - auto block = getPort().getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString sqlbuf; sqlbuf << "INSERT INTO "; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index ec830c3fb95..fe55123335a 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -82,7 +82,7 @@ SetOrJoinSink::SetOrJoinSink( void SetOrJoinSink::consume(Chunk chunk) { /// Sort columns in the block. This is necessary, since Set and Join count on the same column order in different blocks. - Block sorted_block = getPort().getHeader().cloneWithColumns(chunk.detachColumns()).sortColumns(); + Block sorted_block = getHeader().cloneWithColumns(chunk.detachColumns()).sortColumns(); table.insertBlock(sorted_block); if (persistent) diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 99e6ba2fa1f..f1ab365e458 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -206,7 +206,7 @@ public: void consume(Chunk chunk) override { - block_out.write(getPort().getHeader().cloneWithColumns(chunk.detachColumns())); + block_out.write(getHeader().cloneWithColumns(chunk.detachColumns())); } void onFinish() override diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 7ab891d5936..d1778342ec5 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -334,7 +334,7 @@ void TinyLogSink::onFinish() WrittenStreams written_streams; ISerialization::SerializeBinaryBulkSettings settings; - for (const auto & column : getPort().getHeader()) + for (const auto & column : getHeader()) { auto it = serialize_states.find(column.name); if (it != serialize_states.end()) @@ -369,7 +369,7 @@ void TinyLogSink::onFinish() void TinyLogSink::consume(Chunk chunk) { - auto block = getPort().getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); metadata_snapshot->check(block, true); /// The set of written offset columns so that you do not write shared columns for nested structures multiple times diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 81820ce5e1d..fe89ad967d7 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include @@ -111,14 +111,15 @@ namespace compression_method); auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings); - pipeline = std::make_unique(); - pipeline->init(Pipe(input_format)); + QueryPipelineBuilder builder; + builder.init(Pipe(input_format)); - pipeline->addSimpleTransform([&](const Block & cur_header) + builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header, columns, *input_format, context); }); + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); } @@ -176,7 +177,7 @@ void StorageURLSink::consume(Chunk chunk) is_first_chunk = false; } - writer->write(getPort().getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void StorageURLSink::onFinish() diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index e4b0e57a563..df774554365 100644 --- a/src/Storages/StorageView.cpp +++ 
b/src/Storages/StorageView.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index cba10548852..9aedee66b5f 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace DB @@ -94,18 +95,18 @@ static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context) if (value->as()) { auto interpreter_subquery = interpretSubquery(value, context, {}, {}); - auto stream = interpreter_subquery->execute().getInputStream(); + auto pipeline = interpreter_subquery->execute().pipeline; SizeLimits limites(context->getSettingsRef().max_rows_in_set, context->getSettingsRef().max_bytes_in_set, OverflowMode::THROW); Set set(limites, true, context->getSettingsRef().transform_null_in); - set.setHeader(stream->getHeader().getColumnsWithTypeAndName()); + set.setHeader(pipeline.getHeader().getColumnsWithTypeAndName()); - stream->readPrefix(); - while (Block block = stream->read()) + PullingPipelineExecutor executor(pipeline); + Block block; + while (executor.pull(block)) { set.insertFromBlock(block.getColumnsWithTypeAndName()); } set.finishInsert(); - stream->readSuffix(); set.checkColumnsNumber(1); const auto & set_column = *set.getSetElements()[0]; diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index b3ceef7e697..6cd03cad6d0 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include @@ -16,7 +15,10 @@ #include #include -#include +#include +#include +#include +#include #include #if !defined(__clang__) @@ -100,9 +102,11 @@ std::string writeData(int rows, DB::StoragePtr & table, const DB::ContextPtr con block.insert(column); } - auto out = std::make_shared(table->write({}, metadata_snapshot, context)); - out->write(block); - out->writeSuffix(); + QueryPipeline pipeline(table->write({}, metadata_snapshot, context)); + + PushingPipelineExecutor executor(pipeline); + executor.push(block); + executor.finish(); return data; } @@ -120,9 +124,8 @@ std::string readData(DB::StoragePtr & table, const DB::ContextPtr context) QueryProcessingStage::Enum stage = table->getQueryProcessingStage( context, QueryProcessingStage::Complete, metadata_snapshot, query_info); - QueryPipeline pipeline; - pipeline.init(table->read(column_names, metadata_snapshot, query_info, context, stage, 8192, 1)); - BlockInputStreamPtr in = std::make_shared(std::move(pipeline)); + QueryPipeline pipeline(table->read(column_names, metadata_snapshot, query_info, context, stage, 8192, 1)); + PullingPipelineExecutor executor(pipeline); Block sample; { @@ -137,8 +140,12 @@ std::string readData(DB::StoragePtr & table, const DB::ContextPtr context) WriteBufferFromOwnString out_buf; BlockOutputStreamPtr output = FormatFactory::instance().getOutputStream("Values", out_buf, sample, context); - copyData(*in, *output); + Block data; + output->writePrefix(); + while (executor.pull(data)) + output->write(data); + output->writeSuffix(); output->flush(); return out_buf.str(); diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index 1b96a0fe713..afd81638da4 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -14,6 +14,8 @@ #include 
#include +#include + #include @@ -85,8 +87,8 @@ ColumnsDescription ITableFunctionFileLike::getActualTableStructure(ContextPtr co Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read); if (paths.empty()) throw Exception("Cannot get table structure from file, because no files match specified name", ErrorCodes::INCORRECT_FILE_NAME); - auto read_stream = StorageDistributedDirectoryMonitor::createSourceFromFile(paths[0]); - return ColumnsDescription{read_stream->getOutputs().front().getHeader().getNamesAndTypesList()}; + auto source = StorageDistributedDirectoryMonitor::createSourceFromFile(paths[0]); + return ColumnsDescription{source->getOutputs().front().getHeader().getNamesAndTypesList()}; } return parseColumnsListFromString(structure, context); } diff --git a/src/TableFunctions/TableFunctionMerge.cpp b/src/TableFunctions/TableFunctionMerge.cpp index 81dde4a12a4..f1ef4262d08 100644 --- a/src/TableFunctions/TableFunctionMerge.cpp +++ b/src/TableFunctions/TableFunctionMerge.cpp @@ -52,7 +52,7 @@ void TableFunctionMerge::parseArguments(const ASTPtr & ast_function, ContextPtr " - name of source database and regexp for table names.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - auto [is_regexp, database_ast] = evaluateDatabaseNameForMergeEngine(args[0], context); + auto [is_regexp, database_ast] = StorageMerge::evaluateDatabaseName(args[0], context); database_is_regexp = is_regexp; @@ -65,7 +65,7 @@ void TableFunctionMerge::parseArguments(const ASTPtr & ast_function, ContextPtr } -const TableFunctionMerge::DbToTableSetMap & TableFunctionMerge::getSourceDatabasesAndTables(ContextPtr context) const +const TableFunctionMerge::DBToTableSetMap & TableFunctionMerge::getSourceDatabasesAndTables(ContextPtr context) const { if (source_databases_and_tables) return *source_databases_and_tables; @@ -88,17 +88,10 @@ const TableFunctionMerge::DbToTableSetMap & TableFunctionMerge::getSourceDatabas auto databases = DatabaseCatalog::instance().getDatabases(); for (const auto & db : databases) - { if (database_re.match(db.first)) - { - auto source_tables = getMatchedTablesWithAccess(db.first, source_table_regexp, context); + (*source_databases_and_tables)[db.first] = getMatchedTablesWithAccess(db.first, source_table_regexp, context); - if (!source_tables.empty()) - (*source_databases_and_tables)[db.first] = source_tables; - } - } - - if ((*source_databases_and_tables).empty()) + if (source_databases_and_tables->empty()) throwNoTablesMatchRegexp(source_database_name_or_regexp, source_table_regexp); } diff --git a/src/TableFunctions/TableFunctionMerge.h b/src/TableFunctions/TableFunctionMerge.h index 73b61f8eb79..10221c8c72c 100644 --- a/src/TableFunctions/TableFunctionMerge.h +++ b/src/TableFunctions/TableFunctionMerge.h @@ -21,8 +21,8 @@ private: const char * getStorageTypeName() const override { return "Merge"; } using TableSet = std::set; - using DbToTableSetMap = std::map; - const DbToTableSetMap & getSourceDatabasesAndTables(ContextPtr context) const; + using DBToTableSetMap = std::map; + const DBToTableSetMap & getSourceDatabasesAndTables(ContextPtr context) const; ColumnsDescription getActualTableStructure(ContextPtr context) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; static TableSet getMatchedTablesWithAccess(const String & database_name, const String & table_regexp, const ContextPtr & context); @@ -30,7 +30,7 @@ private: String source_database_name_or_regexp; String source_table_regexp; bool 
database_is_regexp = false; - mutable std::optional source_databases_and_tables; + mutable std::optional source_databases_and_tables; }; diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 470b88de574..7ac434e03cd 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -609,7 +609,7 @@ class TestCase: if skip_reason is not None: return TestResult(self.name, TestStatus.SKIPPED, skip_reason, 0., "") - if args.testname and self.send_test_name_failed(suite, self.case): + if args.testname and self.send_test_name_failed(suite.suite, self.case): description = "\nServer does not respond to health check\n" return TestResult(self.name, TestStatus.FAIL, FailureReason.SERVER_DIED, 0., description) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index e56c0d4f3c2..b4e017b6228 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1748,7 +1748,7 @@ CLICKHOUSE_START_COMMAND = "clickhouse server --config-file=/etc/clickhouse-serv " --log-file=/var/log/clickhouse-server/clickhouse-server.log " \ " --errorlog-file=/var/log/clickhouse-server/clickhouse-server.err.log" -CLICKHOUSE_STAY_ALIVE_COMMAND = 'bash -c "{} --daemon; tail -f /dev/null"'.format(CLICKHOUSE_START_COMMAND) +CLICKHOUSE_STAY_ALIVE_COMMAND = 'bash -c "trap \'killall tail\' INT TERM; {} --daemon; coproc tail -f /dev/null; wait $$!"'.format(CLICKHOUSE_START_COMMAND) DOCKER_COMPOSE_TEMPLATE = ''' version: '2.3' @@ -2001,10 +2001,18 @@ class ClickHouseInstance: logging.warning("ClickHouse process already stopped") return - self.exec_in_container(["bash", "-c", "pkill {} clickhouse".format("-9" if kill else "")], user='root') - time.sleep(stop_wait_sec) - ps_clickhouse = self.exec_in_container(["bash", "-c", "ps -C clickhouse"], user='root') - if ps_clickhouse != " PID TTY STAT TIME COMMAND" : + sleep_time = 0.1 + num_steps = int(stop_wait_sec / sleep_time) + stopped = False + for step in range(num_steps): + self.exec_in_container(["bash", "-c", "pkill {} clickhouse".format("-9" if kill else "")], user='root') + time.sleep(sleep_time) + ps_clickhouse = self.exec_in_container(["bash", "-c", "ps -C clickhouse"], user='root') + if ps_clickhouse == " PID TTY STAT TIME COMMAND": + stopped = True + break + + if not stopped: logging.warning(f"Force kill clickhouse in stop_clickhouse. 
ps:{ps_clickhouse}") self.stop_clickhouse(kill=True) except Exception as e: @@ -2419,6 +2427,8 @@ class ClickHouseInstance: if self.stay_alive: entrypoint_cmd = CLICKHOUSE_STAY_ALIVE_COMMAND.replace("{main_config_file}", self.main_config_name) + else: + entrypoint_cmd = '[' + ', '.join(map(lambda x: '"' + x + '"', entrypoint_cmd.split())) + ']' logging.debug("Entrypoint cmd: {}".format(entrypoint_cmd)) diff --git a/tests/integration/test_rocksdb_options/test.py b/tests/integration/test_rocksdb_options/test.py index 6689c232081..e8542749d8d 100644 --- a/tests/integration/test_rocksdb_options/test.py +++ b/tests/integration/test_rocksdb_options/test.py @@ -58,8 +58,28 @@ def test_valid_column_family_options(start_cluster): DROP TABLE test; """) +def test_invalid_column_family_options(start_cluster): + node.exec_in_container(['bash', '-c', "sed -i 's/num_levels/no_such_column_family_option/g' /etc/clickhouse-server/config.d/rocksdb.xml"]) + node.restart_clickhouse() + with pytest.raises(QueryRuntimeException): + node.query(""" + CREATE TABLE test (key UInt64, value String) Engine=EmbeddedRocksDB PRIMARY KEY(key); + """) + node.exec_in_container(['bash', '-c', "sed -i 's/no_such_column_family_option/num_levels/g' /etc/clickhouse-server/config.d/rocksdb.xml"]) + node.restart_clickhouse() + def test_table_valid_column_family_options(start_cluster): node.query(""" CREATE TABLE test (key UInt64, value String) Engine=EmbeddedRocksDB PRIMARY KEY(key); DROP TABLE test; """) + +def test_table_invalid_column_family_options(start_cluster): + node.exec_in_container(['bash', '-c', "sed -i 's/max_bytes_for_level_base/no_such_table_column_family_option/g' /etc/clickhouse-server/config.d/rocksdb.xml"]) + node.restart_clickhouse() + with pytest.raises(QueryRuntimeException): + node.query(""" + CREATE TABLE test (key UInt64, value String) Engine=EmbeddedRocksDB PRIMARY KEY(key); + """) + node.exec_in_container(['bash', '-c', "sed -i 's/no_such_table_column_family_option/max_bytes_for_level_base/g' /etc/clickhouse-server/config.d/rocksdb.xml"]) + node.restart_clickhouse() diff --git a/tests/integration/test_send_crash_reports/test.py b/tests/integration/test_send_crash_reports/test.py index ab52879c7c3..55c63c3fe12 100644 --- a/tests/integration/test_send_crash_reports/test.py +++ b/tests/integration/test_send_crash_reports/test.py @@ -1,9 +1,14 @@ +# pylint: disable=redefined-outer-name +# pylint: disable=unused-argument +# pylint: disable=line-too-long +# pylint: disable=bare-except + import os import time +import pytest import helpers.cluster import helpers.test_tools -import pytest from . 
import fake_sentry_server @@ -20,7 +25,11 @@ def started_node(): cluster.start() yield node finally: - cluster.shutdown() + # It will print Fatal message after pkill -SEGV, suppress it + try: + cluster.shutdown() + except: + pass def test_send_segfault(started_node): @@ -30,7 +39,7 @@ def test_send_segfault(started_node): started_node.copy_file_to_container(os.path.join(SCRIPT_DIR, "fake_sentry_server.py"), "/fake_sentry_server.py") started_node.exec_in_container(["bash", "-c", "python3 /fake_sentry_server.py > /fake_sentry_server.log 2>&1"], detach=True, user="root") time.sleep(1) - started_node.exec_in_container(["bash", "-c", "pkill -11 clickhouse"], user="root") + started_node.exec_in_container(["bash", "-c", "pkill -SEGV clickhouse"], user="root") result = None for attempt in range(1, 6): @@ -38,9 +47,9 @@ def test_send_segfault(started_node): result = started_node.exec_in_container(['cat', fake_sentry_server.RESULT_PATH], user='root') if result == 'OK': break - elif result == 'INITIAL_STATE': + if result == 'INITIAL_STATE': continue - elif result: + if result: assert False, 'Unexpected state: ' + result assert result == 'OK', 'Crash report not sent' diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 698d2032d32..8883684730f 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -2161,7 +2161,7 @@ def test_kafka_no_holes_when_write_suffix_failed(kafka_cluster): # we have 0.25 (sleepEachRow) * 20 ( Rows ) = 5 sec window after "Polled batch of 20 messages" # while materialized view is working to inject zookeeper failure pm.drop_instance_zk_connections(instance) - instance.wait_for_log_line("Error.*(session has been expired|Connection loss).*while writing suffix to view") + instance.wait_for_log_line("Error.*(session has been expired|Connection loss).*while pushing to view") pm.heal_all() instance.wait_for_log_line("Committed offset 22") diff --git a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.reference b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.reference index 6e0517557ad..34c1c258e4b 100644 --- a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.reference +++ b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.reference @@ -3,7 +3,9 @@ b 1 c 1 a 1 +a 2 b 1 c 1 +c 2 1 diff --git a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh index 3c6a61b958b..472dad59710 100755 --- a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh +++ b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh @@ -25,7 +25,7 @@ if ${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (2)" 2>/dev/null; then fi echo -${CLICKHOUSE_CLIENT} --query "SELECT _table, d FROM merge('${CLICKHOUSE_DATABASE}', '^[abc]\$') ORDER BY _table" +${CLICKHOUSE_CLIENT} --query "SELECT _table, d FROM merge('${CLICKHOUSE_DATABASE}', '^[abc]\$') ORDER BY _table, d" ${CLICKHOUSE_CLIENT} --query "DROP TABLE root" ${CLICKHOUSE_CLIENT} --query "DROP TABLE a" diff --git a/tests/queries/0_stateless/01158_zookeeper_log_long.sql b/tests/queries/0_stateless/01158_zookeeper_log_long.sql index f067ce15079..61a36df68d8 100644 --- a/tests/queries/0_stateless/01158_zookeeper_log_long.sql +++ 
b/tests/queries/0_stateless/01158_zookeeper_log_long.sql @@ -4,7 +4,7 @@ drop table if exists rmt; -- cleanup code will perform extra Exists -- (so the .reference will not match) -create table rmt (n int) engine=ReplicatedMergeTree('/test/01158/{database}/rmt', '1') order by n settings cleanup_delay_period=86400; +create table rmt (n int) engine=ReplicatedMergeTree('/test/01158/{database}/rmt', '1') order by n settings cleanup_delay_period=86400, replicated_can_become_leader=0; system sync replica rmt; insert into rmt values (1); insert into rmt values (1); diff --git a/tests/queries/0_stateless/01160_table_dependencies.reference b/tests/queries/0_stateless/01160_table_dependencies.reference index 39a58b06076..a893d4882df 100644 --- a/tests/queries/0_stateless/01160_table_dependencies.reference +++ b/tests/queries/0_stateless/01160_table_dependencies.reference @@ -2,5 +2,6 @@ dict1 dict2 dict_src join +mv s t diff --git a/tests/queries/0_stateless/01160_table_dependencies.sh b/tests/queries/0_stateless/01160_table_dependencies.sh index 149439f2981..05d086ae1a4 100755 --- a/tests/queries/0_stateless/01160_table_dependencies.sh +++ b/tests/queries/0_stateless/01160_table_dependencies.sh @@ -30,6 +30,8 @@ $CLICKHOUSE_CLIENT -q "create table s (x default joinGet($CLICKHOUSE_DATABASE.jo $CLICKHOUSE_CLIENT -q "create table t (n int, m int default joinGet($CLICKHOUSE_DATABASE.join, 'm', 42::int), s String default dictGet($CLICKHOUSE_DATABASE.dict1, 's', 42::UInt64), x default in(1, $CLICKHOUSE_DATABASE.s)) engine=MergeTree order by n;" +$CLICKHOUSE_CLIENT -q "create materialized view mv to s as select n from t where n in (select n from join)" + CLICKHOUSE_CLIENT_DEFAULT_DB=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--database=${CLICKHOUSE_DATABASE}"'/--database=default/g') for _ in {1..10}; do diff --git a/tests/queries/0_stateless/01200_mutations_memory_consumption.sql b/tests/queries/0_stateless/01200_mutations_memory_consumption.sql index 6947c327adc..d3eb5dd165c 100644 --- a/tests/queries/0_stateless/01200_mutations_memory_consumption.sql +++ b/tests/queries/0_stateless/01200_mutations_memory_consumption.sql @@ -1,4 +1,4 @@ --- Tags: no-debug, no-parallel +-- Tags: no-debug, no-parallel, no-fasttest DROP TABLE IF EXISTS table_with_single_pk; diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 5c493dce98a..d347f149230 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -15,6 +15,7 @@ ALTER CLEAR COLUMN ['CLEAR COLUMN'] COLUMN ALTER COLUMN ALTER RENAME COLUMN ['RENAME COLUMN'] COLUMN ALTER COLUMN ALTER MATERIALIZE COLUMN ['MATERIALIZE COLUMN'] COLUMN ALTER COLUMN ALTER COLUMN [] \N ALTER TABLE +ALTER MODIFY COMMENT ['MODIFY COMMENT'] TABLE ALTER TABLE ALTER ORDER BY ['ALTER MODIFY ORDER BY','MODIFY ORDER BY'] TABLE ALTER INDEX ALTER SAMPLE BY ['ALTER MODIFY SAMPLE BY','MODIFY SAMPLE BY'] TABLE ALTER INDEX ALTER ADD INDEX ['ADD INDEX'] TABLE ALTER INDEX diff --git a/tests/queries/0_stateless/01410_nullable_key_and_index_negate_cond.reference b/tests/queries/0_stateless/01410_nullable_key_and_index_negate_cond.reference new file mode 100644 index 00000000000..f735f1378b6 --- /dev/null +++ b/tests/queries/0_stateless/01410_nullable_key_and_index_negate_cond.reference @@ -0,0 +1,3 @@ +s s s +s s s +s s s diff --git a/tests/queries/0_stateless/01410_nullable_key_and_index_negate_cond.sql 
b/tests/queries/0_stateless/01410_nullable_key_and_index_negate_cond.sql new file mode 100644 index 00000000000..3d1e3177173 --- /dev/null +++ b/tests/queries/0_stateless/01410_nullable_key_and_index_negate_cond.sql @@ -0,0 +1,16 @@ +drop table if exists test_23634; + +set force_primary_key=1; + +CREATE TABLE test_23634 (id Nullable(String), s Nullable(String), s1 Nullable(String)) +ENGINE = MergeTree() ORDER BY (id,s) SETTINGS allow_nullable_key = 1; + +INSERT into test_23634 values ('s','s','s'), (null,'s1','s1'), (null,null,'s2'), (null,null,null); + +select * from test_23634 where id !=''; + +select * from test_23634 where id !='' and s != ''; + +select * from test_23634 where id !='' and s != '' and s1 != ''; + +drop table test_23634; diff --git a/tests/queries/0_stateless/01710_projection_vertical_merges.reference b/tests/queries/0_stateless/01710_projection_vertical_merges.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01710_projection_vertical_merges.sql b/tests/queries/0_stateless/01710_projection_vertical_merges.sql new file mode 100644 index 00000000000..d54fef7e71d --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_vertical_merges.sql @@ -0,0 +1,19 @@ +-- Tags: long, no-parallel + +drop table if exists t; + +create table t (c1 Int64, c2 String, c3 DateTime, c4 Int8, c5 String, c6 String, c7 String, c8 String, c9 String, c10 String, c11 String, c12 String, c13 Int8, c14 Int64, c15 String, c16 String, c17 String, c18 Int64, c19 Int64, c20 Int64) engine MergeTree order by c18; + +insert into t (c1, c18) select number, -number from numbers(2000000); + +alter table t add projection p_norm (select * order by c1); + +optimize table t final; + +alter table t materialize projection p_norm settings mutations_sync = 1; + +set allow_experimental_projection_optimization = 1, max_rows_to_read = 3; + +select c18 from t where c1 < 0; + +drop table t; diff --git a/tests/queries/0_stateless/02030_client_unknown_database.reference b/tests/queries/0_stateless/02030_client_unknown_database.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02030_client_unknown_database.sh b/tests/queries/0_stateless/02030_client_unknown_database.sh new file mode 100755 index 00000000000..28bd4895a23 --- /dev/null +++ b/tests/queries/0_stateless/02030_client_unknown_database.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CLICKHOUSE_DATABASE=no_such_database_could_exist + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "SELECT 1" |& grep -q UNKNOWN_DATABASE diff --git a/tests/queries/0_stateless/02030_rocksdb_race_long.reference b/tests/queries/0_stateless/02030_rocksdb_race_long.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02030_rocksdb_race_long.sh b/tests/queries/0_stateless/02030_rocksdb_race_long.sh new file mode 100755 index 00000000000..88c30852c86 --- /dev/null +++ b/tests/queries/0_stateless/02030_rocksdb_race_long.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# Tags: race + +unset CLICKHOUSE_LOG_COMMENT + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +set -o errexit +set -o pipefail + +echo " + DROP TABLE IF EXISTS rocksdb_race; + CREATE TABLE rocksdb_race (key String, value UInt32) Engine=EmbeddedRocksDB PRIMARY KEY(key); + INSERT INTO rocksdb_race SELECT '1_' || toString(number), number FROM numbers(100000); +" | $CLICKHOUSE_CLIENT -n + +function read_stat_thread() +{ + while true; do + echo " + SELECT * FROM system.rocksdb FORMAT Null; + " | $CLICKHOUSE_CLIENT -n + done +} + +function truncate_thread() +{ + while true; do + sleep 3s; + echo " + TRUNCATE TABLE rocksdb_race; + " | $CLICKHOUSE_CLIENT -n + done +} + +# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout +export -f read_stat_thread; +export -f truncate_thread; + +TIMEOUT=20 + +timeout $TIMEOUT bash -c read_stat_thread 2> /dev/null & +timeout $TIMEOUT bash -c truncate_thread 2> /dev/null & + +wait + +$CLICKHOUSE_CLIENT -q "DROP TABLE rocksdb_race" diff --git a/tests/queries/0_stateless/2020_alter_table_modify_comment.reference b/tests/queries/0_stateless/2020_alter_table_modify_comment.reference new file mode 100644 index 00000000000..29ade427196 --- /dev/null +++ b/tests/queries/0_stateless/2020_alter_table_modify_comment.reference @@ -0,0 +1,144 @@ +engine : Null +initial comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Null\nCOMMENT \'Test table with comment\' +comment= Test table with comment + +change a comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Null\nCOMMENT \'new comment on a table\' +comment= new comment on a table + +remove a comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Null +comment= + +add a comment back +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Null\nCOMMENT \'another comment on a table\' +comment= another comment on a table + +detach table +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Null\nCOMMENT \'another comment on a table\' + +re-attach table +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Null\nCOMMENT \'another comment on a table\' +comment= another comment on a table + +engine : Memory +initial comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Memory\nCOMMENT \'Test table with comment\' +comment= Test table with comment + +change a comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Memory\nCOMMENT \'new comment on a table\' +comment= new comment on a table + +remove a comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Memory +comment= + +add a comment back +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Memory\nCOMMENT \'another comment on a table\' +comment= another comment on a table + +detach table +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Memory\nCOMMENT \'another comment on a table\' + +re-attach table +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Memory\nCOMMENT \'another comment on a table\' +comment= another comment on a table + +engine : MergeTree() ORDER BY k +initial comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = MergeTree\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' +comment= Test table with comment + +change a 
comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = MergeTree\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'new comment on a table\' +comment= new comment on a table + +remove a comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = MergeTree\nORDER BY k\nSETTINGS index_granularity = 8192 +comment= + +add a comment back +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = MergeTree\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'another comment on a table\' +comment= another comment on a table + +detach table +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = MergeTree\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'another comment on a table\' + +re-attach table +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = MergeTree\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'another comment on a table\' +comment= another comment on a table + +engine : Log +initial comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Log\nCOMMENT \'Test table with comment\' +comment= Test table with comment + +change a comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Log\nCOMMENT \'new comment on a table\' +comment= new comment on a table + +remove a comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Log +comment= + +add a comment back +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Log\nCOMMENT \'another comment on a table\' +comment= another comment on a table + +detach table +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Log\nCOMMENT \'another comment on a table\' + +re-attach table +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = Log\nCOMMENT \'another comment on a table\' +comment= another comment on a table + +engine : TinyLog +initial comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = TinyLog\nCOMMENT \'Test table with comment\' +comment= Test table with comment + +change a comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = TinyLog\nCOMMENT \'new comment on a table\' +comment= new comment on a table + +remove a comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = TinyLog +comment= + +add a comment back +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = TinyLog\nCOMMENT \'another comment on a table\' +comment= another comment on a table + +detach table +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = TinyLog\nCOMMENT \'another comment on a table\' + +re-attach table +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = TinyLog\nCOMMENT \'another comment on a table\' +comment= another comment on a table + +engine : ReplicatedMergeTree('/clickhouse/2020_alter_table_modify_comment_default', '1') ORDER BY k +initial comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/2020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' +comment= Test table with comment + +change a comment +CREATE TABLE 
default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/2020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' +comment= Test table with comment + +remove a comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/2020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' +comment= Test table with comment + +add a comment back +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/2020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' +comment= Test table with comment + +detach table +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/2020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' + +re-attach table +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/2020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' +comment= Test table with comment + diff --git a/tests/queries/0_stateless/2020_alter_table_modify_comment.sh b/tests/queries/0_stateless/2020_alter_table_modify_comment.sh new file mode 100755 index 00000000000..c674f21034c --- /dev/null +++ b/tests/queries/0_stateless/2020_alter_table_modify_comment.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function get_table_comment_info() +{ + $CLICKHOUSE_CLIENT --query="SHOW CREATE TABLE comment_test_table;" + $CLICKHOUSE_CLIENT --query="SELECT 'comment=', comment FROM system.tables WHERE database=currentDatabase() and name='comment_test_table'" + echo # just a newline +} + +function test_table_comments() +{ + local ENGINE_NAME="$1" + echo "engine : ${ENGINE_NAME}" + + $CLICKHOUSE_CLIENT -nm <