mirror of https://github.com/ClickHouse/ClickHouse.git

Commit 6317208add: Merge branch 'master' into aku/perf
@@ -1,9 +1,9 @@
 # This strings autochanged from release_lib.sh:
-SET(VERSION_REVISION 54447)
+SET(VERSION_REVISION 54448)
 SET(VERSION_MAJOR 21)
-SET(VERSION_MINOR 2)
+SET(VERSION_MINOR 3)
 SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH 53d0c9fa7255aa1dc48991d19f4246ff71cc2fd7)
-SET(VERSION_DESCRIBE v21.2.1.1-prestable)
-SET(VERSION_STRING 21.2.1.1)
+SET(VERSION_GITHASH ef72ba7349f230321750c13ee63b49a11a7c0adc)
+SET(VERSION_DESCRIBE v21.3.1.1-prestable)
+SET(VERSION_STRING 21.3.1.1)
 # end of autochange
debian/changelog (vendored)

@@ -1,5 +1,5 @@
-clickhouse (21.2.1.1) unstable; urgency=low
+clickhouse (21.3.1.1) unstable; urgency=low

   * Modified source code

- -- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 11 Jan 2021 11:12:08 +0300
+ -- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 01 Feb 2021 12:50:53 +0300
@@ -1,7 +1,7 @@
 FROM ubuntu:18.04

 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.2.1.*
+ARG version=21.3.1.*

 RUN apt-get update \
     && apt-get install --yes --no-install-recommends \
@@ -1,7 +1,7 @@
 FROM ubuntu:20.04

 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.2.1.*
+ARG version=21.3.1.*
 ARG gosu_ver=1.10

 # user/group precreated explicitly with fixed uid/gid on purpose.
@@ -1,7 +1,7 @@
 FROM ubuntu:18.04

 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.2.1.*
+ARG version=21.3.1.*

 RUN apt-get update && \
     apt-get install -y apt-transport-https dirmngr && \
@@ -40,7 +40,7 @@ $ cd ClickHouse

 ``` bash
 $ mkdir build
 $ cd build
-$ cmake ..-DCMAKE_C_COMPILER=`brew --prefix llvm`/bin/clang -DCMAKE_CXX_COMPILER=`brew --prefix llvm`/bin/clang++ -DCMAKE_PREFIX_PATH=`brew --prefix llvm`
+$ cmake .. -DCMAKE_C_COMPILER=`brew --prefix llvm`/bin/clang -DCMAKE_CXX_COMPILER=`brew --prefix llvm`/bin/clang++ -DCMAKE_PREFIX_PATH=`brew --prefix llvm`
 $ ninja
 $ cd ..
 ```
@@ -45,7 +45,10 @@ ORDER BY expr
 [PARTITION BY expr]
 [PRIMARY KEY expr]
 [SAMPLE BY expr]
-[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...]
+[TTL expr
+    [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ]
+    [WHERE conditions]
+    [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ]
 [SETTINGS name=value, ...]
 ```
@@ -80,7 +83,7 @@ For a description of parameters, see the [CREATE query description](../../../sql
 Expression must have one `Date` or `DateTime` column as a result. Example:
 `TTL date + INTERVAL 1 DAY`

-Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`). Default type of the rule is removal (`DELETE`). List of multiple rules can specified, but there should be no more than one `DELETE` rule.
+Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`), or aggregating values in expired rows. Default type of the rule is removal (`DELETE`). A list of multiple rules can be specified, but there should be no more than one `DELETE` rule.

 For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl)
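A minimal sketch of how these rule types might be combined (the table, column, and storage-policy names are hypothetical, and the move rule assumes a storage policy that defines a `cold` volume):

``` sql
CREATE TABLE ttl_rules_sketch
(
    d DateTime,
    value UInt64
)
ENGINE = MergeTree
ORDER BY d
TTL d + INTERVAL 1 WEEK TO VOLUME 'cold',  -- move a part once all of its rows are a week old
    d + INTERVAL 1 MONTH DELETE            -- at most one DELETE rule per table
SETTINGS storage_policy = 'tiered';        -- hypothetical policy that provides the 'cold' volume
```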
@@ -455,18 +458,28 @@ ALTER TABLE example_table
 Table can have an expression for removal of expired rows, and multiple expressions for automatic move of parts between [disks or volumes](#table_engine-mergetree-multiple-volumes). When rows in the table expire, ClickHouse deletes all corresponding rows. For parts moving feature, all rows of a part must satisfy the movement expression criteria.

 ``` sql
-TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ...
+TTL expr
+    [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ...
+    [WHERE conditions]
+    [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ]
 ```

 Type of TTL rule may follow each TTL expression. It affects an action which is to be done once the expression is satisfied (reaches current time):

 - `DELETE` - delete expired rows (default action);
 - `TO DISK 'aaa'` - move part to the disk `aaa`;
-- `TO VOLUME 'bbb'` - move part to the disk `bbb`.
+- `TO VOLUME 'bbb'` - move part to the volume `bbb`;
+- `GROUP BY` - aggregate expired rows.

-Examples:
-
-Creating a table with TTL
+With `WHERE` clause you may specify which of the expired rows to delete or aggregate (it cannot be applied to moves).
+
+`GROUP BY` expression must be a prefix of the table primary key.
+
+If a column is not part of the `GROUP BY` expression and is not set explicitly in the `SET` clause, in the result row it contains an occasional value from the grouped rows (as if the aggregate function `any` were applied to it).
+
+**Examples**
+
+Creating a table with TTL:

 ``` sql
 CREATE TABLE example_table
@@ -482,13 +495,43 @@ TTL d + INTERVAL 1 MONTH [DELETE],
     d + INTERVAL 2 WEEK TO DISK 'bbb';
 ```

-Altering TTL of the table
+Altering TTL of the table:

 ``` sql
 ALTER TABLE example_table
     MODIFY TTL d + INTERVAL 1 DAY;
 ```

+Creating a table, where the rows are expired after one month. The expired rows where dates are Mondays are deleted:
+
+``` sql
+CREATE TABLE table_with_where
+(
+    d DateTime,
+    a Int
+)
+ENGINE = MergeTree
+PARTITION BY toYYYYMM(d)
+ORDER BY d
+TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1;
+```
+
+Creating a table, where expired rows are aggregated. In the result rows `x` contains the maximum value across the grouped rows, `y` — the minimum value, and `d` — any occasional value from grouped rows.
+
+``` sql
+CREATE TABLE table_for_aggregation
+(
+    d DateTime,
+    k1 Int,
+    k2 Int,
+    x Int,
+    y Int
+)
+ENGINE = MergeTree
+ORDER BY k1, k2
+TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
+```
+
 **Removing Data**

 Data with an expired TTL is removed when ClickHouse merges data parts.
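Because removal happens during merges, expired rows may linger until the next background merge; a minimal sketch of forcing one on the table from the example above:

``` sql
-- Trigger an unscheduled merge so that rows with an expired TTL are dropped now
-- (this can be expensive on large tables).
OPTIMIZE TABLE example_table FINAL;
```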
@@ -8,118 +8,118 @@ toc_title: Adopters
!!! warning "Disclaimer"
The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. We’d appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won’t have any NDA issues by doing so. Providing updates with publications from other companies is also useful.
| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size<abbr title="of single replica"><sup>\*</sup></abbr> | Reference |
|---------|----------|---------|--------------|------------------------------------------------------------------------------|-----------|
| <a href="https://2gis.ru" class="favicon">2gis</a> | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) |
| <a href="https://getadmiral.com/" class="favicon">Admiral</a> | Martech | Engagement Management | — | — | [Webinar Slides, June 2020](https://altinity.com/presentations/2020/06/16/big-data-in-real-time-how-clickhouse-powers-admirals-visitor-relationships-for-publishers) |
| <a href="https://cn.aliyun.com/" class="favicon">Alibaba Cloud</a> | Cloud | Managed Service | — | — | [Official Website](https://help.aliyun.com/product/144466.html) |
| <a href="https://alohabrowser.com/" class="favicon">Aloha Browser</a> | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) |
| <a href="https://amadeus.com/" class="favicon">Amadeus</a> | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) |
| <a href="https://www.appsflyer.com" class="favicon">Appsflyer</a> | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) |
| <a href="https://arenadata.tech/" class="favicon">ArenaData</a> | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) |
| <a href="https://avito.ru/" class="favicon">Avito</a> | Classifieds | Monitoring | — | — | [Meetup, April 2020](https://www.youtube.com/watch?v=n1tm4j4W8ZQ) |
| <a href="https://badoo.com" class="favicon">Badoo</a> | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) |
| <a href="https://www.benocs.com/" class="favicon">Benocs</a> | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
| <a href="https://www.bigo.sg/" class="favicon">BIGO</a> | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) |
| <a href="https://www.bloomberg.com/" class="favicon">Bloomberg</a> | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
| <a href="https://bloxy.info" class="favicon">Bloxy</a> | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
| <a href="https://www.bytedance.com" class="favicon">Bytedance</a> | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) |
| <a href="https://cardsmobile.ru/" class="favicon">CardsMobile</a> | Finance | Analytics | — | — | [VC.ru](https://vc.ru/s/cardsmobile/143449-rukovoditel-gruppy-analiza-dannyh) |
| <a href="https://carto.com/" class="favicon">CARTO</a> | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) |
| <a href="http://public.web.cern.ch/public/" class="favicon">CERN</a> | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) |
| <a href="http://cisco.com/" class="favicon">Cisco</a> | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) |
| <a href="https://www.citadelsecurities.com/" class="favicon">Citadel Securities</a> | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) |
| <a href="https://city-mobil.ru" class="favicon">Citymobil</a> | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) |
| <a href="https://cloudflare.com" class="favicon">Cloudflare</a> | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) |
| <a href="https://corporate.comcast.com/" class="favicon">Comcast</a> | Media | CDN Traffic Analysis | — | — | [ApacheCon 2019 Talk](https://www.youtube.com/watch?v=e9TZ6gFDjNg) |
| <a href="https://contentsquare.com" class="favicon">ContentSquare</a> | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
| <a href="https://coru.net/" class="favicon">Corunet</a> | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) |
| <a href="https://www.creditx.com" class="favicon">CraiditX 氪信</a> | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) |
| <a href="https://crazypanda.ru/en/" class="favicon">Crazypanda</a> | Games | | — | — | Live session on ClickHouse meetup |
| <a href="https://www.criteo.com/" class="favicon">Criteo</a> | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) |
| <a href="https://www.chinatelecomglobal.com/" class="favicon">Dataliance for China Telecom</a> | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
| <a href="https://db.com" class="favicon">Deutsche Bank</a> | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
| <a href="https://deeplay.io/eng/" class="favicon">Deeplay</a> | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) |
| <a href="https://www.diva-e.com" class="favicon">Diva-e</a> | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
| <a href="https://www.ecwid.com/" class="favicon">Ecwid</a> | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) |
| <a href="https://www.ebay.com/" class="favicon">eBay</a> | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) |
| <a href="https://www.exness.com" class="favicon">Exness</a> | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
| <a href="https://fastnetmon.com/" class="favicon">FastNetMon</a> | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) |
| <a href="https://www.flipkart.com/" class="favicon">Flipkart</a> | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) |
| <a href="https://fun.co/rp" class="favicon">FunCorp</a> | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
| <a href="https://geniee.co.jp" class="favicon">Geniee</a> | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
| <a href="https://www.genotek.ru/" class="favicon">Genotek</a> | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) |
| <a href="https://www.huya.com/" class="favicon">HUYA</a> | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
| <a href="https://www.the-ica.com/" class="favicon">ICA</a> | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) |
| <a href="https://www.idealista.com" class="favicon">Idealista</a> | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
| <a href="https://www.infovista.com/" class="favicon">Infovista</a> | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) |
| <a href="https://www.innogames.com" class="favicon">InnoGames</a> | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) |
| <a href="https://www.instana.com" class="favicon">Instana</a> | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) |
| <a href="https://integros.com" class="favicon">Integros</a> | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
| <a href="https://ippon.tech" class="favicon">Ippon Technologies</a> | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) |
| <a href="https://www.ivi.ru/" class="favicon">Ivi</a> | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) |
| <a href="https://jinshuju.net" class="favicon">Jinshuju 金数据</a> | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
| <a href="https://www.kodiakdata.com/" class="favicon">Kodiak Data</a> | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
| <a href="https://kontur.ru" class="favicon">Kontur</a> | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
| <a href="https://www.kuaishou.com/" class="favicon">Kuaishou</a> | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.tech/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) |
| <a href="https://www.lbl.gov" class="favicon">Lawrence Berkeley National Laboratory</a> | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) |
| <a href="https://lifestreet.com/" class="favicon">LifeStreet</a> | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
| <a href="https://mcs.mail.ru/" class="favicon">Mail.ru Cloud Solutions</a> | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
| <a href="https://tech.mymarilyn.ru" class="favicon">Marilyn</a> | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) |
| <a href="https://mellodesign.ru/" class="favicon">Mello</a> | Marketing | Analytics | 1 server | — | [Article, Oct 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) |
| <a href="https://www.messagebird.com" class="favicon">MessageBird</a> | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
| <a href="https://www.mindsdb.com/" class="favicon">MindsDB</a> | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |x
| <a href="https://mux.com/" class="favicon">MUX</a> | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) |
| <a href="https://www.mgid.com/" class="favicon">MGID</a> | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
| <a href="https://getnoc.com/" class="favicon">NOC Project</a> | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) |
| <a href="https://www.nuna.com/" class="favicon">Nuna Inc.</a> | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) |
| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
| <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
| <a href="https://www.percona.com/" class="favicon">Percona</a> | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) |
| <a href="https://plausible.io/" class="favicon">Plausible</a> | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) |
| <a href="https://posthog.com/" class="favicon">PostHog</a> | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) |
| <a href="https://postmates.com/" class="favicon">Postmates</a> | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |
| <a href="http://www.pragma-innovation.fr/" class="favicon">Pragma Innovation</a> | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
| <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
| <a href="https://www.rbinternational.com/" class="favicon">Raiffeisenbank</a> | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) |
| <a href="https://rambler.ru" class="favicon">Rambler</a> | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
| <a href="https://retell.cc/" class="favicon">Retell</a> | Speech synthesis | Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) |
| <a href="https://rspamd.com/" class="favicon">Rspamd</a> | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) |
| <a href="https://rusiem.com/en" class="favicon">RuSIEM</a> | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) |
| <a href="https://www.s7.ru" class="favicon">S7 Airlines</a> | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
| <a href="https://www.scireum.de/" class="favicon">scireum GmbH</a> | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
| <a href="https://segment.com/" class="favicon">Segment</a> | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) |
| <a href="https://www.semrush.com/" class="favicon">SEMrush</a> | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) |
| <a href="https://sentry.io/" class="favicon">Sentry</a> | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) |
| <a href="https://seo.do/" class="favicon">seo.do</a> | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) |
| <a href="http://www.sgk.gov.tr/wps/portal/sgk/tr" class="favicon">SGK</a> | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) |
| <a href="http://english.sina.com/index.html" class="favicon">Sina</a> | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |
| <a href="https://smi2.ru/" class="favicon">SMI2</a> | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
| <a href="https://www.splunk.com/" class="favicon">Splunk</a> | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
| <a href="https://www.spotify.com" class="favicon">Spotify</a> | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
| <a href="https://www.staffcop.ru/" class="favicon">Staffcop</a> | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) |
| <a href="https://www.suning.com/" class="favicon">Suning</a> | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) |
| <a href="https://www.teralytics.net/" class="favicon">Teralytics</a> | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
| <a href="https://www.tencentmusic.com/" class="favicon">Tencent Music Entertainment (TME)</a> | BigData | Data processing | — | — | [Blog in Chinese, June 2020](https://cloud.tencent.com/developer/article/1637840) |
| <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a> | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
| <a href="https://www.uber.com" class="favicon">Uber</a> | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) |
| <a href="https://vk.com" class="favicon">VKontakte</a> | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
| <a href="https://www.walmartlabs.com/" class="favicon">Walmart Labs</a> | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) |
| <a href="https://wargaming.com/en/" class="favicon">Wargaming</a> | Games | | — | — | [Interview](https://habr.com/en/post/496954/) |
| <a href="https://wisebits.com/" class="favicon">Wisebits</a> | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
| <a href="https://www.workato.com/" class="favicon">Workato</a> | Automation Software | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=334) |
| <a href="http://www.xiaoxintech.cn/" class="favicon">Xiaoxin Tech</a> | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) |
| <a href="https://www.ximalaya.com/" class="favicon">Ximalaya</a> | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) |
| <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
| <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
| <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
| <a href="https://htc-cs.ru/" class="favicon">ЦВТ</a> | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
| <a href="https://mkb.ru/" class="favicon">МКБ</a> | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
| <a href="https://cft.ru/" class="favicon">ЦФТ</a> | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) |
|
||||
| <a href="https://www.kakaocorp.com/" class="favicon">kakaocorp</a> | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) |
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->
|
||||
|
@ -1,22 +1,21 @@
# system.distributed_ddl_queue {#system_tables-distributed_ddl_queue}

Contains information about distributed ddl queries (ON CLUSTER queries) that were executed on a cluster.
Contains information about [distributed ddl queries (ON CLUSTER clause)](../../sql-reference/distributed-ddl.md) that were executed on a cluster.

Columns:

- `entry` ([String](../../sql-reference/data-types/string.md)) - Query id.
- `host_name` ([String](../../sql-reference/data-types/string.md)) - Hostname.
- `host_address` ([String](../../sql-reference/data-types/string.md)) - IP address that the Hostname resolves to.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) - Host Port.
- `status` ([Enum](../../sql-reference/data-types/enum.md)) - Stats of the query.
- `cluster` ([String](../../sql-reference/data-types/string.md)) - Cluster name.
- `query` ([String](../../sql-reference/data-types/string.md)) - Query executed.
- `initiator` ([String](../../sql-reference/data-types/string.md)) - Nod that executed the query.
- `query_start_time` ([Date](../../sql-reference/data-types/date.md)) — Query start time.
- `query_finish_time` ([Date](../../sql-reference/data-types/date.md)) — Query finish time.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — Duration of query execution in milliseconds.
- `exception_code` ([Enum](../../sql-reference/data-types/enum.md)) - Exception code from ZooKeeper.

- `entry` ([String](../../sql-reference/data-types/string.md)) — Query id.
- `host_name` ([String](../../sql-reference/data-types/string.md)) — Hostname.
- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP address that the Hostname resolves to.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host Port.
- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the query.
- `cluster` ([String](../../sql-reference/data-types/string.md)) — Cluster name.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query executed.
- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time.
- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — Duration of query execution (in milliseconds).
- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ZooKeeper](../../operations/tips.md#zookeeper).

**Example**

@ -62,6 +61,5 @@ exception_code: ZOK
2 rows in set. Elapsed: 0.025 sec.
```

[Original article](https://clickhouse.tech/docs/en/operations/system_tables/distributed_ddl_queuedistributed_ddl_queue.md) <!--hide-->

@ -13,7 +13,7 @@ Basic query format:
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```

You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
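
As a quick sketch of how a matcher with a modifier can appear in the column list (the table `insert_select_testtable` and its column `b` are illustrative placeholders, not part of this change), the following inserts values into every column except `b`:

``` sql
INSERT INTO insert_select_testtable (* EXCEPT(b)) VALUES (1, 2);
```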

For example, consider the table:

@ -278,5 +278,4 @@ Other ways to make settings see [here](../../../operations/settings/index.md).
SELECT * FROM some_table SETTINGS optimize_read_in_order=1, cast_keep_nullable=1;
```

[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/)
<!--hide-->
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/)<!--hide-->

@ -37,7 +37,10 @@ ORDER BY expr
[PARTITION BY expr]
[PRIMARY KEY expr]
[SAMPLE BY expr]
[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...]
[TTL expr
    [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ]
    [WHERE conditions]
    [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ]
[SETTINGS name=value, ...]
```

@ -71,7 +74,7 @@ ORDER BY expr

Выражение должно возвращать столбец `Date` или `DateTime`. Пример: `TTL date + INTERVAL 1 DAY`.

Тип правила `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'` указывает действие, которое будет выполнено с частью, удаление строк (прореживание), перемещение (при выполнении условия для всех строк части) на определённый диск (`TO DISK 'xxx'`) или том (`TO VOLUME 'xxx'`). Поведение по умолчанию соответствует удалению строк (`DELETE`). В списке правил может быть указано только одно выражение с поведением `DELETE`.
Тип правила `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` указывает действие, которое будет выполнено с частью: удаление строк (прореживание), перемещение (при выполнении условия для всех строк части) на определённый диск (`TO DISK 'xxx'`) или том (`TO VOLUME 'xxx'`), или агрегирование данных в устаревших строках. Поведение по умолчанию соответствует удалению строк (`DELETE`). В списке правил может быть указано только одно выражение с поведением `DELETE`.

Дополнительные сведения смотрите в разделе [TTL для столбцов и таблиц](#table_engine-mergetree-ttl)

@ -443,16 +446,28 @@ ALTER TABLE example_table

Для таблицы можно задать одно выражение для устаревания данных, а также несколько выражений, по срабатывании которых данные переместятся на [некоторый диск или том](#table_engine-mergetree-multiple-volumes). Когда некоторые данные в таблице устаревают, ClickHouse удаляет все соответствующие строки.

``` sql
TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ...
TTL expr
    [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ...
    [WHERE conditions]
    [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ]
```

За каждым TTL выражением может следовать тип действия, которое выполняется после достижения времени, соответствующего результату TTL выражения:

- `DELETE` - удалить данные (действие по умолчанию);
- `TO DISK 'aaa'` - переместить данные на диск `aaa`;
- `TO VOLUME 'bbb'` - переместить данные на том `bbb`.
- `TO VOLUME 'bbb'` - переместить данные на том `bbb`;
- `GROUP BY` - агрегировать данные.

Примеры:
В секции `WHERE` можно задать условие удаления или агрегирования устаревших строк (для перемещения условие `WHERE` не применимо).

Колонки, по которым агрегируются данные в `GROUP BY`, должны являться префиксом первичного ключа таблицы.

Если колонка не является частью выражения `GROUP BY` и не задается напрямую в секции `SET`, в результирующих строках она будет содержать случайное значение, взятое из одной из сгруппированных строк (как будто к ней применяется агрегирующая функция `any`).

**Примеры**

Создание таблицы с TTL:

``` sql
CREATE TABLE example_table
@ -468,13 +483,43 @@ TTL d + INTERVAL 1 MONTH [DELETE],
    d + INTERVAL 2 WEEK TO DISK 'bbb';
```

Изменение TTL
Изменение TTL:

``` sql
ALTER TABLE example_table
    MODIFY TTL d + INTERVAL 1 DAY;
```

Создание таблицы, в которой строки устаревают через месяц. Устаревшие строки удаляются, если дата выпадает на понедельник:

``` sql
CREATE TABLE table_with_where
(
    d DateTime,
    a Int
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY d
TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1;
```

Создание таблицы, где устаревшие строки агрегируются. В результирующих строках колонка `x` содержит максимальное значение по сгруппированным строкам, `y` — минимальное значение, а `d` — случайное значение из одной из сгруппированных строк.

``` sql
CREATE TABLE table_for_aggregation
(
    d DateTime,
    k1 Int,
    k2 Int,
    x Int,
    y Int
)
ENGINE = MergeTree
ORDER BY k1, k2
TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
```

**Удаление данных**

Данные с истекшим TTL удаляются, когда ClickHouse мёржит куски данных.
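
Чтобы не дожидаться планового мёржа, удаление или агрегирование устаревших строк можно вызвать вручную внеплановым мёржем (примерный запрос; таблица `table_for_aggregation` взята из примера выше):

``` sql
OPTIMIZE TABLE table_for_aggregation FINAL;
```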

65
docs/ru/operations/system-tables/distributed_ddl_queue.md
Normal file
@ -0,0 +1,65 @@
# system.distributed_ddl_queue {#system_tables-distributed_ddl_queue}

Содержит информацию о [распределенных ddl запросах (секция ON CLUSTER)](../../sql-reference/distributed-ddl.md), которые были выполнены на кластере.

Столбцы:

- `entry` ([String](../../sql-reference/data-types/string.md)) — идентификатор запроса.
- `host_name` ([String](../../sql-reference/data-types/string.md)) — имя хоста.
- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP-адрес хоста.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — порт для соединения с сервером.
- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — состояние запроса.
- `cluster` ([String](../../sql-reference/data-types/string.md)) — имя кластера.
- `query` ([String](../../sql-reference/data-types/string.md)) — выполненный запрос.
- `initiator` ([String](../../sql-reference/data-types/string.md)) — узел, выполнивший запрос.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала запроса.
- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время окончания запроса.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — продолжительность выполнения запроса (в миллисекундах).
- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — код исключения из [ZooKeeper](../../operations/tips.md#zookeeper).

**Пример**

``` sql
SELECT *
FROM system.distributed_ddl_queue
WHERE cluster = 'test_cluster'
LIMIT 2
FORMAT Vertical

Query id: f544e72a-6641-43f1-836b-24baa1c9632a

Row 1:
──────
entry: query-0000000000
host_name: clickhouse01
host_address: 172.23.0.11
port: 9000
status: Finished
cluster: test_cluster
query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
initiator: clickhouse01:9000
query_start_time: 2020-12-30 13:07:51
query_finish_time: 2020-12-30 13:07:51
query_duration_ms: 6
exception_code: ZOK

Row 2:
──────
entry: query-0000000000
host_name: clickhouse02
host_address: 172.23.0.12
port: 9000
status: Finished
cluster: test_cluster
query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
initiator: clickhouse01:9000
query_start_time: 2020-12-30 13:07:51
query_finish_time: 2020-12-30 13:07:51
query_duration_ms: 6
exception_code: ZOK

2 rows in set. Elapsed: 0.025 sec.
```

[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/distributed_ddl_queuedistributed_ddl_queue.md) <!--hide-->

@ -13,7 +13,7 @@ toc_title: INSERT INTO
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```

Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как `APPLY`, `EXCEPT`, `REPLACE`.
Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).

В качестве примера рассмотрим таблицу:

@ -162,6 +162,112 @@ Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of

Подробнее смотрите в разделе «Настройки». Присутствует возможность использовать внешнюю сортировку (с сохранением временных данных на диск) и внешнюю агрегацию.

## Модификаторы запроса SELECT {#select-modifiers}

Вы можете использовать следующие модификаторы в запросах `SELECT`.

### APPLY {#apply-modifier}

Вызывает указанную функцию для каждой строки, возвращаемой внешним табличным выражением запроса.

**Синтаксис:**

``` sql
SELECT <expr> APPLY( <func> ) FROM [db.]table_name
```

**Пример:**

``` sql
CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) ENGINE = MergeTree ORDER by (i);
INSERT INTO columns_transformers VALUES (100, 10, 324), (120, 8, 23);
SELECT * APPLY(sum) FROM columns_transformers;
```

```
┌─sum(i)─┬─sum(j)─┬─sum(k)─┐
│ 220 │ 18 │ 347 │
└────────┴────────┴────────┘
```

### EXCEPT {#except-modifier}

Исключает из результата запроса один или несколько столбцов.

**Синтаксис:**

``` sql
SELECT <expr> EXCEPT ( col_name1 [, col_name2, col_name3, ...] ) FROM [db.]table_name
```

**Пример:**

``` sql
SELECT * EXCEPT (i) from columns_transformers;
```

```
┌──j─┬───k─┐
│ 10 │ 324 │
│ 8 │ 23 │
└────┴─────┘
```

### REPLACE {#replace-modifier}

Определяет одно или несколько [выражений алиасов](../../../sql-reference/syntax.md#syntax-expression_aliases). Каждый алиас должен соответствовать имени столбца из запроса `SELECT *`. В списке столбцов результата запроса имя столбца, соответствующее алиасу, заменяется выражением в модификаторе `REPLACE`.

Этот модификатор не изменяет имена или порядок столбцов. Однако он может изменить значение и тип значения.

**Синтаксис:**

``` sql
SELECT <expr> REPLACE( <expr> AS col_name) from [db.]table_name
```

**Пример:**

``` sql
SELECT * REPLACE(i + 1 AS i) from columns_transformers;
```

```
┌───i─┬──j─┬───k─┐
│ 101 │ 10 │ 324 │
│ 121 │ 8 │ 23 │
└─────┴────┴─────┘
```

### Комбинации модификаторов {#modifier-combinations}

Вы можете использовать каждый модификатор отдельно или комбинировать их.

**Примеры:**

Использование одного и того же модификатора несколько раз.

``` sql
SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) from columns_transformers;
```

```
┌─max(length(toString(j)))─┬─max(length(toString(k)))─┐
│ 2 │ 3 │
└──────────────────────────┴──────────────────────────┘
```

Использование нескольких модификаторов в одном запросе.

``` sql
SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers;
```

```
┌─sum(plus(i, 1))─┬─sum(k)─┐
│ 222 │ 347 │
└─────────────────┴────────┘
```

## SETTINGS в запросе SELECT {#settings-in-select}

Вы можете задать значения необходимых настроек непосредственно в запросе `SELECT` в секции `SETTINGS`. Эти настройки действуют только в рамках данного запроса, а после его выполнения сбрасываются до предыдущего значения или значения по умолчанию.
@ -174,5 +280,4 @@ Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of
SELECT * FROM some_table SETTINGS optimize_read_in_order=1, cast_keep_nullable=1;
```

[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/)
<!--hide-->
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/)<!--hide-->

@ -1310,3 +1310,14 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
**另请参阅**

- [IN 运算符中的 NULL 处理](../../sql-reference/operators/in.md#in-null-processing)

## max_final_threads {#max-final-threads}

设置使用[FINAL](../../sql-reference/statements/select/from.md#select-from-final) 限定符的`SELECT`查询, 在数据读取阶段的最大并发线程数。

可能的值:

- 正整数。
- 0 or 1 — 禁用。 此时`SELECT` 查询单线程执行。

默认值: `16`。
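
示意用法(示例;其中的表名 `test_final_table` 仅为假设,且假设该表使用 ReplacingMergeTree 引擎),可以只为单个查询调整该设置:

``` sql
SELECT count() FROM test_final_table FINAL SETTINGS max_final_threads = 8;
```
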
@ -1,11 +1,12 @@
|
||||
---
|
||||
toc_folder_title: 聚合函数
|
||||
toc_priority: 33
|
||||
toc_title: 聚合函数
|
||||
toc_title: 简介
|
||||
---
|
||||
|
||||
# 聚合函数 {#aggregate-functions}
|
||||
|
||||
聚合函数在 [正常](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) 方式如预期的数据库专家。
|
||||
聚合函数如数据库专家预期的方式 [正常](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) 工作。
|
||||
|
||||
ClickHouse还支持:
|
||||
|
||||
@ -14,7 +15,7 @@ ClickHouse还支持:
|
||||
|
||||
## 空处理 {#null-processing}
|
||||
|
||||
在聚合过程中,所有 `NULL`s被跳过。
|
||||
在聚合过程中,所有 `NULL` 被跳过。
|
||||
|
||||
**例:**
|
||||
|
||||
@ -30,7 +31,7 @@ ClickHouse还支持:
|
||||
└───┴──────┘
|
||||
```
|
||||
|
||||
比方说,你需要在总的值 `y` 列:
|
||||
比方说,你需要计算 `y` 列的总数:
|
||||
|
||||
``` sql
|
||||
SELECT sum(y) FROM t_null_big
|
||||
@ -40,9 +41,8 @@ SELECT sum(y) FROM t_null_big
|
||||
│ 7 │
|
||||
└────────┘
|
||||
|
||||
该 `sum` 函数解释 `NULL` 作为 `0`. 特别是,这意味着,如果函数接收输入的选择,其中所有的值 `NULL`,那么结果将是 `0`,不 `NULL`.
|
||||
|
||||
现在你可以使用 `groupArray` 函数从创建一个数组 `y` 列:
|
||||
现在你可以使用 `groupArray` 函数用 `y` 列创建一个数组:
|
||||
|
||||
``` sql
|
||||
SELECT groupArray(y) FROM t_null_big
|
||||
@ -54,6 +54,6 @@ SELECT groupArray(y) FROM t_null_big
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
`groupArray` 不包括 `NULL` 在生成的数组中。
|
||||
在 `groupArray` 生成的数组中不包括 `NULL`。
|
||||
|
||||
[原始文章](https://clickhouse.tech/docs/en/query_language/agg_functions/) <!--hide-->
|
||||
|
@ -1,9 +1,9 @@
|
||||
---
|
||||
toc_priority: 36
|
||||
toc_title: 聚合函数
|
||||
toc_title: 参考手册
|
||||
---
|
||||
|
||||
# 聚合函数引用 {#aggregate-functions-reference}
|
||||
# 参考手册 {#aggregate-functions-reference}
|
||||
|
||||
## count {#agg_function-count}
|
||||
|
||||
|
@ -25,11 +25,13 @@ toc_title: FROM
|
||||
- [Replicated](../../../engines/table-engines/mergetree-family/replication.md) 版本 `MergeTree` 引擎
|
||||
- [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md),和 [MaterializedView](../../../engines/table-engines/special/materializedview.md) 在其他引擎上运行的引擎,只要是它们底层是 `MergeTree`-引擎表即可。
|
||||
|
||||
现在使用 `FINAL` 修饰符 的 `SELECT` 查询启用了并发执行, 这会快一点。但是仍然存在缺陷 (见下)。 [max_final_threads](../../../operations/settings/settings.md#max-final-threads) 设置使用的最大线程数限制。
|
||||
|
||||
### 缺点 {#drawbacks}
|
||||
|
||||
使用的查询 `FINAL` 执行速度不如类似的查询那么快,因为:
|
||||
使用的查询 `FINAL` 执行速度比类似的查询慢一点,因为:
|
||||
|
||||
- 查询在单个线程中执行,并在查询执行期间合并数据。
|
||||
- 在查询执行期间合并数据。
|
||||
- 查询与 `FINAL` 除了读取查询中指定的列之外,还读取主键列。
|
||||
|
||||
**在大多数情况下,避免使用 `FINAL`.** 常见的方法是使用假设后台进程的不同查询 `MergeTree` 引擎还没有发生,并通过应用聚合(例如,丢弃重复项)来处理它。 {## TODO: examples ##}
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "Suggest.h"
|
||||
|
||||
#include <Core/Settings.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
@ -86,6 +87,9 @@ Suggest::Suggest()
|
||||
|
||||
void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit)
|
||||
{
|
||||
/// NOTE: Once you will update the completion list,
|
||||
/// do not forget to update 01676_clickhouse_client_autocomplete.sh
|
||||
|
||||
std::stringstream query; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
query << "SELECT DISTINCT arrayJoin(extractAll(name, '[\\\\w_]{2,}')) AS res FROM ("
|
||||
"SELECT name FROM system.functions"
|
||||
@ -104,6 +108,18 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo
|
||||
" UNION ALL "
|
||||
"SELECT cluster FROM system.clusters"
|
||||
" UNION ALL "
|
||||
"SELECT name FROM system.errors"
|
||||
" UNION ALL "
|
||||
"SELECT event FROM system.events"
|
||||
" UNION ALL "
|
||||
"SELECT metric FROM system.asynchronous_metrics"
|
||||
" UNION ALL "
|
||||
"SELECT metric FROM system.metrics"
|
||||
" UNION ALL "
|
||||
"SELECT macro FROM system.macros"
|
||||
" UNION ALL "
|
||||
"SELECT policy_name FROM system.storage_policies"
|
||||
" UNION ALL "
|
||||
"SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate";
|
||||
|
||||
/// The user may disable loading of databases, tables, columns by setting suggestion_limit to zero.
|
||||
@ -123,12 +139,17 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo
|
||||
|
||||
query << ") WHERE notEmpty(res)";
|
||||
|
||||
fetch(connection, timeouts, query.str());
|
||||
Settings settings;
|
||||
/// To show all rows from:
|
||||
/// - system.errors
|
||||
/// - system.events
|
||||
settings.system_events_show_zero_values = true;
|
||||
fetch(connection, timeouts, query.str(), settings);
|
||||
}
|
||||
|
||||
void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query)
|
||||
void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings)
|
||||
{
|
||||
connection.sendQuery(timeouts, query);
|
||||
connection.sendQuery(timeouts, query, "" /* query_id */, QueryProcessingStage::Complete, &settings);
|
||||
|
||||
while (true)
|
||||
{
|
||||
|
@ -33,7 +33,7 @@ public:
|
||||
private:
|
||||
|
||||
void loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit);
|
||||
void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query);
|
||||
void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings);
|
||||
void fillWordsFromBlock(const Block & block);
|
||||
|
||||
/// Words are fetched asynchronously.
|
||||
|
@ -642,7 +642,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
|
||||
query_deduplicate_ast_string += " OPTIMIZE TABLE " + getQuotedTable(original_table) +
|
||||
((partition_name == "'all'") ? " PARTITION ID " : " PARTITION ") + partition_name + " DEDUPLICATE;";
|
||||
|
||||
LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_alter_ast_string);
|
||||
LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_deduplicate_ast_string);
|
||||
|
||||
UInt64 num_nodes = executeQueryOnCluster(
|
||||
task_table.cluster_push,
|
||||
|
@ -261,6 +261,9 @@ StackTrace::StackTrace(const ucontext_t & signal_context)
|
||||
{
|
||||
tryCapture();
|
||||
|
||||
/// This variable from signal handler is not instrumented by Memory Sanitizer.
|
||||
__msan_unpoison(&signal_context, sizeof(signal_context));
|
||||
|
||||
void * caller_address = getCallerAddress(signal_context);
|
||||
|
||||
if (size == 0 && caller_address)
|
||||
|
@ -409,6 +409,15 @@ Block Block::cloneWithoutColumns() const
|
||||
return res;
|
||||
}
|
||||
|
||||
Block Block::cloneWithCutColumns(size_t start, size_t length) const
|
||||
{
|
||||
Block copy = *this;
|
||||
|
||||
for (auto & column_to_cut : copy.data)
|
||||
column_to_cut.column = column_to_cut.column->cut(start, length);
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
Block Block::sortColumns() const
|
||||
{
|
||||
|
@ -129,6 +129,7 @@ public:
|
||||
void setColumns(const Columns & columns);
|
||||
Block cloneWithColumns(const Columns & columns) const;
|
||||
Block cloneWithoutColumns() const;
|
||||
Block cloneWithCutColumns(size_t start, size_t length) const;
|
||||
|
||||
/** Get empty columns with the same types as in block. */
|
||||
MutableColumns cloneEmptyColumns() const;
|
||||
|
65
src/DataStreams/ITTLAlgorithm.cpp
Normal file
65
src/DataStreams/ITTLAlgorithm.cpp
Normal file
@ -0,0 +1,65 @@
|
||||
#include <DataStreams/ITTLAlgorithm.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
ITTLAlgorithm::ITTLAlgorithm(
|
||||
const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
|
||||
: description(description_)
|
||||
, old_ttl_info(old_ttl_info_)
|
||||
, current_time(current_time_)
|
||||
, force(force_)
|
||||
, date_lut(DateLUT::instance())
|
||||
{
|
||||
}
|
||||
|
||||
bool ITTLAlgorithm::isTTLExpired(time_t ttl) const
|
||||
{
|
||||
return (ttl && (ttl <= current_time));
|
||||
}
|
||||
|
||||
ColumnPtr ITTLAlgorithm::executeExpressionAndGetColumn(
|
||||
const ExpressionActionsPtr & expression, const Block & block, const String & result_column)
|
||||
{
|
||||
if (!expression)
|
||||
return nullptr;
|
||||
|
||||
if (block.has(result_column))
|
||||
return block.getByName(result_column).column;
|
||||
|
||||
Block block_copy;
|
||||
for (const auto & column_name : expression->getRequiredColumns())
|
||||
block_copy.insert(block.getByName(column_name));
|
||||
|
||||
/// Keep number of rows for const expression.
|
||||
size_t num_rows = block.rows();
|
||||
expression->execute(block_copy, num_rows);
|
||||
|
||||
return block_copy.getByName(result_column).column;
|
||||
}
|
||||
|
||||
UInt32 ITTLAlgorithm::getTimestampByIndex(const IColumn * column, size_t index) const
|
||||
{
|
||||
if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(column))
|
||||
return date_lut.fromDayNum(DayNum(column_date->getData()[index]));
|
||||
else if (const ColumnUInt32 * column_date_time = typeid_cast<const ColumnUInt32 *>(column))
|
||||
return column_date_time->getData()[index];
|
||||
else if (const ColumnConst * column_const = typeid_cast<const ColumnConst *>(column))
|
||||
{
|
||||
if (typeid_cast<const ColumnUInt16 *>(&column_const->getDataColumn()))
|
||||
return date_lut.fromDayNum(DayNum(column_const->getValue<UInt16>()));
|
||||
else if (typeid_cast<const ColumnUInt32 *>(&column_const->getDataColumn()))
|
||||
return column_const->getValue<UInt32>();
|
||||
}
|
||||
|
||||
throw Exception("Unexpected type of result TTL column", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
}
|
54
src/DataStreams/ITTLAlgorithm.h
Normal file
54
src/DataStreams/ITTLAlgorithm.h
Normal file
@ -0,0 +1,54 @@
|
||||
#pragma once
|
||||
|
||||
#include <Storages/TTLDescription.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartTTLInfo.h>
|
||||
#include <Storages/MergeTree/IMergeTreeDataPart.h>
|
||||
#include <common/DateLUT.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/**
|
||||
* Represents the actions, which are required to do
|
||||
* with data, when TTL is expired: delete, aggregate, etc.
|
||||
*/
|
||||
class ITTLAlgorithm
|
||||
{
|
||||
public:
|
||||
using TTLInfo = IMergeTreeDataPart::TTLInfo;
|
||||
using MutableDataPartPtr = MergeTreeMutableDataPartPtr;
|
||||
|
||||
ITTLAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
|
||||
virtual ~ITTLAlgorithm() = default;
|
||||
|
||||
virtual void execute(Block & block) = 0;
|
||||
|
||||
/// Updates TTL metadata of the data_part.
|
||||
virtual void finalize(const MutableDataPartPtr & data_part) const = 0;
|
||||
|
||||
bool isMinTTLExpired() const { return force || isTTLExpired(old_ttl_info.min); }
|
||||
bool isMaxTTLExpired() const { return isTTLExpired(old_ttl_info.max); }
|
||||
|
||||
/** This function is needed to avoid a conflict between already calculated columns and columns that needed to execute TTL.
|
||||
* If result column is absent in block, all required columns are copied to new block and expression is executed on new block.
|
||||
*/
|
||||
static ColumnPtr executeExpressionAndGetColumn(
|
||||
const ExpressionActionsPtr & expression, const Block & block, const String & result_column);
|
||||
|
||||
protected:
|
||||
bool isTTLExpired(time_t ttl) const;
|
||||
UInt32 getTimestampByIndex(const IColumn * column, size_t index) const;
|
||||
|
||||
const TTLDescription description;
|
||||
const TTLInfo old_ttl_info;
|
||||
const time_t current_time;
|
||||
const bool force;
|
||||
TTLInfo new_ttl_info;
|
||||
|
||||
private:
|
||||
const DateLUTImpl & date_lut;
|
||||
};
|
||||
|
||||
using TTLAlgorithmPtr = std::unique_ptr<ITTLAlgorithm>;
|
||||
|
||||
}
|
173
src/DataStreams/TTLAggregationAlgorithm.cpp
Normal file
173
src/DataStreams/TTLAggregationAlgorithm.cpp
Normal file
@ -0,0 +1,173 @@
|
||||
#include <DataStreams/TTLAggregationAlgorithm.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
TTLAggregationAlgorithm::TTLAggregationAlgorithm(
|
||||
const TTLDescription & description_,
|
||||
const TTLInfo & old_ttl_info_,
|
||||
time_t current_time_,
|
||||
bool force_,
|
||||
const Block & header_,
|
||||
const MergeTreeData & storage_)
|
||||
: ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
|
||||
, header(header_)
|
||||
{
|
||||
current_key_value.resize(description.group_by_keys.size());
|
||||
|
||||
ColumnNumbers keys;
|
||||
for (const auto & key : description.group_by_keys)
|
||||
keys.push_back(header.getPositionByName(key));
|
||||
|
||||
key_columns.resize(description.group_by_keys.size());
|
||||
AggregateDescriptions aggregates = description.aggregate_descriptions;
|
||||
|
||||
for (auto & descr : aggregates)
|
||||
if (descr.arguments.empty())
|
||||
for (const auto & name : descr.argument_names)
|
||||
descr.arguments.push_back(header.getPositionByName(name));
|
||||
|
||||
columns_for_aggregator.resize(description.aggregate_descriptions.size());
|
||||
const Settings & settings = storage_.global_context.getSettingsRef();
|
||||
|
||||
Aggregator::Params params(header, keys, aggregates,
|
||||
false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, 0, 0,
|
||||
settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
|
||||
storage_.global_context.getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
|
||||
|
||||
aggregator = std::make_unique<Aggregator>(params);
|
||||
}
|
||||
|
||||
void TTLAggregationAlgorithm::execute(Block & block)
|
||||
{
|
||||
if (!block)
|
||||
{
|
||||
if (!aggregation_result.empty())
|
||||
{
|
||||
MutableColumns result_columns = header.cloneEmptyColumns();
|
||||
finalizeAggregates(result_columns);
|
||||
block = header.cloneWithColumns(std::move(result_columns));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
const auto & column_names = header.getNames();
|
||||
MutableColumns result_columns = header.cloneEmptyColumns();
|
||||
MutableColumns aggregate_columns = header.cloneEmptyColumns();
|
||||
|
||||
auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
|
||||
auto where_column = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column);
|
||||
|
||||
size_t rows_aggregated = 0;
|
||||
size_t current_key_start = 0;
|
||||
size_t rows_with_current_key = 0;
|
||||
|
||||
for (size_t i = 0; i < block.rows(); ++i)
|
||||
{
|
||||
UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
|
||||
bool where_filter_passed = !where_column || where_column->getBool(i);
|
||||
bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed;
|
||||
|
||||
bool same_as_current = true;
|
||||
for (size_t j = 0; j < description.group_by_keys.size(); ++j)
|
||||
{
|
||||
const String & key_column = description.group_by_keys[j];
|
||||
const IColumn * values_column = block.getByName(key_column).column.get();
|
||||
if (!same_as_current || (*values_column)[i] != current_key_value[j])
|
||||
{
|
||||
values_column->get(i, current_key_value[j]);
|
||||
same_as_current = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!same_as_current)
|
||||
{
|
||||
if (rows_with_current_key)
|
||||
calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key);
|
||||
finalizeAggregates(result_columns);
|
||||
|
||||
current_key_start = rows_aggregated;
|
||||
rows_with_current_key = 0;
|
||||
}
|
||||
|
||||
if (ttl_expired)
|
||||
{
|
||||
++rows_with_current_key;
|
||||
++rows_aggregated;
|
||||
for (const auto & name : column_names)
|
||||
{
|
||||
const IColumn * values_column = block.getByName(name).column.get();
|
||||
auto & column = aggregate_columns[header.getPositionByName(name)];
|
||||
column->insertFrom(*values_column, i);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
new_ttl_info.update(cur_ttl);
|
||||
for (const auto & name : column_names)
|
||||
{
|
||||
const IColumn * values_column = block.getByName(name).column.get();
|
||||
auto & column = result_columns[header.getPositionByName(name)];
|
||||
column->insertFrom(*values_column, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rows_with_current_key)
|
||||
calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key);
|
||||
|
||||
block = header.cloneWithColumns(std::move(result_columns));
|
||||
}
|
||||
|
||||
void TTLAggregationAlgorithm::calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length)
|
||||
{
|
||||
Columns aggregate_chunk;
|
||||
aggregate_chunk.reserve(aggregate_columns.size());
|
||||
for (const auto & name : header.getNames())
|
||||
{
|
||||
const auto & column = aggregate_columns[header.getPositionByName(name)];
|
||||
ColumnPtr chunk_column = column->cut(start_pos, length);
|
||||
aggregate_chunk.emplace_back(std::move(chunk_column));
|
||||
}
|
||||
|
||||
aggregator->executeOnBlock(aggregate_chunk, length, aggregation_result, key_columns,
|
||||
columns_for_aggregator, no_more_keys);
|
||||
}
|
||||
|
||||
void TTLAggregationAlgorithm::finalizeAggregates(MutableColumns & result_columns)
|
||||
{
|
||||
if (!aggregation_result.empty())
|
||||
{
|
||||
auto aggregated_res = aggregator->convertToBlocks(aggregation_result, true, 1);
|
||||
for (auto & agg_block : aggregated_res)
|
||||
{
|
||||
for (const auto & it : description.set_parts)
|
||||
it.expression->execute(agg_block);
|
||||
|
||||
for (const auto & name : description.group_by_keys)
|
||||
{
|
||||
const IColumn * values_column = agg_block.getByName(name).column.get();
|
||||
auto & result_column = result_columns[header.getPositionByName(name)];
|
||||
result_column->insertRangeFrom(*values_column, 0, agg_block.rows());
|
||||
}
|
||||
|
||||
for (const auto & it : description.set_parts)
|
||||
{
|
||||
const IColumn * values_column = agg_block.getByName(it.expression_result_column_name).column.get();
|
||||
auto & result_column = result_columns[header.getPositionByName(it.column_name)];
|
||||
result_column->insertRangeFrom(*values_column, 0, agg_block.rows());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
aggregation_result.invalidate();
|
||||
}
|
||||
|
||||
void TTLAggregationAlgorithm::finalize(const MutableDataPartPtr & data_part) const
|
||||
{
|
||||
data_part->ttl_infos.group_by_ttl[description.result_column] = new_ttl_info;
|
||||
data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max);
|
||||
}
|
||||
|
||||
}
|
42
src/DataStreams/TTLAggregationAlgorithm.h
Normal file
42
src/DataStreams/TTLAggregationAlgorithm.h
Normal file
@ -0,0 +1,42 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataStreams/ITTLAlgorithm.h>
|
||||
#include <Interpreters/Aggregator.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Aggregates rows according to 'TTL expr GROUP BY key' description.
|
||||
/// Aggregation key must be the prefix of the sorting key.
|
||||
class TTLAggregationAlgorithm final : public ITTLAlgorithm
|
||||
{
|
||||
public:
|
||||
TTLAggregationAlgorithm(
|
||||
const TTLDescription & description_,
|
||||
const TTLInfo & old_ttl_info_,
|
||||
time_t current_time_,
|
||||
bool force_,
|
||||
const Block & header_,
|
||||
const MergeTreeData & storage_);
|
||||
|
||||
void execute(Block & block) override;
|
||||
void finalize(const MutableDataPartPtr & data_part) const override;
|
||||
|
||||
private:
|
||||
// Calculate aggregates of aggregate_columns into aggregation_result
|
||||
void calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length);
|
||||
|
||||
/// Finalize aggregation_result into result_columns
|
||||
void finalizeAggregates(MutableColumns & result_columns);
|
||||
|
||||
const Block header;
|
||||
std::unique_ptr<Aggregator> aggregator;
|
||||
Row current_key_value;
|
||||
AggregatedDataVariants aggregation_result;
|
||||
ColumnRawPtrs key_columns;
|
||||
Aggregator::AggregateColumns columns_for_aggregator;
|
||||
bool no_more_keys = false;
|
||||
};
|
||||
|
||||
}
|
@ -8,15 +8,14 @@
|
||||
#include <Storages/TTLMode.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include <DataStreams/TTLDeleteAlgorithm.h>
|
||||
#include <DataStreams/TTLColumnAlgorithm.h>
|
||||
#include <DataStreams/TTLAggregationAlgorithm.h>
|
||||
#include <DataStreams/TTLUpdateInfoAlgorithm.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
TTLBlockInputStream::TTLBlockInputStream(
|
||||
const BlockInputStreamPtr & input_,
|
||||
const MergeTreeData & storage_,
|
||||
@ -24,83 +23,69 @@ TTLBlockInputStream::TTLBlockInputStream(
|
||||
const MergeTreeData::MutableDataPartPtr & data_part_,
|
||||
time_t current_time_,
|
||||
bool force_)
|
||||
: storage(storage_)
|
||||
, metadata_snapshot(metadata_snapshot_)
|
||||
, data_part(data_part_)
|
||||
, current_time(current_time_)
|
||||
, force(force_)
|
||||
, old_ttl_infos(data_part->ttl_infos)
|
||||
, log(&Poco::Logger::get(storage.getLogName() + " (TTLBlockInputStream)"))
|
||||
, date_lut(DateLUT::instance())
|
||||
: data_part(data_part_)
|
||||
, log(&Poco::Logger::get(storage_.getLogName() + " (TTLBlockInputStream)"))
|
||||
{
|
||||
children.push_back(input_);
|
||||
header = children.at(0)->getHeader();
|
||||
auto old_ttl_infos = data_part->ttl_infos;
|
||||
|
||||
const auto & storage_columns = metadata_snapshot->getColumns();
|
||||
const auto & column_defaults = storage_columns.getDefaults();
|
||||
|
||||
ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
|
||||
for (const auto & [name, _] : metadata_snapshot->getColumnTTLs())
|
||||
if (metadata_snapshot_->hasRowsTTL())
|
||||
{
|
||||
auto it = column_defaults.find(name);
|
||||
if (it != column_defaults.end())
|
||||
const auto & rows_ttl = metadata_snapshot_->getRowsTTL();
|
||||
auto algorithm = std::make_unique<TTLDeleteAlgorithm>(
|
||||
rows_ttl, old_ttl_infos.table_ttl, current_time_, force_);
|
||||
|
||||
/// Skip all data if table ttl is expired for part
|
||||
if (algorithm->isMaxTTLExpired() && !rows_ttl.where_expression)
|
||||
all_data_dropped = true;
|
||||
|
||||
delete_algorithm = algorithm.get();
|
||||
algorithms.emplace_back(std::move(algorithm));
|
||||
}
|
||||
|
||||
for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTLs())
|
||||
algorithms.emplace_back(std::make_unique<TTLDeleteAlgorithm>(
|
||||
where_ttl, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_));
|
||||
|
||||
for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs())
|
||||
algorithms.emplace_back(std::make_unique<TTLAggregationAlgorithm>(
|
||||
group_by_ttl, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_, header, storage_));
|
||||
|
||||
if (metadata_snapshot_->hasAnyColumnTTL())
|
||||
{
|
||||
const auto & storage_columns = metadata_snapshot_->getColumns();
|
||||
const auto & column_defaults = storage_columns.getDefaults();
|
||||
|
||||
for (const auto & [name, description] : metadata_snapshot_->getColumnTTLs())
|
||||
{
|
||||
auto column = storage_columns.get(name);
|
||||
auto expression = it->second.expression->clone();
|
||||
default_expr_list->children.emplace_back(setAlias(addTypeConversionToAST(std::move(expression), column.type->getName()), it->first));
|
||||
ExpressionActionsPtr default_expression;
|
||||
String default_column_name;
|
||||
auto it = column_defaults.find(name);
|
||||
if (it != column_defaults.end())
|
||||
{
|
||||
const auto & column = storage_columns.get(name);
|
||||
auto default_ast = it->second.expression->clone();
|
||||
default_ast = addTypeConversionToAST(std::move(default_ast), column.type->getName());
|
||||
|
||||
auto syntax_result = TreeRewriter(storage_.global_context).analyze(default_ast, metadata_snapshot_->getColumns().getAllPhysical());
|
||||
default_expression = ExpressionAnalyzer{default_ast, syntax_result, storage_.global_context}.getActions(true);
|
||||
default_column_name = default_ast->getColumnName();
|
||||
}
|
||||
|
||||
algorithms.emplace_back(std::make_unique<TTLColumnAlgorithm>(
|
||||
description, old_ttl_infos.columns_ttl[name], current_time_,
|
||||
force_, name, default_expression, default_column_name));
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto & [name, ttl_info] : old_ttl_infos.columns_ttl)
|
||||
{
|
||||
if (force || isTTLExpired(ttl_info.min))
|
||||
{
|
||||
new_ttl_infos.columns_ttl.emplace(name, IMergeTreeDataPart::TTLInfo{});
|
||||
empty_columns.emplace(name);
|
||||
}
|
||||
else
|
||||
new_ttl_infos.columns_ttl.emplace(name, ttl_info);
|
||||
}
|
||||
for (const auto & move_ttl : metadata_snapshot_->getMoveTTLs())
|
||||
algorithms.emplace_back(std::make_unique<TTLMoveAlgorithm>(
|
||||
move_ttl, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_));
|
||||
|
||||
if (!force && !isTTLExpired(old_ttl_infos.table_ttl.min))
|
||||
new_ttl_infos.table_ttl = old_ttl_infos.table_ttl;
|
||||
|
||||
if (!default_expr_list->children.empty())
|
||||
{
|
||||
auto syntax_result = TreeRewriter(storage.global_context).analyze(default_expr_list, metadata_snapshot->getColumns().getAllPhysical());
|
||||
defaults_expression = ExpressionAnalyzer{default_expr_list, syntax_result, storage.global_context}.getActions(true);
|
||||
}
|
||||
|
||||
auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
|
||||
if (metadata_snapshot->hasRowsTTL() && storage_rows_ttl.mode == TTLMode::GROUP_BY)
|
||||
{
|
||||
current_key_value.resize(storage_rows_ttl.group_by_keys.size());
|
||||
|
||||
ColumnNumbers keys;
|
||||
for (const auto & key : storage_rows_ttl.group_by_keys)
|
||||
keys.push_back(header.getPositionByName(key));
|
||||
agg_key_columns.resize(storage_rows_ttl.group_by_keys.size());
|
||||
|
||||
AggregateDescriptions aggregates = storage_rows_ttl.aggregate_descriptions;
|
||||
for (auto & descr : aggregates)
|
||||
if (descr.arguments.empty())
|
||||
for (const auto & name : descr.argument_names)
|
||||
descr.arguments.push_back(header.getPositionByName(name));
|
||||
agg_aggregate_columns.resize(storage_rows_ttl.aggregate_descriptions.size());
|
||||
|
||||
const Settings & settings = storage.global_context.getSettingsRef();
|
||||
|
||||
Aggregator::Params params(header, keys, aggregates,
|
||||
false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, 0, 0,
|
||||
settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
|
||||
storage.global_context.getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
|
||||
aggregator = std::make_unique<Aggregator>(params);
|
||||
}
|
||||
}
|
||||
|
||||
bool TTLBlockInputStream::isTTLExpired(time_t ttl) const
|
||||
{
|
||||
return (ttl && (ttl <= current_time));
|
||||
for (const auto & recompression_ttl : metadata_snapshot_->getRecompressionTTLs())
|
||||
algorithms.emplace_back(std::make_unique<TTLRecompressionAlgorithm>(
|
||||
recompression_ttl, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
|
||||
}
|
||||
|
||||
Block reorderColumns(Block block, const Block & header)
|
||||
@ -114,321 +99,30 @@ Block reorderColumns(Block block, const Block & header)
|
||||
|
||||
Block TTLBlockInputStream::readImpl()
|
||||
{
|
||||
/// Skip all data if table ttl is expired for part
|
||||
auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
|
||||
if (metadata_snapshot->hasRowsTTL() && !storage_rows_ttl.where_expression && storage_rows_ttl.mode != TTLMode::GROUP_BY
|
||||
&& isTTLExpired(old_ttl_infos.table_ttl.max))
|
||||
{
|
||||
rows_removed = data_part->rows_count;
|
||||
if (all_data_dropped)
|
||||
return {};
|
||||
}
|
||||
|
||||
auto block = children.at(0)->read();
|
||||
for (const auto & algorithm : algorithms)
|
||||
algorithm->execute(block);
|
||||
|
||||
Block block = children.at(0)->read();
|
||||
if (!block)
|
||||
{
|
||||
if (aggregator && !agg_result.empty())
|
||||
{
|
||||
MutableColumns result_columns = header.cloneEmptyColumns();
|
||||
finalizeAggregates(result_columns);
|
||||
block = header.cloneWithColumns(std::move(result_columns));
|
||||
}
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
if (metadata_snapshot->hasRowsTTL() && (force || isTTLExpired(old_ttl_infos.table_ttl.min)))
|
||||
removeRowsWithExpiredTableTTL(block);
|
||||
|
||||
removeValuesWithExpiredColumnTTL(block);
|
||||
|
||||
updateMovesTTL(block);
|
||||
updateRecompressionTTL(block);
|
||||
|
||||
return reorderColumns(std::move(block), header);
|
||||
}
|
||||
|
||||
void TTLBlockInputStream::readSuffixImpl()
|
||||
{
|
||||
for (const auto & elem : new_ttl_infos.columns_ttl)
|
||||
new_ttl_infos.updatePartMinMaxTTL(elem.second.min, elem.second.max);
|
||||
data_part->ttl_infos = {};
|
||||
for (const auto & algorithm : algorithms)
|
||||
algorithm->finalize(data_part);
|
||||
|
||||
new_ttl_infos.updatePartMinMaxTTL(new_ttl_infos.table_ttl.min, new_ttl_infos.table_ttl.max);
|
||||
|
||||
data_part->ttl_infos = std::move(new_ttl_infos);
|
||||
data_part->expired_columns = std::move(empty_columns);
|
||||
|
||||
if (rows_removed)
|
||||
if (delete_algorithm)
|
||||
{
|
||||
size_t rows_removed = all_data_dropped ? data_part->rows_count : delete_algorithm->getNumberOfRemovedRows();
|
||||
LOG_DEBUG(log, "Removed {} rows with expired TTL from part {}", rows_removed, data_part->name);
|
||||
}
|
||||
|
||||
void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block)
|
||||
{
|
||||
auto rows_ttl = metadata_snapshot->getRowsTTL();
|
||||
|
||||
rows_ttl.expression->execute(block);
|
||||
if (rows_ttl.where_expression)
|
||||
rows_ttl.where_expression->execute(block);
|
||||
|
||||
const IColumn * ttl_column =
|
||||
block.getByName(rows_ttl.result_column).column.get();
|
||||
|
||||
const IColumn * where_result_column = rows_ttl.where_expression ?
|
||||
block.getByName(rows_ttl.where_result_column).column.get() : nullptr;
|
||||
|
||||
const auto & column_names = header.getNames();
|
||||
|
||||
if (!aggregator)
|
||||
{
|
||||
MutableColumns result_columns;
|
||||
result_columns.reserve(column_names.size());
|
||||
for (auto it = column_names.begin(); it != column_names.end(); ++it)
|
||||
{
|
||||
const IColumn * values_column = block.getByName(*it).column.get();
|
||||
MutableColumnPtr result_column = values_column->cloneEmpty();
|
||||
result_column->reserve(block.rows());
|
||||
|
||||
for (size_t i = 0; i < block.rows(); ++i)
|
||||
{
|
||||
UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
|
||||
bool where_filter_passed = !where_result_column || where_result_column->getBool(i);
|
||||
if (!isTTLExpired(cur_ttl) || !where_filter_passed)
|
||||
{
|
||||
new_ttl_infos.table_ttl.update(cur_ttl);
|
||||
result_column->insertFrom(*values_column, i);
|
||||
}
|
||||
else if (it == column_names.begin())
|
||||
++rows_removed;
|
||||
}
|
||||
result_columns.emplace_back(std::move(result_column));
|
||||
}
|
||||
block = header.cloneWithColumns(std::move(result_columns));
|
||||
}
|
||||
else
|
||||
{
|
||||
MutableColumns result_columns = header.cloneEmptyColumns();
|
||||
MutableColumns aggregate_columns = header.cloneEmptyColumns();
|
||||
|
||||
size_t rows_aggregated = 0;
|
||||
size_t current_key_start = 0;
|
||||
size_t rows_with_current_key = 0;
|
||||
auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
|
||||
for (size_t i = 0; i < block.rows(); ++i)
|
||||
{
|
||||
UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
|
||||
bool where_filter_passed = !where_result_column || where_result_column->getBool(i);
|
||||
bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed;
|
||||
|
||||
bool same_as_current = true;
|
||||
for (size_t j = 0; j < storage_rows_ttl.group_by_keys.size(); ++j)
|
||||
{
|
||||
const String & key_column = storage_rows_ttl.group_by_keys[j];
|
||||
const IColumn * values_column = block.getByName(key_column).column.get();
|
||||
if (!same_as_current || (*values_column)[i] != current_key_value[j])
|
||||
{
|
||||
values_column->get(i, current_key_value[j]);
|
||||
same_as_current = false;
|
||||
}
|
||||
}
|
||||
if (!same_as_current)
|
||||
{
|
||||
if (rows_with_current_key)
|
||||
calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key);
|
||||
finalizeAggregates(result_columns);
|
||||
|
||||
current_key_start = rows_aggregated;
|
||||
rows_with_current_key = 0;
|
||||
}
|
||||
|
||||
if (ttl_expired)
|
||||
{
|
||||
++rows_with_current_key;
|
||||
++rows_aggregated;
|
||||
for (const auto & name : column_names)
|
||||
{
|
||||
const IColumn * values_column = block.getByName(name).column.get();
|
||||
auto & column = aggregate_columns[header.getPositionByName(name)];
|
||||
column->insertFrom(*values_column, i);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
new_ttl_infos.table_ttl.update(cur_ttl);
|
||||
for (const auto & name : column_names)
|
||||
{
|
||||
const IColumn * values_column = block.getByName(name).column.get();
|
||||
auto & column = result_columns[header.getPositionByName(name)];
|
||||
column->insertFrom(*values_column, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rows_with_current_key)
|
||||
calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key);
|
||||
|
||||
block = header.cloneWithColumns(std::move(result_columns));
|
||||
}
|
||||
}
|
||||
|
||||
void TTLBlockInputStream::calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length)
|
||||
{
|
||||
Columns aggregate_chunk;
|
||||
aggregate_chunk.reserve(aggregate_columns.size());
|
||||
for (const auto & name : header.getNames())
|
||||
{
|
||||
const auto & column = aggregate_columns[header.getPositionByName(name)];
|
||||
ColumnPtr chunk_column = column->cut(start_pos, length);
|
||||
aggregate_chunk.emplace_back(std::move(chunk_column));
|
||||
}
|
||||
aggregator->executeOnBlock(aggregate_chunk, length, agg_result, agg_key_columns,
|
||||
agg_aggregate_columns, agg_no_more_keys);
|
||||
}
|
||||
|
||||
void TTLBlockInputStream::finalizeAggregates(MutableColumns & result_columns)
|
||||
{
|
||||
if (!agg_result.empty())
|
||||
{
|
||||
auto aggregated_res = aggregator->convertToBlocks(agg_result, true, 1);
|
||||
auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
|
||||
for (auto & agg_block : aggregated_res)
|
||||
{
|
||||
for (const auto & it : storage_rows_ttl.set_parts)
|
||||
it.expression->execute(agg_block);
|
||||
for (const auto & name : storage_rows_ttl.group_by_keys)
|
||||
{
|
||||
const IColumn * values_column = agg_block.getByName(name).column.get();
|
||||
auto & result_column = result_columns[header.getPositionByName(name)];
|
||||
result_column->insertRangeFrom(*values_column, 0, agg_block.rows());
|
||||
}
|
||||
for (const auto & it : storage_rows_ttl.set_parts)
|
||||
{
|
||||
const IColumn * values_column = agg_block.getByName(it.expression_result_column_name).column.get();
|
||||
auto & result_column = result_columns[header.getPositionByName(it.column_name)];
|
||||
result_column->insertRangeFrom(*values_column, 0, agg_block.rows());
|
||||
}
|
||||
}
|
||||
}
|
||||
agg_result.invalidate();
|
||||
}
|
||||
|
||||
void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
|
||||
{
|
||||
Block block_with_defaults;
|
||||
if (defaults_expression)
|
||||
{
|
||||
block_with_defaults = block;
|
||||
defaults_expression->execute(block_with_defaults);
|
||||
}
|
||||
|
||||
std::vector<String> columns_to_remove;
|
||||
for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs())
|
||||
{
|
||||
/// If we read not all table columns. E.g. while mutation.
|
||||
if (!block.has(name))
|
||||
continue;
|
||||
|
||||
const auto & old_ttl_info = old_ttl_infos.columns_ttl[name];
|
||||
auto & new_ttl_info = new_ttl_infos.columns_ttl[name];
|
||||
|
||||
/// Nothing to do
|
||||
if (!force && !isTTLExpired(old_ttl_info.min))
|
||||
continue;
|
||||
|
||||
/// Later drop full column
|
||||
if (isTTLExpired(old_ttl_info.max))
|
||||
continue;
|
||||
|
||||
if (!block.has(ttl_entry.result_column))
|
||||
{
|
||||
columns_to_remove.push_back(ttl_entry.result_column);
|
||||
ttl_entry.expression->execute(block);
|
||||
}
|
||||
|
||||
ColumnPtr default_column = nullptr;
|
||||
if (block_with_defaults.has(name))
|
||||
default_column = block_with_defaults.getByName(name).column->convertToFullColumnIfConst();
|
||||
|
||||
auto & column_with_type = block.getByName(name);
|
||||
const IColumn * values_column = column_with_type.column.get();
|
||||
MutableColumnPtr result_column = values_column->cloneEmpty();
|
||||
result_column->reserve(block.rows());
|
||||
|
||||
const IColumn * ttl_column = block.getByName(ttl_entry.result_column).column.get();
|
||||
|
||||
for (size_t i = 0; i < block.rows(); ++i)
|
||||
{
|
||||
UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
|
||||
if (isTTLExpired(cur_ttl))
|
||||
{
|
||||
if (default_column)
|
||||
result_column->insertFrom(*default_column, i);
|
||||
else
|
||||
result_column->insertDefault();
|
||||
}
|
||||
else
|
||||
{
|
||||
new_ttl_info.update(cur_ttl);
|
||||
empty_columns.erase(name);
|
||||
result_column->insertFrom(*values_column, i);
|
||||
}
|
||||
}
|
||||
column_with_type.column = std::move(result_column);
|
||||
}
|
||||
|
||||
for (const String & column : columns_to_remove)
|
||||
block.erase(column);
|
||||
}
|
||||
|
||||
void TTLBlockInputStream::updateTTLWithDescriptions(Block & block, const TTLDescriptions & descriptions, TTLInfoMap & ttl_info_map)
|
||||
{
|
||||
std::vector<String> columns_to_remove;
|
||||
for (const auto & ttl_entry : descriptions)
|
||||
{
|
||||
auto & new_ttl_info = ttl_info_map[ttl_entry.result_column];
|
||||
if (!block.has(ttl_entry.result_column))
|
||||
{
|
||||
columns_to_remove.push_back(ttl_entry.result_column);
|
||||
ttl_entry.expression->execute(block);
|
||||
}
|
||||
|
||||
const IColumn * ttl_column = block.getByName(ttl_entry.result_column).column.get();
|
||||
|
||||
for (size_t i = 0; i < block.rows(); ++i)
|
||||
{
|
||||
UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
|
||||
new_ttl_info.update(cur_ttl);
|
||||
}
|
||||
}
|
||||
|
||||
for (const String & column : columns_to_remove)
|
||||
block.erase(column);
|
||||
}
|
||||
|
||||
void TTLBlockInputStream::updateMovesTTL(Block & block)
|
||||
{
|
||||
updateTTLWithDescriptions(block, metadata_snapshot->getMoveTTLs(), new_ttl_infos.moves_ttl);
|
||||
}
|
||||
|
||||
void TTLBlockInputStream::updateRecompressionTTL(Block & block)
|
||||
{
|
||||
updateTTLWithDescriptions(block, metadata_snapshot->getRecompressionTTLs(), new_ttl_infos.recompression_ttl);
|
||||
}
|
||||
|
||||
UInt32 TTLBlockInputStream::getTimestampByIndex(const IColumn * column, size_t ind)
|
||||
{
|
||||
if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(column))
|
||||
return date_lut.fromDayNum(DayNum(column_date->getData()[ind]));
|
||||
else if (const ColumnUInt32 * column_date_time = typeid_cast<const ColumnUInt32 *>(column))
|
||||
return column_date_time->getData()[ind];
|
||||
else if (const ColumnConst * column_const = typeid_cast<const ColumnConst *>(column))
|
||||
{
|
||||
if (typeid_cast<const ColumnUInt16 *>(&column_const->getDataColumn()))
|
||||
return date_lut.fromDayNum(DayNum(column_const->getValue<UInt16>()));
|
||||
else if (typeid_cast<const ColumnUInt32 *>(&column_const->getDataColumn()))
|
||||
return column_const->getValue<UInt32>();
|
||||
}
|
||||
|
||||
throw Exception("Unexpected type of result TTL column", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
}
|
||||
|
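As a side note, getTimestampByIndex above accepts the TTL expression result either as a Date column (a UInt16 day number converted through DateLUT) or as a DateTime column (a UInt32 unix timestamp). The sketch below is a simplified standalone illustration of that dispatch, not code from this change: it assumes UTC and 86400-second days instead of the real DateLUT handling, and all names are made up.

``` cpp
#include <cstdint>
#include <variant>

/// Stand-in for the two column types handled above: a Date stored as a day
/// number since 1970-01-01 (UInt16) or a DateTime stored as a unix timestamp (UInt32).
using TTLValue = std::variant<uint16_t, uint32_t>;

/// Convert either representation to a unix timestamp in seconds.
/// The real code goes through DateLUT, which also handles time zones; this
/// sketch assumes UTC and 86400-second days purely for illustration.
uint32_t toUnixTimestamp(const TTLValue & value)
{
    if (const auto * day_num = std::get_if<uint16_t>(&value))
        return static_cast<uint32_t>(*day_num) * 86400u;
    return std::get<uint32_t>(value);
}
```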
@ -3,8 +3,9 @@
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/MergeTree/IMergeTreeDataPart.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Interpreters/Aggregator.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartTTLInfo.h>
|
||||
#include <DataStreams/ITTLAlgorithm.h>
|
||||
#include <DataStreams/TTLDeleteAlgorithm.h>
|
||||
|
||||
#include <common/DateLUT.h>
|
||||
|
||||
@ -24,7 +25,6 @@ public:
|
||||
);
|
||||
|
||||
String getName() const override { return "TTL"; }
|
||||
|
||||
Block getHeader() const override { return header; }
|
||||
|
||||
protected:
|
||||
@ -34,60 +34,14 @@ protected:
|
||||
void readSuffixImpl() override;
|
||||
|
||||
private:
|
||||
const MergeTreeData & storage;
|
||||
StorageMetadataPtr metadata_snapshot;
|
||||
std::vector<TTLAlgorithmPtr> algorithms;
|
||||
const TTLDeleteAlgorithm * delete_algorithm = nullptr;
|
||||
bool all_data_dropped = false;
|
||||
|
||||
/// ttl_infos and empty_columns are updating while reading
|
||||
const MergeTreeData::MutableDataPartPtr & data_part;
|
||||
|
||||
time_t current_time;
|
||||
bool force;
|
||||
|
||||
std::unique_ptr<Aggregator> aggregator;
|
||||
std::vector<Field> current_key_value;
|
||||
AggregatedDataVariants agg_result;
|
||||
ColumnRawPtrs agg_key_columns;
|
||||
Aggregator::AggregateColumns agg_aggregate_columns;
|
||||
bool agg_no_more_keys = false;
|
||||
|
||||
IMergeTreeDataPart::TTLInfos old_ttl_infos;
|
||||
IMergeTreeDataPart::TTLInfos new_ttl_infos;
|
||||
NameSet empty_columns;
|
||||
|
||||
size_t rows_removed = 0;
|
||||
Poco::Logger * log;
|
||||
const DateLUTImpl & date_lut;
|
||||
|
||||
/// TODO rewrite defaults logic to evaluteMissingDefaults
|
||||
std::unordered_map<String, String> defaults_result_column;
|
||||
ExpressionActionsPtr defaults_expression;
|
||||
|
||||
Block header;
|
||||
private:
|
||||
/// Removes values with expired ttl and computes new_ttl_infos and empty_columns for part
|
||||
void removeValuesWithExpiredColumnTTL(Block & block);
|
||||
|
||||
/// Removes rows with expired table ttl and computes new ttl_infos for part
|
||||
void removeRowsWithExpiredTableTTL(Block & block);
|
||||
|
||||
// Calculate aggregates of aggregate_columns into agg_result
|
||||
void calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length);
|
||||
|
||||
/// Finalize agg_result into result_columns
|
||||
void finalizeAggregates(MutableColumns & result_columns);
|
||||
|
||||
/// Execute description expressions on block and update ttl's in
|
||||
/// ttl_info_map with expression results.
|
||||
void updateTTLWithDescriptions(Block & block, const TTLDescriptions & descriptions, TTLInfoMap & ttl_info_map);
|
||||
|
||||
/// Updates TTL for moves
|
||||
void updateMovesTTL(Block & block);
|
||||
|
||||
/// Update values for recompression TTL using data from block.
|
||||
void updateRecompressionTTL(Block & block);
|
||||
|
||||
UInt32 getTimestampByIndex(const IColumn * column, size_t ind);
|
||||
bool isTTLExpired(time_t ttl) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
83
src/DataStreams/TTLColumnAlgorithm.cpp
Normal file
@ -0,0 +1,83 @@
|
||||
#include <DataStreams/TTLColumnAlgorithm.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
TTLColumnAlgorithm::TTLColumnAlgorithm(
|
||||
const TTLDescription & description_,
|
||||
const TTLInfo & old_ttl_info_,
|
||||
time_t current_time_,
|
||||
bool force_,
|
||||
const String & column_name_,
|
||||
const ExpressionActionsPtr & default_expression_,
|
||||
const String & default_column_name_)
|
||||
: ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
|
||||
, column_name(column_name_)
|
||||
, default_expression(default_expression_)
|
||||
, default_column_name(default_column_name_)
|
||||
{
|
||||
if (!isMinTTLExpired())
|
||||
{
|
||||
new_ttl_info = old_ttl_info;
|
||||
is_fully_empty = false;
|
||||
}
|
||||
}
|
||||
|
||||
void TTLColumnAlgorithm::execute(Block & block)
|
||||
{
|
||||
if (!block)
|
||||
return;
|
||||
|
||||
/// If we read not all table columns. E.g. while mutation.
|
||||
if (!block.has(column_name))
|
||||
return;
|
||||
|
||||
/// Nothing to do
|
||||
if (!isMinTTLExpired())
|
||||
return;
|
||||
|
||||
/// Later drop full column
|
||||
if (isMaxTTLExpired())
|
||||
return;
|
||||
|
||||
auto default_column = executeExpressionAndGetColumn(default_expression, block, default_column_name);
|
||||
if (default_column)
|
||||
default_column = default_column->convertToFullColumnIfConst();
|
||||
|
||||
auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
|
||||
|
||||
auto & column_with_type = block.getByName(column_name);
|
||||
const IColumn * values_column = column_with_type.column.get();
|
||||
MutableColumnPtr result_column = values_column->cloneEmpty();
|
||||
result_column->reserve(block.rows());
|
||||
|
||||
for (size_t i = 0; i < block.rows(); ++i)
|
||||
{
|
||||
UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
|
||||
if (isTTLExpired(cur_ttl))
|
||||
{
|
||||
if (default_column)
|
||||
result_column->insertFrom(*default_column, i);
|
||||
else
|
||||
result_column->insertDefault();
|
||||
}
|
||||
else
|
||||
{
|
||||
new_ttl_info.update(cur_ttl);
|
||||
is_fully_empty = false;
|
||||
result_column->insertFrom(*values_column, i);
|
||||
}
|
||||
}
|
||||
|
||||
column_with_type.column = std::move(result_column);
|
||||
}
|
||||
|
||||
void TTLColumnAlgorithm::finalize(const MutableDataPartPtr & data_part) const
|
||||
{
|
||||
data_part->ttl_infos.columns_ttl[column_name] = new_ttl_info;
|
||||
data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max);
|
||||
if (is_fully_empty)
|
||||
data_part->expired_columns.insert(column_name);
|
||||
}
|
||||
|
||||
}
|
33
src/DataStreams/TTLColumnAlgorithm.h
Normal file
@ -0,0 +1,33 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataStreams/ITTLAlgorithm.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Deletes (replaces to default) values in column according to column's TTL description.
|
||||
/// If all values in column are replaced with defaults, this column won't be written to part.
|
||||
class TTLColumnAlgorithm final : public ITTLAlgorithm
|
||||
{
|
||||
public:
|
||||
TTLColumnAlgorithm(
|
||||
const TTLDescription & description_,
|
||||
const TTLInfo & old_ttl_info_,
|
||||
time_t current_time_,
|
||||
bool force_,
|
||||
const String & column_name_,
|
||||
const ExpressionActionsPtr & default_expression_,
|
||||
const String & default_column_name_);
|
||||
|
||||
void execute(Block & block) override;
|
||||
void finalize(const MutableDataPartPtr & data_part) const override;
|
||||
|
||||
private:
|
||||
const String column_name;
|
||||
const ExpressionActionsPtr default_expression;
|
||||
const String default_column_name;
|
||||
|
||||
bool is_fully_empty = true;
|
||||
};
|
||||
|
||||
}
|
62
src/DataStreams/TTLDeleteAlgorithm.cpp
Normal file
@ -0,0 +1,62 @@
|
||||
#include <DataStreams/TTLDeleteAlgorithm.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
TTLDeleteAlgorithm::TTLDeleteAlgorithm(
|
||||
const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
|
||||
: ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
|
||||
{
|
||||
if (!isMinTTLExpired())
|
||||
new_ttl_info = old_ttl_info;
|
||||
}
|
||||
|
||||
void TTLDeleteAlgorithm::execute(Block & block)
|
||||
{
|
||||
if (!block || !isMinTTLExpired())
|
||||
return;
|
||||
|
||||
auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
|
||||
auto where_column = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column);
|
||||
|
||||
MutableColumns result_columns;
|
||||
const auto & column_names = block.getNames();
|
||||
|
||||
result_columns.reserve(column_names.size());
|
||||
for (auto it = column_names.begin(); it != column_names.end(); ++it)
|
||||
{
|
||||
const IColumn * values_column = block.getByName(*it).column.get();
|
||||
MutableColumnPtr result_column = values_column->cloneEmpty();
|
||||
result_column->reserve(block.rows());
|
||||
|
||||
for (size_t i = 0; i < block.rows(); ++i)
|
||||
{
|
||||
UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
|
||||
bool where_filter_passed = !where_column || where_column->getBool(i);
|
||||
|
||||
if (!isTTLExpired(cur_ttl) || !where_filter_passed)
|
||||
{
|
||||
new_ttl_info.update(cur_ttl);
|
||||
result_column->insertFrom(*values_column, i);
|
||||
}
|
||||
else if (it == column_names.begin())
|
||||
++rows_removed;
|
||||
}
|
||||
|
||||
result_columns.emplace_back(std::move(result_column));
|
||||
}
|
||||
|
||||
block = block.cloneWithColumns(std::move(result_columns));
|
||||
}
|
||||
|
||||
void TTLDeleteAlgorithm::finalize(const MutableDataPartPtr & data_part) const
|
||||
{
|
||||
if (description.where_expression)
|
||||
data_part->ttl_infos.rows_where_ttl[description.result_column] = new_ttl_info;
|
||||
else
|
||||
data_part->ttl_infos.table_ttl = new_ttl_info;
|
||||
|
||||
data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max);
|
||||
}
|
||||
|
||||
}
|
23
src/DataStreams/TTLDeleteAlgorithm.h
Normal file
@ -0,0 +1,23 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataStreams/ITTLAlgorithm.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Deletes rows according to table TTL description with
|
||||
/// possible optional condition in 'WHERE' clause.
|
||||
class TTLDeleteAlgorithm final : public ITTLAlgorithm
|
||||
{
|
||||
public:
|
||||
TTLDeleteAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
|
||||
|
||||
void execute(Block & block) override;
|
||||
void finalize(const MutableDataPartPtr & data_part) const override;
|
||||
size_t getNumberOfRemovedRows() const { return rows_removed; }
|
||||
|
||||
private:
|
||||
size_t rows_removed = 0;
|
||||
};
|
||||
|
||||
}
|
47
src/DataStreams/TTLUpdateInfoAlgorithm.cpp
Normal file
@ -0,0 +1,47 @@
|
||||
#include <DataStreams/TTLUpdateInfoAlgorithm.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
TTLUpdateInfoAlgorithm::TTLUpdateInfoAlgorithm(
|
||||
const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
|
||||
: ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
|
||||
{
|
||||
}
|
||||
|
||||
void TTLUpdateInfoAlgorithm::execute(Block & block)
|
||||
{
|
||||
if (!block)
|
||||
return;
|
||||
|
||||
auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
|
||||
for (size_t i = 0; i < block.rows(); ++i)
|
||||
{
|
||||
UInt32 cur_ttl = ITTLAlgorithm::getTimestampByIndex(ttl_column.get(), i);
|
||||
new_ttl_info.update(cur_ttl);
|
||||
}
|
||||
}
|
||||
|
||||
TTLMoveAlgorithm::TTLMoveAlgorithm(
|
||||
const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
|
||||
: TTLUpdateInfoAlgorithm(description_, old_ttl_info_, current_time_, force_)
|
||||
{
|
||||
}
|
||||
|
||||
void TTLMoveAlgorithm::finalize(const MutableDataPartPtr & data_part) const
|
||||
{
|
||||
data_part->ttl_infos.moves_ttl[description.result_column] = new_ttl_info;
|
||||
}
|
||||
|
||||
TTLRecompressionAlgorithm::TTLRecompressionAlgorithm(
|
||||
const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
|
||||
: TTLUpdateInfoAlgorithm(description_, old_ttl_info_, current_time_, force_)
|
||||
{
|
||||
}
|
||||
|
||||
void TTLRecompressionAlgorithm::finalize(const MutableDataPartPtr & data_part) const
|
||||
{
|
||||
data_part->ttl_infos.recompression_ttl[description.result_column] = new_ttl_info;
|
||||
}
|
||||
|
||||
}
|
32
src/DataStreams/TTLUpdateInfoAlgorithm.h
Normal file
@ -0,0 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataStreams/ITTLAlgorithm.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Calculates new ttl_info and does nothing with data.
|
||||
class TTLUpdateInfoAlgorithm : public ITTLAlgorithm
|
||||
{
|
||||
public:
|
||||
TTLUpdateInfoAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
|
||||
|
||||
void execute(Block & block) override;
|
||||
void finalize(const MutableDataPartPtr & data_part) const override = 0;
|
||||
};
|
||||
|
||||
class TTLMoveAlgorithm final : public TTLUpdateInfoAlgorithm
|
||||
{
|
||||
public:
|
||||
TTLMoveAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
|
||||
void finalize(const MutableDataPartPtr & data_part) const override;
|
||||
};
|
||||
|
||||
class TTLRecompressionAlgorithm final : public TTLUpdateInfoAlgorithm
|
||||
{
|
||||
public:
|
||||
TTLRecompressionAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
|
||||
void finalize(const MutableDataPartPtr & data_part) const override;
|
||||
};
|
||||
|
||||
}
|
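As a side note, the new TTL*Algorithm classes above all follow one contract: execute(Block &) runs on every block that flows through the TTL stream, and finalize(data_part) records the computed TTL infos once per part. The sketch below is a simplified, self-contained illustration of that execute/finalize composition; the types and names are stand-ins, not the ClickHouse API.

``` cpp
#include <iostream>
#include <memory>
#include <string>
#include <vector>

/// Simplified stand-ins; these are not the ClickHouse types.
struct Block { size_t rows = 0; };
struct DataPart { std::string name; };

/// Mirrors the execute/finalize split: execute() runs for every block that
/// passes through the stream, finalize() runs once per data part.
struct TTLAlgorithm
{
    virtual ~TTLAlgorithm() = default;
    virtual void execute(Block & block) = 0;
    virtual void finalize(DataPart & part) const = 0;
};

struct CountRowsAlgorithm final : TTLAlgorithm
{
    size_t seen_rows = 0;
    void execute(Block & block) override { seen_rows += block.rows; }
    void finalize(DataPart & part) const override
    {
        std::cout << part.name << ": processed " << seen_rows << " rows\n";
    }
};

int main()
{
    std::vector<std::unique_ptr<TTLAlgorithm>> algorithms;
    algorithms.emplace_back(std::make_unique<CountRowsAlgorithm>());

    Block blocks[] = {Block{100}, Block{50}};
    DataPart part{"all_1_1_0"};

    /// The stream applies every algorithm to every block, then finalizes once.
    for (auto & block : blocks)
        for (auto & algorithm : algorithms)
            algorithm->execute(block);
    for (const auto & algorithm : algorithms)
        algorithm->finalize(part);
}
```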
@ -27,6 +27,7 @@ SRCS(
|
||||
ExecutionSpeedLimits.cpp
|
||||
ExpressionBlockInputStream.cpp
|
||||
IBlockInputStream.cpp
|
||||
ITTLAlgorithm.cpp
|
||||
InputStreamFromASTInsertQuery.cpp
|
||||
InternalTextLogsRowOutputStream.cpp
|
||||
LimitBlockInputStream.cpp
|
||||
@ -44,7 +45,11 @@ SRCS(
|
||||
SquashingBlockInputStream.cpp
|
||||
SquashingBlockOutputStream.cpp
|
||||
SquashingTransform.cpp
|
||||
TTLAggregationAlgorithm.cpp
|
||||
TTLBlockInputStream.cpp
|
||||
TTLColumnAlgorithm.cpp
|
||||
TTLDeleteAlgorithm.cpp
|
||||
TTLUpdateInfoAlgorithm.cpp
|
||||
copyData.cpp
|
||||
finalizeBlock.cpp
|
||||
materializeBlock.cpp
|
||||
|
@ -1,6 +1,5 @@
|
||||
#include "DictionarySourceHelpers.h"
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Core/ColumnWithTypeAndName.h>
|
||||
#include <DataStreams/IBlockOutputStream.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
@ -13,44 +12,54 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// For simple key
|
||||
void formatIDs(BlockOutputStreamPtr & out, const std::vector<UInt64> & ids)
|
||||
|
||||
void formatBlock(BlockOutputStreamPtr & out, const Block & block)
|
||||
{
|
||||
auto column = ColumnUInt64::create(ids.size());
|
||||
memcpy(column->getData().data(), ids.data(), ids.size() * sizeof(ids.front()));
|
||||
|
||||
Block block{{std::move(column), std::make_shared<DataTypeUInt64>(), "id"}};
|
||||
|
||||
out->writePrefix();
|
||||
out->write(block);
|
||||
out->writeSuffix();
|
||||
out->flush();
|
||||
}
|
||||
|
||||
/// For composite key
|
||||
void formatKeys(
|
||||
/// For simple key
|
||||
|
||||
Block blockForIds(
|
||||
const DictionaryStructure & dict_struct,
|
||||
const std::vector<UInt64> & ids)
|
||||
{
|
||||
auto column = ColumnUInt64::create(ids.size());
|
||||
memcpy(column->getData().data(), ids.data(), ids.size() * sizeof(ids.front()));
|
||||
|
||||
Block block{{std::move(column), std::make_shared<DataTypeUInt64>(), (*dict_struct.id).name}};
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
/// For composite key
|
||||
|
||||
Block blockForKeys(
|
||||
const DictionaryStructure & dict_struct,
|
||||
BlockOutputStreamPtr & out,
|
||||
const Columns & key_columns,
|
||||
const std::vector<size_t> & requested_rows)
|
||||
{
|
||||
Block block;
|
||||
|
||||
for (size_t i = 0, size = key_columns.size(); i < size; ++i)
|
||||
{
|
||||
const ColumnPtr & source_column = key_columns[i];
|
||||
auto filtered_column = source_column->cloneEmpty();
|
||||
filtered_column->reserve(requested_rows.size());
|
||||
size_t column_rows_size = source_column->size();
|
||||
|
||||
PaddedPODArray<UInt8> filter(column_rows_size, false);
|
||||
|
||||
for (size_t idx : requested_rows)
|
||||
filtered_column->insertFrom(*source_column, idx);
|
||||
filter[idx] = true;
|
||||
|
||||
block.insert({std::move(filtered_column), (*dict_struct.key)[i].type, toString(i)});
|
||||
auto filtered_column = source_column->filter(filter, requested_rows.size());
|
||||
|
||||
block.insert({std::move(filtered_column), (*dict_struct.key)[i].type, (*dict_struct.key)[i].name});
|
||||
}
|
||||
|
||||
out->writePrefix();
|
||||
out->write(block);
|
||||
out->writeSuffix();
|
||||
out->flush();
|
||||
return block;
|
||||
}
|
||||
|
||||
Context copyContextAndApplySettings(
|
||||
|
@ -1,11 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <Columns/IColumn.h>
|
||||
|
||||
#include <common/types.h>
|
||||
|
||||
#include <Poco/File.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Core/Block.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class IBlockOutputStream;
|
||||
@ -16,13 +20,18 @@ class Context;
|
||||
|
||||
/// Write keys to block output stream.
|
||||
|
||||
void formatBlock(BlockOutputStreamPtr & out, const Block & block);
|
||||
|
||||
/// For simple key
|
||||
void formatIDs(BlockOutputStreamPtr & out, const std::vector<UInt64> & ids);
|
||||
|
||||
Block blockForIds(
|
||||
const DictionaryStructure & dict_struct,
|
||||
const std::vector<UInt64> & ids);
|
||||
|
||||
/// For composite key
|
||||
void formatKeys(
|
||||
|
||||
Block blockForKeys(
|
||||
const DictionaryStructure & dict_struct,
|
||||
BlockOutputStreamPtr & out,
|
||||
const Columns & key_columns,
|
||||
const std::vector<size_t> & requested_rows);
|
||||
|
||||
@ -36,4 +45,5 @@ void applySettingsToContext(
|
||||
const std::string & config_prefix,
|
||||
Context & context,
|
||||
const Poco::Util::AbstractConfiguration & config);
|
||||
|
||||
}
|
||||
|
@ -281,6 +281,21 @@ size_t DictionaryStructure::getKeySize() const
|
||||
});
|
||||
}
|
||||
|
||||
Strings DictionaryStructure::getKeysNames() const
|
||||
{
|
||||
if (id)
|
||||
return { id->name };
|
||||
|
||||
const auto & key_attributes = *key;
|
||||
|
||||
Strings keys_names;
|
||||
keys_names.reserve(key_attributes.size());
|
||||
|
||||
for (const auto & key_attribute : key_attributes)
|
||||
keys_names.emplace_back(key_attribute.name);
|
||||
|
||||
return keys_names;
|
||||
}
|
||||
|
||||
static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & keys)
|
||||
{
|
||||
|
@ -158,6 +158,8 @@ struct DictionaryStructure final
|
||||
std::string getKeyDescription() const;
|
||||
bool isKeySizeFixed() const;
|
||||
size_t getKeySize() const;
|
||||
Strings getKeysNames() const;
|
||||
|
||||
private:
|
||||
/// range_min and range_max have to be parsed before this function call
|
||||
std::vector<DictionaryAttribute> getAttributes(
|
||||
|
@ -26,6 +26,8 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int DICTIONARY_ACCESS_DENIED;
|
||||
extern const int UNSUPPORTED_METHOD;
|
||||
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -65,18 +67,34 @@ ExecutableDictionarySource::ExecutableDictionarySource(
|
||||
const Context & context_)
|
||||
: log(&Poco::Logger::get("ExecutableDictionarySource"))
|
||||
, dict_struct{dict_struct_}
|
||||
, implicit_key{config.getBool(config_prefix + ".implicit_key", false)}
|
||||
, command{config.getString(config_prefix + ".command")}
|
||||
, update_field{config.getString(config_prefix + ".update_field", "")}
|
||||
, format{config.getString(config_prefix + ".format")}
|
||||
, sample_block{sample_block_}
|
||||
, context(context_)
|
||||
{
|
||||
/// Remove keys from sample_block for implicit_key dictionary because
|
||||
/// these columns will not be returned from source
|
||||
/// Implicit key means that the source script will return only values,
|
||||
/// and the correspondence to the requested keys is determined implicitly - by the order of rows in the result.
|
||||
if (implicit_key)
|
||||
{
|
||||
auto keys_names = dict_struct.getKeysNames();
|
||||
|
||||
for (auto & key_name : keys_names)
|
||||
{
|
||||
size_t key_column_position_in_block = sample_block.getPositionByName(key_name);
|
||||
sample_block.erase(key_column_position_in_block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ExecutableDictionarySource::ExecutableDictionarySource(const ExecutableDictionarySource & other)
|
||||
: log(&Poco::Logger::get("ExecutableDictionarySource"))
|
||||
, update_time{other.update_time}
|
||||
, dict_struct{other.dict_struct}
|
||||
, implicit_key{other.implicit_key}
|
||||
, command{other.command}
|
||||
, update_field{other.update_field}
|
||||
, format{other.format}
|
||||
@ -87,6 +105,9 @@ ExecutableDictionarySource::ExecutableDictionarySource(const ExecutableDictionar
|
||||
|
||||
BlockInputStreamPtr ExecutableDictionarySource::loadAll()
|
||||
{
|
||||
if (implicit_key)
|
||||
throw Exception("ExecutableDictionarySource with implicit_key does not support loadAll method", ErrorCodes::UNSUPPORTED_METHOD);
|
||||
|
||||
LOG_TRACE(log, "loadAll {}", toString());
|
||||
auto process = ShellCommand::execute(command);
|
||||
auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size);
|
||||
@ -95,6 +116,9 @@ BlockInputStreamPtr ExecutableDictionarySource::loadAll()
|
||||
|
||||
BlockInputStreamPtr ExecutableDictionarySource::loadUpdatedAll()
|
||||
{
|
||||
if (implicit_key)
|
||||
throw Exception("ExecutableDictionarySource with implicit_key does not support loadUpdatedAll method", ErrorCodes::UNSUPPORTED_METHOD);
|
||||
|
||||
time_t new_update_time = time(nullptr);
|
||||
SCOPE_EXIT(update_time = new_update_time);
|
||||
|
||||
@ -173,6 +197,77 @@ namespace
|
||||
std::function<void(WriteBufferFromFile &)> send_data;
|
||||
ThreadFromGlobalPool thread;
|
||||
};
|
||||
|
||||
/** A stream that adds additional columns to each block it reads from the inner stream.
*
* The number of rows in block_to_add must equal the total number of rows across all blocks read from the inner stream.
|
||||
*/
|
||||
class BlockInputStreamWithAdditionalColumns final: public IBlockInputStream
|
||||
{
|
||||
public:
|
||||
BlockInputStreamWithAdditionalColumns(
|
||||
Block block_to_add_,
|
||||
std::unique_ptr<IBlockInputStream>&& stream_)
|
||||
: block_to_add(std::move(block_to_add_))
|
||||
, stream(std::move(stream_))
|
||||
{
|
||||
}
|
||||
|
||||
Block getHeader() const override
|
||||
{
|
||||
auto header = stream->getHeader();
|
||||
|
||||
if (header)
|
||||
{
|
||||
for (Int64 i = static_cast<Int64>(block_to_add.columns() - 1); i >= 0; --i)
|
||||
header.insert(0, block_to_add.getByPosition(i).cloneEmpty());
|
||||
}
|
||||
|
||||
return header;
|
||||
}
|
||||
|
||||
Block readImpl() override
|
||||
{
|
||||
auto block = stream->read();
|
||||
|
||||
if (block)
|
||||
{
|
||||
auto block_rows = block.rows();
|
||||
|
||||
auto cut_block = block_to_add.cloneWithCutColumns(current_range_index, block_rows);
|
||||
|
||||
if (cut_block.rows() != block_rows)
|
||||
throw Exception(
|
||||
"Number of rows in block to add after cut must equal to number of rows in block from inner stream",
|
||||
ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
|
||||
|
||||
for (Int64 i = static_cast<Int64>(cut_block.columns() - 1); i >= 0; --i)
|
||||
block.insert(0, cut_block.getByPosition(i));
|
||||
|
||||
current_range_index += block_rows;
|
||||
}
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
void readPrefix() override
|
||||
{
|
||||
stream->readPrefix();
|
||||
}
|
||||
|
||||
void readSuffix() override
|
||||
{
|
||||
stream->readSuffix();
|
||||
}
|
||||
|
||||
String getName() const override { return "BlockInputStreamWithAdditionalColumns"; }
|
||||
|
||||
private:
|
||||
Block block_to_add;
|
||||
std::unique_ptr<IBlockInputStream> stream;
|
||||
size_t current_range_index = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
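As a side note, for implicit_key sources the script returns only value columns, so BlockInputStreamWithAdditionalColumns prepends the matching slice of the key block (cut with cloneWithCutColumns to each block's row count) in front of every block it reads. The sketch below is a rough standalone illustration of that slicing idea using plain vectors; it is not code from this change and all names are hypothetical.

``` cpp
#include <cassert>
#include <vector>

/// block_to_add holds the key values for all requested rows; each block read
/// from the inner stream receives the matching slice of keys in front of it.
struct KeyedBlock
{
    std::vector<int> keys;      /// prepended slice of the key block
    std::vector<double> values; /// what the source returned for those keys
};

std::vector<KeyedBlock> attachKeys(
    const std::vector<int> & all_keys,
    const std::vector<std::vector<double>> & value_blocks)
{
    std::vector<KeyedBlock> result;
    size_t current_range_index = 0;

    for (const auto & values : value_blocks)
    {
        /// Cut the key block to exactly as many rows as this value block has;
        /// a mismatch would mean the source returned a wrong number of rows.
        assert(current_range_index + values.size() <= all_keys.size());

        KeyedBlock block;
        block.keys.assign(
            all_keys.begin() + current_range_index,
            all_keys.begin() + current_range_index + values.size());
        block.values = values;

        current_range_index += values.size();
        result.push_back(std::move(block));
    }
    return result;
}

int main()
{
    /// Keys {1, 2, 3, 4}; the source answered with two blocks of two values each.
    auto blocks = attachKeys({1, 2, 3, 4}, {{0.5, 0.7}, {0.9, 1.1}});
    return blocks.size() == 2 ? 0 : 1;
}
```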
||||
@ -180,28 +275,44 @@ BlockInputStreamPtr ExecutableDictionarySource::loadIds(const std::vector<UInt64
|
||||
{
|
||||
LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size());
|
||||
|
||||
return std::make_shared<BlockInputStreamWithBackgroundThread>(
|
||||
auto block = blockForIds(dict_struct, ids);
|
||||
|
||||
auto stream = std::make_unique<BlockInputStreamWithBackgroundThread>(
|
||||
context, format, sample_block, command, log,
|
||||
[&ids, this](WriteBufferFromFile & out) mutable
|
||||
[block, this](WriteBufferFromFile & out) mutable
|
||||
{
|
||||
auto output_stream = context.getOutputStream(format, out, sample_block);
|
||||
formatIDs(output_stream, ids);
|
||||
auto output_stream = context.getOutputStream(format, out, block.cloneEmpty());
|
||||
formatBlock(output_stream, block);
|
||||
out.close();
|
||||
});
|
||||
|
||||
if (implicit_key)
|
||||
{
|
||||
return std::make_shared<BlockInputStreamWithAdditionalColumns>(block, std::move(stream));
|
||||
}
|
||||
else
|
||||
return std::shared_ptr<BlockInputStreamWithBackgroundThread>(stream.release());
|
||||
}
|
||||
|
||||
BlockInputStreamPtr ExecutableDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
|
||||
{
|
||||
LOG_TRACE(log, "loadKeys {} size = {}", toString(), requested_rows.size());
|
||||
|
||||
return std::make_shared<BlockInputStreamWithBackgroundThread>(
|
||||
auto block = blockForKeys(dict_struct, key_columns, requested_rows);
|
||||
|
||||
auto stream = std::make_unique<BlockInputStreamWithBackgroundThread>(
|
||||
context, format, sample_block, command, log,
|
||||
[key_columns, &requested_rows, this](WriteBufferFromFile & out) mutable
|
||||
[block, this](WriteBufferFromFile & out) mutable
|
||||
{
|
||||
auto output_stream = context.getOutputStream(format, out, sample_block);
|
||||
formatKeys(dict_struct, output_stream, key_columns, requested_rows);
|
||||
auto output_stream = context.getOutputStream(format, out, block.cloneEmpty());
|
||||
formatBlock(output_stream, block);
|
||||
out.close();
|
||||
});
|
||||
|
||||
if (implicit_key)
|
||||
return std::make_shared<BlockInputStreamWithAdditionalColumns>(block, std::move(stream));
|
||||
else
|
||||
return std::shared_ptr<BlockInputStreamWithBackgroundThread>(stream.release());
|
||||
}
|
||||
|
||||
bool ExecutableDictionarySource::isModified() const
|
||||
|
@ -49,9 +49,9 @@ public:
|
||||
|
||||
private:
|
||||
Poco::Logger * log;
|
||||
|
||||
time_t update_time = 0;
|
||||
const DictionaryStructure dict_struct;
|
||||
bool implicit_key;
|
||||
const std::string command;
|
||||
const std::string update_field;
|
||||
const std::string format;
|
||||
|
@ -131,11 +131,13 @@ BlockInputStreamPtr HTTPDictionarySource::loadIds(const std::vector<UInt64> & id
|
||||
{
|
||||
LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size());
|
||||
|
||||
ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [&](std::ostream & ostr)
|
||||
auto block = blockForIds(dict_struct, ids);
|
||||
|
||||
ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [block, this](std::ostream & ostr)
|
||||
{
|
||||
WriteBufferFromOStream out_buffer(ostr);
|
||||
auto output_stream = context.getOutputStream(format, out_buffer, sample_block);
|
||||
formatIDs(output_stream, ids);
|
||||
formatBlock(output_stream, block);
|
||||
};
|
||||
|
||||
Poco::URI uri(url);
|
||||
@ -150,11 +152,13 @@ BlockInputStreamPtr HTTPDictionarySource::loadKeys(const Columns & key_columns,
|
||||
{
|
||||
LOG_TRACE(log, "loadKeys {} size = {}", toString(), requested_rows.size());
|
||||
|
||||
ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [&](std::ostream & ostr)
|
||||
auto block = blockForKeys(dict_struct, key_columns, requested_rows);
|
||||
|
||||
ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [block, this](std::ostream & ostr)
|
||||
{
|
||||
WriteBufferFromOStream out_buffer(ostr);
|
||||
auto output_stream = context.getOutputStream(format, out_buffer, sample_block);
|
||||
formatKeys(dict_struct, output_stream, key_columns, requested_rows);
|
||||
formatBlock(output_stream, block);
|
||||
};
|
||||
|
||||
Poco::URI uri(url);
|
||||
|
62
src/Disks/IStoragePolicy.h
Normal file
@ -0,0 +1,62 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <common/types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class IStoragePolicy;
|
||||
using StoragePolicyPtr = std::shared_ptr<const IStoragePolicy>;
|
||||
class IVolume;
|
||||
using VolumePtr = std::shared_ptr<IVolume>;
|
||||
using Volumes = std::vector<VolumePtr>;
|
||||
class IDisk;
|
||||
using DiskPtr = std::shared_ptr<IDisk>;
|
||||
using Disks = std::vector<DiskPtr>;
|
||||
class IReservation;
|
||||
using ReservationPtr = std::unique_ptr<IReservation>;
|
||||
using Reservations = std::vector<ReservationPtr>;
|
||||
|
||||
using String = std::string;
|
||||
|
||||
class IStoragePolicy
|
||||
{
|
||||
public:
|
||||
virtual ~IStoragePolicy() = default;
|
||||
virtual const String & getName() const = 0;
|
||||
virtual const Volumes & getVolumes() const = 0;
|
||||
/// Returns number [0., 1.] -- fraction of free space on disk
|
||||
/// which should be kept with help of background moves
|
||||
virtual double getMoveFactor() const = 0;
|
||||
virtual bool isDefaultPolicy() const = 0;
|
||||
/// Returns disks ordered by volumes priority
|
||||
virtual Disks getDisks() const = 0;
|
||||
/// Returns any disk
|
||||
/// Used when it's not important, for example for
|
||||
/// mutations files
|
||||
virtual DiskPtr getAnyDisk() const = 0;
|
||||
virtual DiskPtr getDiskByName(const String & disk_name) const = 0;
|
||||
/// Get free space from most free disk
|
||||
virtual UInt64 getMaxUnreservedFreeSpace() const = 0;
|
||||
/// Reserves space on any volume with index > min_volume_index or returns nullptr
|
||||
virtual ReservationPtr reserve(UInt64 bytes, size_t min_volume_index) const = 0;
|
||||
/// Returns valid reservation or nullptr
|
||||
virtual ReservationPtr reserve(UInt64 bytes) const = 0;
|
||||
/// Reserves space on any volume or throws
|
||||
virtual ReservationPtr reserveAndCheck(UInt64 bytes) const = 0;
|
||||
/// Reserves 0 bytes on disk with max available space
|
||||
/// Do not use this function when it is possible to predict size.
|
||||
virtual ReservationPtr makeEmptyReservationOnLargestDisk() const = 0;
|
||||
/// Get volume by index.
|
||||
virtual VolumePtr getVolume(size_t index) const = 0;
|
||||
virtual VolumePtr getVolumeByName(const String & volume_name) const = 0;
|
||||
/// Checks if storage policy can be replaced by another one.
|
||||
virtual void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const = 0;
|
||||
/// Find volume index, which contains disk
|
||||
virtual size_t getVolumeIndexByDisk(const DiskPtr & disk_ptr) const = 0;
|
||||
/// Check if we have any volume with stopped merges
|
||||
virtual bool hasAnyVolumeWithDisabledMerges() const = 0;
|
||||
virtual bool containsVolume(const String & volume_name) const = 0;
|
||||
};
|
||||
|
||||
}
|
@ -93,17 +93,17 @@ StoragePolicy::StoragePolicy(String name_, Volumes volumes_, double move_factor_
|
||||
}
|
||||
|
||||
|
||||
StoragePolicy::StoragePolicy(const StoragePolicy & storage_policy,
|
||||
StoragePolicy::StoragePolicy(StoragePolicyPtr storage_policy,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix,
|
||||
DiskSelectorPtr disks)
|
||||
: StoragePolicy(storage_policy.getName(), config, config_prefix, disks)
|
||||
: StoragePolicy(storage_policy->getName(), config, config_prefix, disks)
|
||||
{
|
||||
for (auto & volume : volumes)
|
||||
{
|
||||
if (storage_policy.volume_index_by_volume_name.count(volume->getName()) > 0)
|
||||
if (storage_policy->containsVolume(volume->getName()))
|
||||
{
|
||||
auto old_volume = storage_policy.getVolumeByName(volume->getName());
|
||||
auto old_volume = storage_policy->getVolumeByName(volume->getName());
|
||||
try
|
||||
{
|
||||
auto new_volume = updateVolumeFromConfig(old_volume, config, config_prefix + ".volumes." + volume->getName(), disks);
|
||||
@ -112,7 +112,7 @@ StoragePolicy::StoragePolicy(const StoragePolicy & storage_policy,
|
||||
catch (Exception & e)
|
||||
{
|
||||
/// Default policies are allowed to be missed in configuration.
|
||||
if (e.code() != ErrorCodes::NO_ELEMENTS_IN_CONFIG || storage_policy.getName() != DEFAULT_STORAGE_POLICY_NAME)
|
||||
if (e.code() != ErrorCodes::NO_ELEMENTS_IN_CONFIG || storage_policy->getName() != DEFAULT_STORAGE_POLICY_NAME)
|
||||
throw;
|
||||
|
||||
Poco::Util::AbstractConfiguration::Keys keys;
|
||||
@ -331,6 +331,11 @@ bool StoragePolicy::hasAnyVolumeWithDisabledMerges() const
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StoragePolicy::containsVolume(const String & volume_name) const
|
||||
{
|
||||
return volume_index_by_volume_name.contains(volume_name);
|
||||
}
|
||||
|
||||
StoragePolicySelector::StoragePolicySelector(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix,
|
||||
@ -345,6 +350,13 @@ StoragePolicySelector::StoragePolicySelector(
|
||||
throw Exception(
|
||||
"Storage policy name can contain only alphanumeric and '_' (" + backQuote(name) + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
|
||||
|
||||
/*
|
||||
* A customization point for StoragePolicy: here one can add a custom policy, for example, based on the policy's name
|
||||
* if (name == "MyCustomPolicy")
|
||||
* policies.emplace(name, std::make_shared<CustomPolicy>(name, config, config_prefix + "." + name, disks));
|
||||
* else
|
||||
*/
|
||||
|
||||
policies.emplace(name, std::make_shared<StoragePolicy>(name, config, config_prefix + "." + name, disks));
|
||||
LOG_INFO(&Poco::Logger::get("StoragePolicySelector"), "Storage policy {} loaded", backQuote(name));
|
||||
}
|
||||
@ -374,7 +386,7 @@ StoragePolicySelectorPtr StoragePolicySelector::updateFromConfig(const Poco::Uti
|
||||
/// Second pass, load.
|
||||
for (const auto & [name, policy] : policies)
|
||||
{
|
||||
result->policies[name] = std::make_shared<StoragePolicy>(*policy, config, config_prefix + "." + name, disks);
|
||||
result->policies[name] = std::make_shared<StoragePolicy>(policy, config, config_prefix + "." + name, disks);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Disks/IStoragePolicy.h>
|
||||
#include <Disks/DiskSelector.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <Disks/IVolume.h>
|
||||
@ -23,14 +24,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class StoragePolicy;
|
||||
using StoragePolicyPtr = std::shared_ptr<const StoragePolicy>;
|
||||
|
||||
/**
|
||||
* Contains all information about volumes configuration for Storage.
|
||||
* Can determine appropriate Volume and Disk for each reservation.
|
||||
*/
|
||||
class StoragePolicy
|
||||
class StoragePolicy : public IStoragePolicy
|
||||
{
|
||||
public:
|
||||
StoragePolicy(String name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, DiskSelectorPtr disks);
|
||||
@ -38,62 +36,63 @@ public:
|
||||
StoragePolicy(String name_, Volumes volumes_, double move_factor_);
|
||||
|
||||
StoragePolicy(
|
||||
const StoragePolicy & storage_policy,
|
||||
StoragePolicyPtr storage_policy,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix,
|
||||
DiskSelectorPtr disks
|
||||
);
|
||||
|
||||
bool isDefaultPolicy() const;
|
||||
bool isDefaultPolicy() const override;
|
||||
|
||||
/// Returns disks ordered by volumes priority
|
||||
Disks getDisks() const;
|
||||
Disks getDisks() const override;
|
||||
|
||||
/// Returns any disk
|
||||
/// Used when it's not important, for example for
|
||||
/// mutations files
|
||||
DiskPtr getAnyDisk() const;
|
||||
DiskPtr getAnyDisk() const override;
|
||||
|
||||
DiskPtr getDiskByName(const String & disk_name) const;
|
||||
DiskPtr getDiskByName(const String & disk_name) const override;
|
||||
|
||||
/// Get free space from most free disk
|
||||
UInt64 getMaxUnreservedFreeSpace() const;
|
||||
UInt64 getMaxUnreservedFreeSpace() const override;
|
||||
|
||||
const String & getName() const { return name; }
|
||||
const String & getName() const override{ return name; }
|
||||
|
||||
/// Returns valid reservation or nullptr
|
||||
ReservationPtr reserve(UInt64 bytes) const;
|
||||
ReservationPtr reserve(UInt64 bytes) const override;
|
||||
|
||||
/// Reserves space on any volume or throws
|
||||
ReservationPtr reserveAndCheck(UInt64 bytes) const;
|
||||
ReservationPtr reserveAndCheck(UInt64 bytes) const override;
|
||||
|
||||
/// Reserves space on any volume with index > min_volume_index or returns nullptr
|
||||
ReservationPtr reserve(UInt64 bytes, size_t min_volume_index) const;
|
||||
ReservationPtr reserve(UInt64 bytes, size_t min_volume_index) const override;
|
||||
|
||||
/// Find volume index, which contains disk
|
||||
size_t getVolumeIndexByDisk(const DiskPtr & disk_ptr) const;
|
||||
size_t getVolumeIndexByDisk(const DiskPtr & disk_ptr) const override;
|
||||
|
||||
/// Reserves 0 bytes on disk with max available space
|
||||
/// Do not use this function when it is possible to predict size.
|
||||
ReservationPtr makeEmptyReservationOnLargestDisk() const;
|
||||
ReservationPtr makeEmptyReservationOnLargestDisk() const override;
|
||||
|
||||
const Volumes & getVolumes() const { return volumes; }
|
||||
const Volumes & getVolumes() const override{ return volumes; }
|
||||
|
||||
/// Returns number [0., 1.] -- fraction of free space on disk
|
||||
/// which should be kept with help of background moves
|
||||
double getMoveFactor() const { return move_factor; }
|
||||
double getMoveFactor() const override { return move_factor; }
|
||||
|
||||
/// Get volume by index.
|
||||
VolumePtr getVolume(size_t index) const;
|
||||
VolumePtr getVolume(size_t index) const override;
|
||||
|
||||
VolumePtr getVolumeByName(const String & volume_name) const;
|
||||
VolumePtr getVolumeByName(const String & volume_name) const override;
|
||||
|
||||
/// Checks if storage policy can be replaced by another one.
|
||||
void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const;
|
||||
void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const override;
|
||||
|
||||
/// Check if we have any volume with stopped merges
|
||||
bool hasAnyVolumeWithDisabledMerges() const;
|
||||
bool hasAnyVolumeWithDisabledMerges() const override;
|
||||
|
||||
bool containsVolume(const String & volume_name) const override;
|
||||
private:
|
||||
Volumes volumes;
|
||||
const String name;
|
||||
|
@ -1,7 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/UUID.h>
|
||||
#include <Common/UInt128.h>
|
||||
#include <common/DayNum.h>
|
||||
#include <memory>
|
||||
|
||||
|
@ -3,8 +3,10 @@
|
||||
#include <cmath>
|
||||
#include <type_traits>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
#endif
|
||||
@ -87,7 +89,19 @@ struct DivideIntegralImpl
|
||||
return static_cast<Result>(checkedDivision(static_cast<SignedCastA>(a), static_cast<SignedCastB>(b)));
|
||||
}
|
||||
else
|
||||
{
|
||||
if constexpr (std::is_floating_point_v<A>)
|
||||
if (isNaN(a) || a > std::numeric_limits<CastA>::max() || a < std::numeric_limits<CastA>::lowest())
|
||||
throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
|
||||
ErrorCodes::ILLEGAL_DIVISION);
|
||||
|
||||
if constexpr (std::is_floating_point_v<B>)
|
||||
if (isNaN(b) || b > std::numeric_limits<CastB>::max() || b < std::numeric_limits<CastB>::lowest())
|
||||
throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
|
||||
ErrorCodes::ILLEGAL_DIVISION);
|
||||
|
||||
return static_cast<Result>(checkedDivision(CastA(a), CastB(b)));
|
||||
}
|
||||
}
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
@ -114,6 +128,16 @@ struct ModuloImpl
|
||||
}
|
||||
else
|
||||
{
|
||||
if constexpr (std::is_floating_point_v<A>)
|
||||
if (isNaN(a) || a > std::numeric_limits<IntegerAType>::max() || a < std::numeric_limits<IntegerAType>::lowest())
|
||||
throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
|
||||
ErrorCodes::ILLEGAL_DIVISION);
|
||||
|
||||
if constexpr (std::is_floating_point_v<B>)
|
||||
if (isNaN(b) || b > std::numeric_limits<IntegerBType>::max() || b < std::numeric_limits<IntegerBType>::lowest())
|
||||
throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
|
||||
ErrorCodes::ILLEGAL_DIVISION);
|
||||
|
||||
throwIfDivisionLeadsToFPE(IntegerAType(a), IntegerBType(b));
|
||||
|
||||
if constexpr (is_big_int_v<IntegerAType> || is_big_int_v<IntegerBType>)
|
||||
|
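As a side note, the checks added to DivideIntegralImpl and ModuloImpl exist because casting NaN, infinite, or out-of-range floating point values to an integer type before integral division is undefined behavior. The sketch below is a small standalone illustration of the same guard, not code from this change, assuming a double dividend and an int64_t divisor.

``` cpp
#include <cmath>
#include <cstdint>
#include <limits>
#include <stdexcept>

/// Casting NaN or a value outside the target integer range is undefined
/// behavior, so validate the floating point operand before the division,
/// in the spirit of the checks added above.
int64_t intDiv(double a, int64_t b)
{
    if (std::isnan(a)
        || a > static_cast<double>(std::numeric_limits<int64_t>::max())
        || a < static_cast<double>(std::numeric_limits<int64_t>::lowest()))
        throw std::domain_error("Cannot perform integer division on infinite or too large floating point numbers");

    if (b == 0)
        throw std::domain_error("Division by zero");

    return static_cast<int64_t>(a) / b;
}

int main()
{
    return static_cast<int>(intDiv(10.0, 3)); /// exits with status 3
}
```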
@ -25,6 +25,7 @@
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <ext/range.h>
|
||||
#include <type_traits>
|
||||
#include <boost/tti/has_member_function.hpp>
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
@ -507,11 +508,20 @@ public:
|
||||
}
|
||||
else if (element.isDouble())
|
||||
{
|
||||
if (!accurate::convertNumeric(element.getDouble(), value))
|
||||
if constexpr (std::is_floating_point_v<NumberType>)
|
||||
{
|
||||
/// We permit inaccurate conversion of double to float.
|
||||
/// Example: double 0.1 from JSON is not representable in float.
|
||||
/// But it is more convenient for the user if we perform the conversion anyway.
|
||||
value = element.getDouble();
|
||||
}
|
||||
else if (!accurate::convertNumeric(element.getDouble(), value))
|
||||
return false;
|
||||
}
|
||||
else if (element.isBool() && is_integer_v<NumberType> && convert_bool_to_integer)
|
||||
{
|
||||
value = static_cast<NumberType>(element.getBool());
|
||||
}
|
||||
else
|
||||
return false;
|
||||
|
||||
|
@ -216,9 +216,7 @@ inline Float64 getSpan(uint8_t precision, CoordType type)
|
||||
inline uint8_t geohashPrecision(uint8_t precision)
|
||||
{
|
||||
if (precision == 0 || precision > MAX_PRECISION)
|
||||
{
|
||||
precision = MAX_PRECISION;
|
||||
}
|
||||
|
||||
return precision;
|
||||
}
|
||||
@ -281,13 +279,21 @@ GeohashesInBoxPreparedArgs geohashesInBoxPrepare(
|
||||
return {};
|
||||
}
|
||||
|
||||
longitude_min = std::max(longitude_min, LON_MIN);
|
||||
longitude_max = std::min(longitude_max, LON_MAX);
|
||||
latitude_min = std::max(latitude_min, LAT_MIN);
|
||||
latitude_max = std::min(latitude_max, LAT_MAX);
|
||||
auto saturate = [](Float64 & value, Float64 min, Float64 max)
|
||||
{
|
||||
if (value < min)
|
||||
value = min;
|
||||
else if (value > max)
|
||||
value = max;
|
||||
};
|
||||
|
||||
const auto lon_step = getSpan(precision, LONGITUDE);
|
||||
const auto lat_step = getSpan(precision, LATITUDE);
|
||||
saturate(longitude_min, LON_MIN, LON_MAX);
|
||||
saturate(longitude_max, LON_MIN, LON_MAX);
|
||||
saturate(latitude_min, LAT_MIN, LAT_MAX);
|
||||
saturate(latitude_max, LAT_MIN, LAT_MAX);
|
||||
|
||||
Float64 lon_step = getSpan(precision, LONGITUDE);
|
||||
Float64 lat_step = getSpan(precision, LATITUDE);
|
||||
|
||||
/// Align max to the right (or up) border of geohash grid cell to ensure that cell is in result.
|
||||
Float64 lon_min = floor(longitude_min / lon_step) * lon_step;
|
||||
|
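As a side note, the saturate lambda clamps each coordinate into its valid range, and the subsequent floor(min / step) * step aligns the box to the left border of its geohash grid cell. The sketch below is a tiny standalone illustration with made-up numbers, not code from this change.

``` cpp
#include <cmath>
#include <iostream>

/// Clamp a coordinate into its valid range, like the saturate lambda above.
void saturate(double & value, double min, double max)
{
    if (value < min)
        value = min;
    else if (value > max)
        value = max;
}

int main()
{
    double longitude_min = -200.0;           /// out of range, clamped to -180
    saturate(longitude_min, -180.0, 180.0);

    /// Align the minimum to the left border of its grid cell so that the cell
    /// containing it is included in the result (11.25 is just an example span).
    const double lon_step = 11.25;
    const double lon_min = std::floor(longitude_min / lon_step) * lon_step;

    std::cout << longitude_min << " -> " << lon_min << '\n';
}
```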
@ -163,7 +163,7 @@ namespace DB
|
||||
|
||||
FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
|
||||
{
|
||||
const DataTypePtr & from_type = arguments[0].type;
|
||||
const DataTypePtr & from_type = removeNullable(arguments[0].type);
|
||||
DataTypes argument_types = { from_type };
|
||||
FunctionBaseImplPtr base;
|
||||
auto call = [&](const auto & types) -> bool
|
||||
@ -185,7 +185,7 @@ namespace DB
|
||||
* here causes a SEGV. So we must somehow create a
|
||||
* dummy implementation and return it.
|
||||
*/
|
||||
if (WhichDataType(from_type).isNullable()) // Nullable(Nothing)
|
||||
if (WhichDataType(from_type).isNothing()) // Nullable(Nothing)
|
||||
return std::make_unique<FunctionBaseFromModifiedJulianDay<Name, DataTypeInt32, nullOnErrors>>(argument_types, return_type);
|
||||
else
|
||||
// Should not happen.
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include <Common/assert_cast.h>
|
||||
#include <AggregateFunctions/AggregateFunctionArray.h>
|
||||
#include <AggregateFunctions/AggregateFunctionState.h>
|
||||
#include <Disks/StoragePolicy.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
|
||||
|
@ -58,6 +58,9 @@ public:
|
||||
/// Separate method allows to initialize the `servers` variable beforehand.
|
||||
void start()
|
||||
{
|
||||
/// Update once right now, to make metrics available just after server start
|
||||
/// (without waiting for asynchronous_metrics_update_period_s).
|
||||
update();
|
||||
thread = std::make_unique<ThreadFromGlobalPool>([this] { run(); });
|
||||
}
|
||||
|
||||
|
@ -102,8 +102,8 @@ using DiskPtr = std::shared_ptr<IDisk>;
|
||||
class DiskSelector;
|
||||
using DiskSelectorPtr = std::shared_ptr<const DiskSelector>;
|
||||
using DisksMap = std::map<String, DiskPtr>;
|
||||
class StoragePolicy;
|
||||
using StoragePolicyPtr = std::shared_ptr<const StoragePolicy>;
|
||||
class IStoragePolicy;
|
||||
using StoragePolicyPtr = std::shared_ptr<const IStoragePolicy>;
|
||||
using StoragePoliciesMap = std::map<String, StoragePolicyPtr>;
|
||||
class StoragePolicySelector;
|
||||
using StoragePolicySelectorPtr = std::shared_ptr<const StoragePolicySelector>;
|
||||
|
@ -865,7 +865,18 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
|
||||
while (stopwatch.elapsedSeconds() <= MAX_EXECUTION_TIMEOUT_SEC)
|
||||
{
|
||||
StorageReplicatedMergeTree::Status status;
|
||||
replicated_storage->getStatus(status);
|
||||
// Request the status with ZooKeeper fields to obtain the active replicas count
|
||||
replicated_storage->getStatus(status, true);
|
||||
|
||||
// Should return as soon as possible if the table is dropped.
|
||||
bool replica_dropped = replicated_storage->is_dropped;
|
||||
bool all_replicas_likely_detached = status.active_replicas == 0 && !DatabaseCatalog::instance().isTableExist(replicated_storage->getStorageID(), context);
|
||||
if (replica_dropped || all_replicas_likely_detached)
|
||||
{
|
||||
LOG_WARNING(log, ", task {} will not be executed.", task.entry_name);
|
||||
task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, table is dropped or detached permanently");
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Any replica which is leader tries to take lock
|
||||
if (status.is_leader && lock->tryLock())
|
||||
|
@ -140,7 +140,16 @@ void Set::setHeader(const Block & header)
|
||||
ConstNullMapPtr null_map{};
|
||||
ColumnPtr null_map_holder;
|
||||
if (!transform_null_in)
|
||||
{
|
||||
/// Since we convert nullable columns to non-nullable, we also need to update the nullable types
|
||||
for (size_t i = 0; i < set_elements_types.size(); ++i)
|
||||
{
|
||||
data_types[i] = removeNullable(data_types[i]);
|
||||
set_elements_types[i] = removeNullable(set_elements_types[i]);
|
||||
}
|
||||
|
||||
extractNestedColumnsAndNullMap(key_columns, null_map);
|
||||
}
|
||||
|
||||
if (fill_set_elements)
|
||||
{
|
||||
@ -182,7 +191,7 @@ bool Set::insertFromBlock(const Block & block)
|
||||
ConstNullMapPtr null_map{};
|
||||
ColumnPtr null_map_holder;
|
||||
if (!transform_null_in)
|
||||
null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
|
||||
null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
|
||||
|
||||
/// Filter to extract distinct values from the block.
|
||||
ColumnUInt8::MutablePtr filter;
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <DataStreams/OneBlockInputStream.h>
|
||||
#include <DataStreams/TemporaryFileStream.h>
|
||||
#include <DataStreams/materializeBlock.h>
|
||||
#include <Disks/StoragePolicy.h>
|
||||
#include <Disks/IVolume.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <condition_variable>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <common/logger_useful.h>
|
||||
#include <ext/scope_guard.h>
|
||||
#include <common/types.h>
|
||||
#include <Core/Defines.h>
|
||||
#include <Storages/IStorage.h>
|
||||
@ -229,14 +230,23 @@ void SystemLog<LogElement>::startup()
|
||||
}
|
||||
|
||||
|
||||
static thread_local bool recursive_add_call = false;
|
||||
|
||||
template <typename LogElement>
|
||||
void SystemLog<LogElement>::add(const LogElement & element)
|
||||
{
|
||||
/// It is possible that the method will be called recursively.
|
||||
/// Better to drop these events to avoid complications.
|
||||
if (recursive_add_call)
|
||||
return;
|
||||
recursive_add_call = true;
|
||||
SCOPE_EXIT({ recursive_add_call = false; });
|
||||
|
||||
/// Memory can be allocated while resizing on queue.push_back.
|
||||
/// The size of allocation can be in order of a few megabytes.
|
||||
/// But this should not be accounted for query memory usage.
|
||||
/// Otherwise the tests like 01017_uniqCombined_memory_usage.sql will be flaky.
|
||||
MemoryTracker::BlockerInThread temporarily_disable_memory_tracker;
|
||||
MemoryTracker::BlockerInThread temporarily_disable_memory_tracker(VariableContext::Global);
|
||||
|
||||
/// Should not log messages under mutex.
|
||||
bool queue_is_half_full = false;
|
||||
|
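As a side note, SystemLog::add now drops events generated recursively on the same thread, using a thread_local flag that is reset on scope exit. The sketch below is a standalone illustration of the same guard, not code from this change; it uses a small hand-written RAII helper in place of ClickHouse's SCOPE_EXIT macro.

``` cpp
#include <iostream>

/// Hand-written RAII helper standing in for ClickHouse's SCOPE_EXIT macro.
struct FlagGuard
{
    bool & flag;
    explicit FlagGuard(bool & flag_) : flag(flag_) { flag = true; }
    ~FlagGuard() { flag = false; }
};

static thread_local bool recursive_add_call = false;

void addLogElement(int element)
{
    /// Drop events generated while this thread is already inside add().
    if (recursive_add_call)
        return;
    FlagGuard guard(recursive_add_call);

    std::cout << "logging element " << element << '\n';
    /// Anything below that called addLogElement() again would hit the early
    /// return above and be dropped instead of recursing.
}

int main()
{
    addLogElement(1);
    addLogElement(2);
}
```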
@ -20,7 +20,7 @@ ASTPtr ASTTTLElement::clone() const
|
||||
|
||||
for (auto & expr : clone->group_by_key)
|
||||
expr = expr->clone();
|
||||
for (auto & [name, expr] : clone->group_by_aggregations)
|
||||
for (auto & expr : clone->group_by_assignments)
|
||||
expr = expr->clone();
|
||||
|
||||
return clone;
|
||||
@ -46,15 +46,15 @@ void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & st
|
||||
settings.ostr << ", ";
|
||||
(*it)->formatImpl(settings, state, frame);
|
||||
}
|
||||
if (!group_by_aggregations.empty())
|
||||
|
||||
if (!group_by_assignments.empty())
|
||||
{
|
||||
settings.ostr << " SET ";
|
||||
for (auto it = group_by_aggregations.begin(); it != group_by_aggregations.end(); ++it)
|
||||
for (auto it = group_by_assignments.begin(); it != group_by_assignments.end(); ++it)
|
||||
{
|
||||
if (it != group_by_aggregations.begin())
|
||||
if (it != group_by_assignments.begin())
|
||||
settings.ostr << ", ";
|
||||
settings.ostr << it->first << " = ";
|
||||
it->second->formatImpl(settings, state, frame);
|
||||
(*it)->formatImpl(settings, state, frame);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ public:
|
||||
String destination_name;
|
||||
|
||||
ASTs group_by_key;
|
||||
std::vector<std::pair<String, ASTPtr>> group_by_aggregations;
|
||||
ASTs group_by_assignments;
|
||||
|
||||
ASTPtr recompression_codec;
|
||||
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include <Parsers/ASTTTLElement.h>
|
||||
#include <Parsers/ASTWindowDefinition.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTAssignment.h>
|
||||
|
||||
#include <Parsers/parseIdentifierOrStringLiteral.h>
|
||||
#include <Parsers/parseIntervalKind.h>
|
||||
@ -2008,9 +2009,12 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
ParserIdentifier parser_identifier;
|
||||
ParserStringLiteral parser_string_literal;
|
||||
ParserExpression parser_exp;
|
||||
ParserExpressionList parser_expression_list(false);
|
||||
ParserExpressionList parser_keys_list(false);
|
||||
ParserCodec parser_codec;
|
||||
|
||||
ParserList parser_assignment_list(
|
||||
std::make_unique<ParserAssignment>(), std::make_unique<ParserToken>(TokenType::Comma));
|
||||
|
||||
ASTPtr ttl_expr;
|
||||
if (!parser_exp.parse(pos, ttl_expr, expected))
|
||||
return false;
|
||||
@ -2044,9 +2048,9 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
}
|
||||
|
||||
ASTPtr where_expr;
|
||||
ASTPtr ast_group_by_key;
|
||||
ASTPtr group_by_key;
|
||||
ASTPtr recompression_codec;
|
||||
std::vector<std::pair<String, ASTPtr>> group_by_aggregations;
|
||||
ASTPtr group_by_assignments;
|
||||
|
||||
if (mode == TTLMode::MOVE)
|
||||
{
|
||||
@ -2058,30 +2062,13 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
}
|
||||
else if (mode == TTLMode::GROUP_BY)
|
||||
{
|
||||
if (!parser_expression_list.parse(pos, ast_group_by_key, expected))
|
||||
if (!parser_keys_list.parse(pos, group_by_key, expected))
|
||||
return false;
|
||||
|
||||
if (s_set.ignore(pos))
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
if (!group_by_aggregations.empty() && !s_comma.ignore(pos))
|
||||
break;
|
||||
|
||||
ASTPtr name;
|
||||
ASTPtr value;
|
||||
if (!parser_identifier.parse(pos, name, expected))
|
||||
return false;
|
||||
if (!s_eq.ignore(pos))
|
||||
return false;
|
||||
if (!parser_exp.parse(pos, value, expected))
|
||||
return false;
|
||||
|
||||
String name_str;
|
||||
if (!tryGetIdentifierNameInto(name, name_str))
|
||||
return false;
|
||||
group_by_aggregations.emplace_back(name_str, std::move(value));
|
||||
}
|
||||
if (!parser_assignment_list.parse(pos, group_by_assignments, expected))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (mode == TTLMode::DELETE && s_where.ignore(pos))
|
||||
@ -2105,8 +2092,9 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
|
||||
if (mode == TTLMode::GROUP_BY)
|
||||
{
|
||||
ttl_element->group_by_key = std::move(ast_group_by_key->children);
|
||||
ttl_element->group_by_aggregations = std::move(group_by_aggregations);
|
||||
ttl_element->group_by_key = std::move(group_by_key->children);
|
||||
if (group_by_assignments)
|
||||
ttl_element->group_by_assignments = std::move(group_by_assignments->children);
|
||||
}
|
||||
|
||||
if (mode == TTLMode::RECOMPRESS)
|
||||
@ -2141,4 +2129,31 @@ bool ParserIdentifierWithOptionalParameters::parseImpl(Pos & pos, ASTPtr & node,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ParserAssignment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
auto assignment = std::make_shared<ASTAssignment>();
|
||||
node = assignment;
|
||||
|
||||
ParserIdentifier p_identifier;
|
||||
ParserToken s_equals(TokenType::Equals);
|
||||
ParserExpression p_expression;
|
||||
|
||||
ASTPtr column;
|
||||
if (!p_identifier.parse(pos, column, expected))
|
||||
return false;
|
||||
|
||||
if (!s_equals.ignore(pos, expected))
|
||||
return false;
|
||||
|
||||
ASTPtr expression;
|
||||
if (!p_expression.parse(pos, expression, expected))
|
||||
return false;
|
||||
|
||||
tryGetIdentifierNameInto(column, assignment->column_name);
|
||||
if (expression)
|
||||
assignment->children.push_back(expression);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
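As a side note, ParserAssignment, now shared by ALTER ... UPDATE and TTL ... GROUP BY ... SET, recognizes the shape `column = expression`. The sketch below is a toy standalone parser for the same shape, purely illustrative and unrelated to the ClickHouse parser classes.

``` cpp
#include <cctype>
#include <iostream>
#include <optional>
#include <string>

/// Toy parse of "identifier = expression" where the expression is simply the
/// rest of the input; names and grammar are illustrative only.
struct Assignment
{
    std::string column_name;
    std::string expression;
};

std::optional<Assignment> parseAssignment(const std::string & input)
{
    size_t pos = 0;
    while (pos < input.size()
        && (std::isalnum(static_cast<unsigned char>(input[pos])) || input[pos] == '_'))
        ++pos;
    if (pos == 0)
        return std::nullopt; /// no identifier

    const std::string name = input.substr(0, pos);

    while (pos < input.size() && input[pos] == ' ')
        ++pos;
    if (pos >= input.size() || input[pos] != '=')
        return std::nullopt; /// missing '='
    ++pos;
    while (pos < input.size() && input[pos] == ' ')
        ++pos;

    return Assignment{name, input.substr(pos)};
}

int main()
{
    if (const auto assignment = parseAssignment("x = max(x)"))
        std::cout << assignment->column_name << " = " << assignment->expression << '\n';
}
```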
@ -483,4 +483,12 @@ protected:
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
};
|
||||
|
||||
/// Part of the UPDATE command or TTL with GROUP BY of the form: col_name = expr
|
||||
class ParserAssignment : public IParserBase
|
||||
{
|
||||
protected:
|
||||
const char * getName() const override{ return "column assignment"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include <Parsers/ASTIndexDeclaration.h>
|
||||
#include <Parsers/ASTAlterQuery.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTAssignment.h>
|
||||
#include <Parsers/parseDatabaseAndTableName.h>
|
||||
|
||||
|
||||
@ -651,34 +650,6 @@ bool ParserAlterCommandList::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
|
||||
}
|
||||
|
||||
|
||||
bool ParserAssignment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
auto assignment = std::make_shared<ASTAssignment>();
|
||||
node = assignment;
|
||||
|
||||
ParserIdentifier p_identifier;
|
||||
ParserToken s_equals(TokenType::Equals);
|
||||
ParserExpression p_expression;
|
||||
|
||||
ASTPtr column;
|
||||
if (!p_identifier.parse(pos, column, expected))
|
||||
return false;
|
||||
|
||||
if (!s_equals.ignore(pos, expected))
|
||||
return false;
|
||||
|
||||
ASTPtr expression;
|
||||
if (!p_expression.parse(pos, expression, expected))
|
||||
return false;
|
||||
|
||||
tryGetIdentifierNameInto(column, assignment->column_name);
|
||||
if (expression)
|
||||
assignment->children.push_back(expression);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
auto query = std::make_shared<ASTAlterQuery>();
|
||||
|
@ -63,12 +63,4 @@ public:
|
||||
};
|
||||
|
||||
|
||||
/// Part of the UPDATE command of the form: col_name = expr
|
||||
class ParserAssignment : public IParserBase
|
||||
{
|
||||
protected:
|
||||
const char * getName() const override{ return "column assignment"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -4,13 +4,14 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token last)
|
||||
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin)
|
||||
{
|
||||
/// We have just two kind of parentheses: () and [].
|
||||
UnmatchedParentheses stack;
|
||||
|
||||
for (TokenIterator it = begin;
|
||||
it.isValid() && it->begin <= last.begin; ++it)
|
||||
/// We have to iterate through all tokens until the end to avoid false positive "Unmatched parentheses" error
|
||||
/// when parser failed in the middle of the query.
|
||||
for (TokenIterator it = begin; it.isValid(); ++it)
|
||||
{
|
||||
if (it->type == TokenType::OpeningRoundBracket || it->type == TokenType::OpeningSquareBracket)
|
||||
{
|
||||
|
@ -80,6 +80,6 @@ public:
|
||||
|
||||
/// Returns positions of unmatched parentheses.
|
||||
using UnmatchedParentheses = std::vector<Token>;
|
||||
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token last);
|
||||
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin);
|
||||
|
||||
}
|
||||
|
@ -79,7 +79,7 @@ void writeQueryWithHighlightedErrorPositions(
|
||||
{
|
||||
const char * current_position_to_hilite = positions_to_hilite[position_to_hilite_idx].begin;
|
||||
|
||||
assert(current_position_to_hilite < end);
|
||||
assert(current_position_to_hilite <= end);
|
||||
assert(current_position_to_hilite >= begin);
|
||||
|
||||
out.write(pos, current_position_to_hilite - pos);
|
||||
@ -269,14 +269,6 @@ ASTPtr tryParseQuery(
|
||||
// most of the checks.
|
||||
if (insert && insert->data)
|
||||
{
|
||||
if (!parse_res)
|
||||
{
|
||||
// Generic parse error.
|
||||
out_error_message = getSyntaxErrorMessage(query_begin, all_queries_end,
|
||||
last_token, expected, hilite, query_description);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -290,7 +282,7 @@ ASTPtr tryParseQuery(
|
||||
}
|
||||
|
||||
/// Unmatched parentheses
|
||||
UnmatchedParentheses unmatched_parens = checkUnmatchedParentheses(TokenIterator(tokens), last_token);
|
||||
UnmatchedParentheses unmatched_parens = checkUnmatchedParentheses(TokenIterator(tokens));
|
||||
if (!unmatched_parens.empty())
|
||||
{
|
||||
out_error_message = getUnmatchedParenthesesErrorMessage(query_begin,
|
||||
|
@ -24,6 +24,7 @@ namespace ErrorCodes
|
||||
extern const int TYPE_MISMATCH;
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
extern const int CANNOT_READ_ALL_DATA;
|
||||
}
|
||||
|
||||
|
||||
@ -412,6 +413,15 @@ void ValuesBlockInputFormat::readPrefix()
|
||||
|
||||
void ValuesBlockInputFormat::readSuffix()
|
||||
{
|
||||
if (!buf.eof() && *buf.position() == ';')
|
||||
{
|
||||
++buf.position();
|
||||
skipWhitespaceIfAny(buf);
|
||||
if (buf.hasUnreadData())
|
||||
throw Exception("Cannot read data after semicolon", ErrorCodes::CANNOT_READ_ALL_DATA);
|
||||
return;
|
||||
}
|
||||
|
||||
if (buf.hasUnreadData())
|
||||
throw Exception("Unread data in PeekableReadBuffer will be lost. Most likely it's a bug.", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
@ -105,6 +105,8 @@ Pipe::Holder & Pipe::Holder::operator=(Holder && rhs)
|
||||
for (auto & plan : rhs.query_plans)
|
||||
query_plans.emplace_back(std::move(plan));
|
||||
|
||||
query_id_holder = std::move(rhs.query_id_holder);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
#include <Processors/IProcessor.h>
|
||||
#include <Processors/Sources/SourceWithProgress.h>
|
||||
#include <Processors/QueryPlan/QueryIdHolder.h>
|
||||
#include <Processors/QueryPlan/QueryPlan.h>
|
||||
|
||||
namespace DB
|
||||
@ -108,6 +109,7 @@ public:
|
||||
/// This methods are from QueryPipeline. Needed to make conversion from pipeline to pipe possible.
|
||||
void addInterpreterContext(std::shared_ptr<Context> context) { holder.interpreter_context.emplace_back(std::move(context)); }
|
||||
void addStorageHolder(StoragePtr storage) { holder.storage_holders.emplace_back(std::move(storage)); }
|
||||
void addQueryIdHolder(std::shared_ptr<QueryIdHolder> query_id_holder) { holder.query_id_holder = std::move(query_id_holder); }
|
||||
/// For queries with nested interpreters (i.e. StorageDistributed)
|
||||
void addQueryPlan(std::unique_ptr<QueryPlan> plan) { holder.query_plans.emplace_back(std::move(plan)); }
|
||||
|
||||
@ -128,6 +130,7 @@ private:
|
||||
std::vector<StoragePtr> storage_holders;
|
||||
std::vector<TableLockHolder> table_locks;
|
||||
std::vector<std::unique_ptr<QueryPlan>> query_plans;
|
||||
std::shared_ptr<QueryIdHolder> query_id_holder;
|
||||
};
|
||||
|
||||
Holder holder;
|
||||
|
15 src/Processors/QueryPlan/QueryIdHolder.cpp Normal file
@ -0,0 +1,15 @@
#include <Processors/QueryPlan/QueryIdHolder.h>
#include <Storages/MergeTree/MergeTreeData.h>

namespace DB
{
QueryIdHolder::QueryIdHolder(const String & query_id_, const MergeTreeData & data_) : query_id(query_id_), data(data_)
{
}

QueryIdHolder::~QueryIdHolder()
{
data.removeQueryId(query_id);
}

}
21 src/Processors/QueryPlan/QueryIdHolder.h Normal file
@ -0,0 +1,21 @@
#pragma once

#include <string>

namespace DB
{
class MergeTreeData;

/// Holds the current query id and do something meaningful in destructor.
/// Currently it's used for cleaning query id in the MergeTreeData query set.
struct QueryIdHolder
{
QueryIdHolder(const std::string & query_id_, const MergeTreeData & data_);

~QueryIdHolder();

std::string query_id;
const MergeTreeData & data;
};

}
@ -8,7 +8,7 @@
|
||||
#include <Compression/CompressedWriteBuffer.h>
|
||||
#include <DataStreams/NativeBlockInputStream.h>
|
||||
#include <DataStreams/NativeBlockOutputStream.h>
|
||||
#include <Disks/StoragePolicy.h>
|
||||
#include <Disks/IVolume.h>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
|
@ -122,6 +122,7 @@ SRCS(
|
||||
QueryPlan/Optimizations/optimizeTree.cpp
|
||||
QueryPlan/Optimizations/splitFilter.cpp
|
||||
QueryPlan/PartialSortingStep.cpp
|
||||
QueryPlan/QueryIdHolder.cpp
|
||||
QueryPlan/QueryPlan.cpp
|
||||
QueryPlan/ReadFromPreparedSource.cpp
|
||||
QueryPlan/ReadNothingStep.cpp
|
||||
|
@ -1613,7 +1613,10 @@ private:


GRPCServer::GRPCServer(IServer & iserver_, const Poco::Net::SocketAddress & address_to_listen_)
: iserver(iserver_), address_to_listen(address_to_listen_), log(&Poco::Logger::get("GRPCServer"))
: iserver(iserver_)
, address_to_listen(address_to_listen_)
, log(&Poco::Logger::get("GRPCServer"))
, runner(std::make_unique<Runner>(*this))
{}

GRPCServer::~GRPCServer()
@ -1644,7 +1647,6 @@ void GRPCServer::start()

queue = builder.AddCompletionQueue();
grpc_server = builder.BuildAndStart();
runner = std::make_unique<Runner>(*this);
runner->start();
}

@ -19,7 +19,7 @@
|
||||
#include <common/getFQDNOrHostName.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include <Common/SettingsChanges.h>
|
||||
#include <Disks/StoragePolicy.h>
|
||||
#include <Disks/IVolume.h>
|
||||
#include <Compression/CompressedReadBuffer.h>
|
||||
#include <Compression/CompressedWriteBuffer.h>
|
||||
#include <IO/ReadBufferFromIStream.h>
|
||||
|
@ -50,8 +50,8 @@ class Pipe;
|
||||
class QueryPlan;
|
||||
using QueryPlanPtr = std::unique_ptr<QueryPlan>;
|
||||
|
||||
class StoragePolicy;
|
||||
using StoragePolicyPtr = std::shared_ptr<const StoragePolicy>;
|
||||
class IStoragePolicy;
|
||||
using StoragePolicyPtr = std::shared_ptr<const IStoragePolicy>;
|
||||
|
||||
struct StreamLocalLimits;
|
||||
class EnabledQuota;
|
||||
|
@ -1278,6 +1278,18 @@ bool IMergeTreeDataPart::checkAllTTLCalculated(const StorageMetadataPtr & metada
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const auto & group_by_desc : metadata_snapshot->getGroupByTTLs())
|
||||
{
|
||||
if (!ttl_infos.group_by_ttl.count(group_by_desc.result_column))
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const auto & rows_where_desc : metadata_snapshot->getRowsWhereTTLs())
|
||||
{
|
||||
if (!ttl_infos.rows_where_ttl.count(rows_where_desc.result_column))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -114,6 +114,7 @@ namespace ErrorCodes
|
||||
extern const int NOT_ENOUGH_SPACE;
|
||||
extern const int ALTER_OF_COLUMN_IS_FORBIDDEN;
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
extern const int TOO_MANY_SIMULTANEOUS_QUERIES;
|
||||
}
|
||||
|
||||
|
||||
@ -3988,4 +3989,24 @@ void MergeTreeData::setDataVolume(size_t bytes, size_t rows, size_t parts)
total_active_size_rows.store(rows, std::memory_order_release);
total_active_size_parts.store(parts, std::memory_order_release);
}

void MergeTreeData::insertQueryIdOrThrow(const String & query_id, size_t max_queries) const
{
std::lock_guard lock(query_id_set_mutex);
if (query_id_set.find(query_id) != query_id_set.end())
return;
if (query_id_set.size() >= max_queries)
throw Exception(
ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous queries for table {}. Maximum is: {}", log_name, max_queries);
query_id_set.insert(query_id);
}

void MergeTreeData::removeQueryId(const String & query_id) const
{
std::lock_guard lock(query_id_set_mutex);
if (query_id_set.find(query_id) == query_id_set.end())
LOG_WARNING(log, "We have query_id removed but it's not recorded. This is a bug");
else
query_id_set.erase(query_id);
}
}

@ -702,6 +702,12 @@ public:
/// section from config.xml.
CompressionCodecPtr getCompressionCodecForPart(size_t part_size_compressed, const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t current_time) const;

/// Record current query id where querying the table. Throw if there are already `max_queries` queries accessing the same table.
void insertQueryIdOrThrow(const String & query_id, size_t max_queries) const;

/// Remove current query id after query finished.
void removeQueryId(const String & query_id) const;

/// Limiting parallel sends per one table, used in DataPartsExchange
std::atomic_uint current_table_sends {0};

@ -958,6 +964,10 @@ private:
std::atomic<size_t> total_active_size_bytes = 0;
std::atomic<size_t> total_active_size_rows = 0;
std::atomic<size_t> total_active_size_parts = 0;

// Record all query ids which access the table. It's guarded by `query_id_set_mutex` and is always mutable.
mutable std::set<String> query_id_set;
mutable std::mutex query_id_set_mutex;
};

}

@ -3,7 +3,6 @@
|
||||
#include <Storages/MergeTree/MergeTreeSequentialSource.h>
|
||||
#include <Storages/MergeTree/MergedBlockOutputStream.h>
|
||||
#include <Storages/MergeTree/MergedColumnOnlyOutputStream.h>
|
||||
#include <Disks/StoragePolicy.h>
|
||||
#include <Storages/MergeTree/SimpleMergeSelector.h>
|
||||
#include <Storages/MergeTree/AllMergeSelector.h>
|
||||
#include <Storages/MergeTree/TTLMergeSelector.h>
|
||||
|
@ -17,13 +17,23 @@ void MergeTreeDataPartTTLInfos::update(const MergeTreeDataPartTTLInfos & other_i
|
||||
updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
|
||||
}
|
||||
|
||||
for (const auto & [name, ttl_info] : other_infos.rows_where_ttl)
|
||||
{
|
||||
rows_where_ttl[name].update(ttl_info);
|
||||
updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
|
||||
}
|
||||
|
||||
for (const auto & [name, ttl_info] : other_infos.group_by_ttl)
|
||||
{
|
||||
group_by_ttl[name].update(ttl_info);
|
||||
updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
|
||||
}
|
||||
|
||||
for (const auto & [name, ttl_info] : other_infos.recompression_ttl)
|
||||
recompression_ttl[name].update(ttl_info);
|
||||
|
||||
for (const auto & [expression, ttl_info] : other_infos.moves_ttl)
|
||||
{
|
||||
moves_ttl[expression].update(ttl_info);
|
||||
}
|
||||
|
||||
table_ttl.update(other_infos.table_ttl);
|
||||
updatePartMinMaxTTL(table_ttl.min, table_ttl.max);
|
||||
@ -59,29 +69,41 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in)
|
||||
|
||||
updatePartMinMaxTTL(table_ttl.min, table_ttl.max);
|
||||
}
|
||||
|
||||
auto fill_ttl_info_map = [this](const JSON & json_part, TTLInfoMap & ttl_info_map, bool update_min_max)
|
||||
{
|
||||
for (auto elem : json_part) // NOLINT
|
||||
{
|
||||
MergeTreeDataPartTTLInfo ttl_info;
|
||||
ttl_info.min = elem["min"].getUInt();
|
||||
ttl_info.max = elem["max"].getUInt();
|
||||
String expression = elem["expression"].getString();
|
||||
ttl_info_map.emplace(expression, ttl_info);
|
||||
|
||||
if (update_min_max)
|
||||
updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
|
||||
}
|
||||
};
|
||||
|
||||
if (json.has("moves"))
|
||||
{
|
||||
const JSON & moves = json["moves"];
|
||||
for (auto move : moves) // NOLINT
|
||||
{
|
||||
MergeTreeDataPartTTLInfo ttl_info;
|
||||
ttl_info.min = move["min"].getUInt();
|
||||
ttl_info.max = move["max"].getUInt();
|
||||
String expression = move["expression"].getString();
|
||||
moves_ttl.emplace(expression, ttl_info);
|
||||
}
|
||||
fill_ttl_info_map(moves, moves_ttl, false);
|
||||
}
|
||||
if (json.has("recompression"))
|
||||
{
|
||||
const JSON & recompressions = json["recompression"];
|
||||
for (auto recompression : recompressions) // NOLINT
|
||||
{
|
||||
MergeTreeDataPartTTLInfo ttl_info;
|
||||
ttl_info.min = recompression["min"].getUInt();
|
||||
ttl_info.max = recompression["max"].getUInt();
|
||||
String expression = recompression["expression"].getString();
|
||||
recompression_ttl.emplace(expression, ttl_info);
|
||||
}
|
||||
fill_ttl_info_map(recompressions, recompression_ttl, false);
|
||||
}
|
||||
if (json.has("group_by"))
|
||||
{
|
||||
const JSON & group_by = json["group_by"];
|
||||
fill_ttl_info_map(group_by, group_by_ttl, true);
|
||||
}
|
||||
if (json.has("rows_where"))
|
||||
{
|
||||
const JSON & rows_where = json["rows_where"];
|
||||
fill_ttl_info_map(rows_where, rows_where_ttl, true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -118,47 +140,52 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const
|
||||
writeIntText(table_ttl.max, out);
|
||||
writeString("}", out);
|
||||
}
|
||||
|
||||
auto write_infos = [&out](const TTLInfoMap & infos, const String & type, bool is_first)
|
||||
{
|
||||
if (!is_first)
|
||||
writeString(",", out);
|
||||
|
||||
writeDoubleQuotedString(type, out);
|
||||
writeString(":[", out);
|
||||
for (auto it = infos.begin(); it != infos.end(); ++it)
|
||||
{
|
||||
if (it != infos.begin())
|
||||
writeString(",", out);
|
||||
|
||||
writeString(R"({"expression":)", out);
|
||||
writeString(doubleQuoteString(it->first), out);
|
||||
writeString(R"(,"min":)", out);
|
||||
writeIntText(it->second.min, out);
|
||||
writeString(R"(,"max":)", out);
|
||||
writeIntText(it->second.max, out);
|
||||
writeString("}", out);
|
||||
}
|
||||
writeString("]", out);
|
||||
};
|
||||
|
||||
bool is_first = columns_ttl.empty() && !table_ttl.min;
|
||||
if (!moves_ttl.empty())
|
||||
{
|
||||
if (!columns_ttl.empty() || table_ttl.min)
|
||||
writeString(",", out);
|
||||
writeString(R"("moves":[)", out);
|
||||
for (auto it = moves_ttl.begin(); it != moves_ttl.end(); ++it)
|
||||
{
|
||||
if (it != moves_ttl.begin())
|
||||
writeString(",", out);
|
||||
|
||||
writeString(R"({"expression":)", out);
|
||||
writeString(doubleQuoteString(it->first), out);
|
||||
writeString(R"(,"min":)", out);
|
||||
writeIntText(it->second.min, out);
|
||||
writeString(R"(,"max":)", out);
|
||||
writeIntText(it->second.max, out);
|
||||
writeString("}", out);
|
||||
}
|
||||
writeString("]", out);
|
||||
write_infos(moves_ttl, "moves", is_first);
|
||||
is_first = false;
|
||||
}
|
||||
|
||||
if (!recompression_ttl.empty())
|
||||
{
|
||||
if (!moves_ttl.empty() || !columns_ttl.empty() || table_ttl.min)
|
||||
writeString(",", out);
|
||||
|
||||
writeString(R"("recompression":[)", out);
|
||||
for (auto it = recompression_ttl.begin(); it != recompression_ttl.end(); ++it)
|
||||
{
|
||||
if (it != recompression_ttl.begin())
|
||||
writeString(",", out);
|
||||
|
||||
writeString(R"({"expression":)", out);
|
||||
writeString(doubleQuoteString(it->first), out);
|
||||
writeString(R"(,"min":)", out);
|
||||
writeIntText(it->second.min, out);
|
||||
writeString(R"(,"max":)", out);
|
||||
writeIntText(it->second.max, out);
|
||||
writeString("}", out);
|
||||
}
|
||||
writeString("]", out);
|
||||
write_infos(recompression_ttl, "recompression", is_first);
|
||||
is_first = false;
|
||||
}
|
||||
|
||||
if (!group_by_ttl.empty())
|
||||
{
|
||||
write_infos(group_by_ttl, "group_by", is_first);
|
||||
is_first = false;
|
||||
}
|
||||
|
||||
if (!rows_where_ttl.empty())
|
||||
write_infos(rows_where_ttl, "rows_where", is_first);
|
||||
|
||||
writeString("}", out);
|
||||
}
|
||||
|
||||
|
@ -45,14 +45,17 @@ struct MergeTreeDataPartTTLInfos
|
||||
time_t part_min_ttl = 0;
|
||||
time_t part_max_ttl = 0;
|
||||
|
||||
TTLInfoMap rows_where_ttl;
|
||||
|
||||
TTLInfoMap moves_ttl;
|
||||
|
||||
TTLInfoMap recompression_ttl;
|
||||
|
||||
TTLInfoMap group_by_ttl;
|
||||
|
||||
/// Return the smallest max recompression TTL value
|
||||
time_t getMinimalMaxRecompressionTTL() const;
|
||||
|
||||
|
||||
void read(ReadBuffer & in);
|
||||
void write(WriteBuffer & out) const;
|
||||
void update(const MergeTreeDataPartTTLInfos & other_infos);
|
||||
@ -68,6 +71,7 @@ struct MergeTreeDataPartTTLInfos
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
/// part_min_ttl in minimum of rows, rows_where and group_by TTLs
|
||||
return !part_min_ttl && moves_ttl.empty() && recompression_ttl.empty();
|
||||
}
|
||||
};
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include <Processors/QueryPlan/MergingSortedStep.h>
|
||||
#include <Processors/QueryPlan/UnionStep.h>
|
||||
#include <Processors/QueryPlan/MergingFinal.h>
|
||||
#include <Processors/QueryPlan/ReadNothingStep.h>
|
||||
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeEnum.h>
|
||||
@ -708,8 +709,9 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
||||
if (parts_with_ranges.empty())
|
||||
return std::make_unique<QueryPlan>();
|
||||
|
||||
const auto data_settings = data.getSettings();
|
||||
auto max_partitions_to_read
|
||||
= settings.max_partitions_to_read.changed ? settings.max_partitions_to_read : data.getSettings()->max_partitions_to_read;
|
||||
= settings.max_partitions_to_read.changed ? settings.max_partitions_to_read : data_settings->max_partitions_to_read;
|
||||
if (max_partitions_to_read > 0)
|
||||
{
|
||||
std::set<String> partitions;
|
||||
@ -723,6 +725,18 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
max_partitions_to_read);
}

String query_id;
if (data_settings->max_concurrent_queries > 0)
{
if (data_settings->min_marks_to_honor_max_concurrent_queries > 0
&& sum_marks >= data_settings->min_marks_to_honor_max_concurrent_queries)
{
query_id = context.getCurrentQueryId();
if (!query_id.empty())
data.insertQueryIdOrThrow(query_id, data_settings->max_concurrent_queries);
}
}

ProfileEvents::increment(ProfileEvents::SelectedParts, parts_with_ranges.size());
ProfileEvents::increment(ProfileEvents::SelectedRanges, sum_ranges);
ProfileEvents::increment(ProfileEvents::SelectedMarks, sum_marks);
@ -759,7 +773,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
||||
virt_column_names,
|
||||
settings,
|
||||
reader_settings,
|
||||
result_projection);
|
||||
result_projection,
|
||||
query_id);
|
||||
}
|
||||
else if ((settings.optimize_read_in_order || settings.optimize_aggregation_in_order) && query_info.input_order_info)
|
||||
{
|
||||
@ -782,7 +797,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
||||
virt_column_names,
|
||||
settings,
|
||||
reader_settings,
|
||||
result_projection);
|
||||
result_projection,
|
||||
query_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -796,7 +812,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
||||
query_info,
|
||||
virt_column_names,
|
||||
settings,
|
||||
reader_settings);
|
||||
reader_settings,
|
||||
query_id);
|
||||
}
|
||||
|
||||
if (!plan)
|
||||
@ -896,7 +913,7 @@ size_t minMarksForConcurrentRead(
|
||||
|
||||
}
|
||||
|
||||
static QueryPlanPtr createPlanFromPipe(Pipe pipe, const std::string & description = "")
|
||||
static QueryPlanPtr createPlanFromPipe(Pipe pipe, const String & query_id, const MergeTreeData & data, const std::string & description = "")
|
||||
{
|
||||
auto plan = std::make_unique<QueryPlan>();
|
||||
|
||||
@ -904,6 +921,10 @@ static QueryPlanPtr createPlanFromPipe(Pipe pipe, const std::string & descriptio
|
||||
if (!description.empty())
|
||||
storage_name += ' ' + description;
|
||||
|
||||
// Attach QueryIdHolder if needed
|
||||
if (!query_id.empty())
|
||||
pipe.addQueryIdHolder(std::make_shared<QueryIdHolder>(query_id, data));
|
||||
|
||||
auto step = std::make_unique<ReadFromStorageStep>(std::move(pipe), storage_name);
|
||||
plan->addStep(std::move(step));
|
||||
return plan;
|
||||
@ -919,7 +940,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
|
||||
const SelectQueryInfo & query_info,
|
||||
const Names & virt_columns,
|
||||
const Settings & settings,
|
||||
const MergeTreeReaderSettings & reader_settings) const
|
||||
const MergeTreeReaderSettings & reader_settings,
|
||||
const String & query_id) const
|
||||
{
|
||||
/// Count marks for each part.
|
||||
std::vector<size_t> sum_marks_in_parts(parts.size());
|
||||
@ -1004,7 +1026,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
|
||||
res.emplace_back(std::move(source));
|
||||
}
|
||||
|
||||
return createPlanFromPipe(Pipe::unitePipes(std::move(res)));
|
||||
return createPlanFromPipe(Pipe::unitePipes(std::move(res)), query_id, data);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1028,7 +1050,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
|
||||
if (pipe.numOutputPorts() > 1)
|
||||
pipe.addTransform(std::make_shared<ConcatProcessor>(pipe.getHeader(), pipe.numOutputPorts()));
|
||||
|
||||
return createPlanFromPipe(std::move(pipe));
|
||||
return createPlanFromPipe(std::move(pipe), query_id, data);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1052,7 +1074,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
|
||||
const Names & virt_columns,
|
||||
const Settings & settings,
|
||||
const MergeTreeReaderSettings & reader_settings,
|
||||
ActionsDAGPtr & out_projection) const
|
||||
ActionsDAGPtr & out_projection,
|
||||
const String & query_id) const
|
||||
{
|
||||
size_t sum_marks = 0;
|
||||
const InputOrderInfoPtr & input_order_info = query_info.input_order_info;
|
||||
@ -1243,7 +1266,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
|
||||
}
|
||||
}
|
||||
|
||||
auto plan = createPlanFromPipe(Pipe::unitePipes(std::move(pipes)), " with order");
|
||||
auto plan = createPlanFromPipe(Pipe::unitePipes(std::move(pipes)), query_id, data, " with order");
|
||||
|
||||
if (input_order_info->direction != 1)
|
||||
{
|
||||
@ -1311,7 +1334,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
|
||||
const Names & virt_columns,
|
||||
const Settings & settings,
|
||||
const MergeTreeReaderSettings & reader_settings,
|
||||
ActionsDAGPtr & out_projection) const
|
||||
ActionsDAGPtr & out_projection,
|
||||
const String & query_id) const
|
||||
{
|
||||
const auto data_settings = data.getSettings();
|
||||
size_t sum_marks = 0;
|
||||
@ -1370,6 +1394,12 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
|
||||
|
||||
std::vector<QueryPlanPtr> partition_plans;
|
||||
|
||||
/// If do_not_merge_across_partitions_select_final is true and num_streams > 1
|
||||
/// we will store lonely parts with level > 0 to use parallel select on them.
|
||||
std::vector<RangesInDataPart> lonely_parts;
|
||||
size_t total_rows_in_lonely_parts = 0;
|
||||
size_t sum_marks_in_lonely_parts = 0;
|
||||
|
||||
for (size_t range_index = 0; range_index < parts_to_merge_ranges.size() - 1; ++range_index)
|
||||
{
|
||||
QueryPlanPtr plan;
|
||||
@ -1377,25 +1407,41 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
|
||||
{
|
||||
Pipes pipes;
|
||||
|
||||
for (auto part_it = parts_to_merge_ranges[range_index]; part_it != parts_to_merge_ranges[range_index + 1]; ++part_it)
|
||||
/// If do_not_merge_across_partitions_select_final is true and there is only one part in partition
|
||||
/// with level > 0 then we won't postprocess this part and if num_streams > 1 we
|
||||
/// can use parallel select on such parts. We save such parts in one vector and then use
|
||||
/// MergeTreeReadPool and MergeTreeThreadSelectBlockInputProcessor for parallel select.
|
||||
if (num_streams > 1 && settings.do_not_merge_across_partitions_select_final &&
|
||||
std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 &&
|
||||
parts_to_merge_ranges[range_index]->data_part->info.level > 0)
|
||||
{
|
||||
auto source_processor = std::make_shared<MergeTreeSelectProcessor>(
|
||||
data,
|
||||
metadata_snapshot,
|
||||
part_it->data_part,
|
||||
max_block_size,
|
||||
settings.preferred_block_size_bytes,
|
||||
settings.preferred_max_column_in_block_size_bytes,
|
||||
column_names,
|
||||
part_it->ranges,
|
||||
use_uncompressed_cache,
|
||||
query_info.prewhere_info,
|
||||
true,
|
||||
reader_settings,
|
||||
virt_columns,
|
||||
part_it->part_index_in_query);
|
||||
total_rows_in_lonely_parts += parts_to_merge_ranges[range_index]->getRowsCount();
|
||||
sum_marks_in_lonely_parts += parts_to_merge_ranges[range_index]->getMarksCount();
|
||||
lonely_parts.push_back(std::move(*parts_to_merge_ranges[range_index]));
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto part_it = parts_to_merge_ranges[range_index]; part_it != parts_to_merge_ranges[range_index + 1]; ++part_it)
|
||||
{
|
||||
auto source_processor = std::make_shared<MergeTreeSelectProcessor>(
|
||||
data,
|
||||
metadata_snapshot,
|
||||
part_it->data_part,
|
||||
max_block_size,
|
||||
settings.preferred_block_size_bytes,
|
||||
settings.preferred_max_column_in_block_size_bytes,
|
||||
column_names,
|
||||
part_it->ranges,
|
||||
use_uncompressed_cache,
|
||||
query_info.prewhere_info,
|
||||
true,
|
||||
reader_settings,
|
||||
virt_columns,
|
||||
part_it->part_index_in_query);
|
||||
|
||||
pipes.emplace_back(std::move(source_processor));
|
||||
pipes.emplace_back(std::move(source_processor));
|
||||
}
|
||||
}
|
||||
|
||||
if (pipes.empty())
|
||||
@ -1407,9 +1453,16 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
|
||||
if (!out_projection)
|
||||
out_projection = createProjection(pipe.getHeader());
|
||||
|
||||
plan = createPlanFromPipe(std::move(pipe), "with final");
|
||||
plan = createPlanFromPipe(std::move(pipe), query_id, data, "with final");
|
||||
}
|
||||
|
||||
auto expression_step = std::make_unique<ExpressionStep>(
|
||||
plan->getCurrentDataStream(),
|
||||
metadata_snapshot->getSortingKey().expression->getActionsDAG().clone());
|
||||
|
||||
expression_step->setStepDescription("Calculate sorting key expression");
|
||||
plan->addStep(std::move(expression_step));
|
||||
|
||||
/// If do_not_merge_across_partitions_select_final is true and there is only one part in partition
|
||||
/// with level > 0 then we won't postprocess this part
|
||||
if (settings.do_not_merge_across_partitions_select_final &&
|
||||
@ -1420,13 +1473,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
|
||||
continue;
|
||||
}
|
||||
|
||||
auto expression_step = std::make_unique<ExpressionStep>(
|
||||
plan->getCurrentDataStream(),
|
||||
metadata_snapshot->getSortingKey().expression->getActionsDAG().clone());
|
||||
|
||||
expression_step->setStepDescription("Calculate sorting key expression");
|
||||
plan->addStep(std::move(expression_step));
|
||||
|
||||
Names sort_columns = metadata_snapshot->getSortingKeyColumns();
|
||||
SortDescription sort_description;
|
||||
size_t sort_columns_size = sort_columns.size();
|
||||
@ -1452,6 +1498,69 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
|
||||
partition_plans.emplace_back(std::move(plan));
|
||||
}
|
||||
|
||||
if (!lonely_parts.empty())
|
||||
{
|
||||
Pipes pipes;
|
||||
|
||||
size_t num_streams_for_lonely_parts = num_streams * lonely_parts.size();
|
||||
|
||||
const size_t min_marks_for_concurrent_read = minMarksForConcurrentRead(
|
||||
settings.merge_tree_min_rows_for_concurrent_read,
|
||||
settings.merge_tree_min_bytes_for_concurrent_read,
|
||||
data_settings->index_granularity,
|
||||
index_granularity_bytes,
|
||||
sum_marks_in_lonely_parts);
|
||||
|
||||
/// Reduce the number of num_streams_for_lonely_parts if the data is small.
|
||||
if (sum_marks_in_lonely_parts < num_streams_for_lonely_parts * min_marks_for_concurrent_read && lonely_parts.size() < num_streams_for_lonely_parts)
|
||||
num_streams_for_lonely_parts = std::max((sum_marks_in_lonely_parts + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, lonely_parts.size());
|
||||
|
||||
|
||||
MergeTreeReadPoolPtr pool = std::make_shared<MergeTreeReadPool>(
|
||||
num_streams_for_lonely_parts,
|
||||
sum_marks_in_lonely_parts,
|
||||
min_marks_for_concurrent_read,
|
||||
std::move(lonely_parts),
|
||||
data,
|
||||
metadata_snapshot,
|
||||
query_info.prewhere_info,
|
||||
true,
|
||||
column_names,
|
||||
MergeTreeReadPool::BackoffSettings(settings),
|
||||
settings.preferred_block_size_bytes,
|
||||
false);
|
||||
|
||||
LOG_TRACE(log, "Reading approx. {} rows with {} streams", total_rows_in_lonely_parts, num_streams_for_lonely_parts);
|
||||
|
||||
for (size_t i = 0; i < num_streams_for_lonely_parts; ++i)
|
||||
{
|
||||
auto source = std::make_shared<MergeTreeThreadSelectBlockInputProcessor>(
|
||||
i, pool, min_marks_for_concurrent_read, max_block_size,
|
||||
settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes,
|
||||
data, metadata_snapshot, use_uncompressed_cache,
|
||||
query_info.prewhere_info, reader_settings, virt_columns);
|
||||
|
||||
pipes.emplace_back(std::move(source));
|
||||
}
|
||||
|
||||
auto pipe = Pipe::unitePipes(std::move(pipes));
|
||||
|
||||
/// Drop temporary columns, added by 'sorting_key_expr'
|
||||
if (!out_projection)
|
||||
out_projection = createProjection(pipe.getHeader());
|
||||
|
||||
QueryPlanPtr plan = createPlanFromPipe(std::move(pipe), query_id, data, "with final");
|
||||
|
||||
auto expression_step = std::make_unique<ExpressionStep>(
|
||||
plan->getCurrentDataStream(),
|
||||
metadata_snapshot->getSortingKey().expression->getActionsDAG().clone());
|
||||
|
||||
expression_step->setStepDescription("Calculate sorting key expression");
|
||||
plan->addStep(std::move(expression_step));
|
||||
|
||||
partition_plans.emplace_back(std::move(plan));
|
||||
}
|
||||
|
||||
if (partition_plans.empty())
|
||||
return {};
|
||||
|
||||
|
@ -58,7 +58,8 @@ private:
|
||||
const SelectQueryInfo & query_info,
|
||||
const Names & virt_columns,
|
||||
const Settings & settings,
|
||||
const MergeTreeReaderSettings & reader_settings) const;
|
||||
const MergeTreeReaderSettings & reader_settings,
|
||||
const String & query_id) const;
|
||||
|
||||
/// out_projection - save projection only with columns, requested to read
|
||||
QueryPlanPtr spreadMarkRangesAmongStreamsWithOrder(
|
||||
@ -73,7 +74,8 @@ private:
|
||||
const Names & virt_columns,
|
||||
const Settings & settings,
|
||||
const MergeTreeReaderSettings & reader_settings,
|
||||
ActionsDAGPtr & out_projection) const;
|
||||
ActionsDAGPtr & out_projection,
|
||||
const String & query_id) const;
|
||||
|
||||
QueryPlanPtr spreadMarkRangesAmongStreamsFinal(
|
||||
RangesInDataParts && parts,
|
||||
@ -86,7 +88,8 @@ private:
|
||||
const Names & virt_columns,
|
||||
const Settings & settings,
|
||||
const MergeTreeReaderSettings & reader_settings,
|
||||
ActionsDAGPtr & out_projection) const;
|
||||
ActionsDAGPtr & out_projection,
|
||||
const String & query_id) const;
|
||||
|
||||
/// Get the approximate value (bottom estimate - only by full marks) of the number of rows falling under the index.
|
||||
size_t getApproximateTotalRowsToRead(
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Poco/File.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <DataStreams/ITTLAlgorithm.h>
|
||||
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
@ -91,31 +92,23 @@ void updateTTL(
|
||||
const TTLDescription & ttl_entry,
|
||||
IMergeTreeDataPart::TTLInfos & ttl_infos,
|
||||
DB::MergeTreeDataPartTTLInfo & ttl_info,
|
||||
Block & block,
|
||||
const Block & block,
|
||||
bool update_part_min_max_ttls)
|
||||
{
|
||||
bool remove_column = false;
|
||||
if (!block.has(ttl_entry.result_column))
|
||||
{
|
||||
ttl_entry.expression->execute(block);
|
||||
remove_column = true;
|
||||
}
|
||||
auto ttl_column = ITTLAlgorithm::executeExpressionAndGetColumn(ttl_entry.expression, block, ttl_entry.result_column);
|
||||
|
||||
const auto & current = block.getByName(ttl_entry.result_column);
|
||||
|
||||
const IColumn * column = current.column.get();
|
||||
if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(column))
|
||||
if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(ttl_column.get()))
|
||||
{
|
||||
const auto & date_lut = DateLUT::instance();
|
||||
for (const auto & val : column_date->getData())
|
||||
ttl_info.update(date_lut.fromDayNum(DayNum(val)));
|
||||
}
|
||||
else if (const ColumnUInt32 * column_date_time = typeid_cast<const ColumnUInt32 *>(column))
|
||||
else if (const ColumnUInt32 * column_date_time = typeid_cast<const ColumnUInt32 *>(ttl_column.get()))
|
||||
{
|
||||
for (const auto & val : column_date_time->getData())
|
||||
ttl_info.update(val);
|
||||
}
|
||||
else if (const ColumnConst * column_const = typeid_cast<const ColumnConst *>(column))
|
||||
else if (const ColumnConst * column_const = typeid_cast<const ColumnConst *>(ttl_column.get()))
|
||||
{
|
||||
if (typeid_cast<const ColumnUInt16 *>(&column_const->getDataColumn()))
|
||||
{
|
||||
@ -134,9 +127,6 @@ void updateTTL(
|
||||
|
||||
if (update_part_min_max_ttls)
|
||||
ttl_infos.updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
|
||||
|
||||
if (remove_column)
|
||||
block.erase(ttl_entry.result_column);
|
||||
}
|
||||
|
||||
}
|
||||
@ -383,6 +373,12 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
|
||||
if (metadata_snapshot->hasRowsTTL())
|
||||
updateTTL(metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true);
|
||||
|
||||
for (const auto & ttl_entry : metadata_snapshot->getGroupByTTLs())
|
||||
updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.group_by_ttl[ttl_entry.result_column], block, true);
|
||||
|
||||
for (const auto & ttl_entry : metadata_snapshot->getRowsWhereTTLs())
|
||||
updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.rows_where_ttl[ttl_entry.result_column], block, true);
|
||||
|
||||
for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs())
|
||||
updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true);
|
||||
|
||||
|
@ -111,6 +111,8 @@ struct Settings;
M(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm", 0) \
M(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \
M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited. This setting is the default that can be overridden by the query-level setting with the same name.", 0) \
M(UInt64, max_concurrent_queries, 0, "Max number of concurrently executed queries related to the MergeTree table (0 - disabled). Queries will still be limited by other max_concurrent_queries settings.", 0) \
M(UInt64, min_marks_to_honor_max_concurrent_queries, 0, "Minimal number of marks to honor the MergeTree-level's max_concurrent_queries (0 - disabled). Queries will still be limited by other max_concurrent_queries settings.", 0) \
\
/** Obsolete settings. Kept for backward compatibility only. */ \
M(UInt64, min_relative_delay_to_yield_leadership, 120, "Obsolete setting, does nothing.", 0) \

@ -1,7 +1,6 @@
|
||||
#include <Storages/StorageDistributed.h>
|
||||
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Disks/StoragePolicy.h>
|
||||
#include <Disks/IDisk.h>
|
||||
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
|
@ -128,7 +128,7 @@ TTLTableDescription StorageInMemoryMetadata::getTableTTLs() const
|
||||
|
||||
bool StorageInMemoryMetadata::hasAnyTableTTL() const
|
||||
{
|
||||
return hasAnyMoveTTL() || hasRowsTTL() || hasAnyRecompressionTTL();
|
||||
return hasAnyMoveTTL() || hasRowsTTL() || hasAnyRecompressionTTL() || hasAnyGroupByTTL() || hasAnyRowsWhereTTL();
|
||||
}
|
||||
|
||||
TTLColumnsDescription StorageInMemoryMetadata::getColumnTTLs() const
|
||||
@ -151,6 +151,16 @@ bool StorageInMemoryMetadata::hasRowsTTL() const
|
||||
return table_ttl.rows_ttl.expression != nullptr;
|
||||
}
|
||||
|
||||
TTLDescriptions StorageInMemoryMetadata::getRowsWhereTTLs() const
|
||||
{
|
||||
return table_ttl.rows_where_ttl;
|
||||
}
|
||||
|
||||
bool StorageInMemoryMetadata::hasAnyRowsWhereTTL() const
|
||||
{
|
||||
return !table_ttl.rows_where_ttl.empty();
|
||||
}
|
||||
|
||||
TTLDescriptions StorageInMemoryMetadata::getMoveTTLs() const
|
||||
{
|
||||
return table_ttl.move_ttl;
|
||||
@ -171,6 +181,16 @@ bool StorageInMemoryMetadata::hasAnyRecompressionTTL() const
|
||||
return !table_ttl.recompression_ttl.empty();
|
||||
}
|
||||
|
||||
TTLDescriptions StorageInMemoryMetadata::getGroupByTTLs() const
|
||||
{
|
||||
return table_ttl.group_by_ttl;
|
||||
}
|
||||
|
||||
bool StorageInMemoryMetadata::hasAnyGroupByTTL() const
|
||||
{
|
||||
return !table_ttl.group_by_ttl.empty();
|
||||
}
|
||||
|
||||
ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(const NameSet & updated_columns) const
|
||||
{
|
||||
if (updated_columns.empty())
|
||||
|
@ -109,6 +109,9 @@ struct StorageInMemoryMetadata
|
||||
TTLDescription getRowsTTL() const;
|
||||
bool hasRowsTTL() const;
|
||||
|
||||
TTLDescriptions getRowsWhereTTLs() const;
|
||||
bool hasAnyRowsWhereTTL() const;
|
||||
|
||||
/// Just wrapper for table TTLs, return moves (to disks or volumes) parts of
|
||||
/// table TTL.
|
||||
TTLDescriptions getMoveTTLs() const;
|
||||
@ -118,6 +121,10 @@ struct StorageInMemoryMetadata
|
||||
TTLDescriptions getRecompressionTTLs() const;
|
||||
bool hasAnyRecompressionTTL() const;
|
||||
|
||||
// Just wrapper for table TTLs, return info about recompression ttl
|
||||
TTLDescriptions getGroupByTTLs() const;
|
||||
bool hasAnyGroupByTTL() const;
|
||||
|
||||
/// Returns columns, which will be needed to calculate dependencies (skip
|
||||
/// indices, TTL expressions) if we update @updated_columns set of columns.
|
||||
ColumnDependencies getColumnDependencies(const NameSet & updated_columns) const;
|
||||
|
@ -1,6 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
#include "config_core.h"
|
||||
#endif
|
||||
|
||||
#if USE_MYSQL
|
||||
|
||||
|
@ -22,7 +22,6 @@
|
||||
#include <Storages/MergeTree/MergeTreeBlockOutputStream.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
|
||||
#include <Storages/MergeTree/PartitionPruner.h>
|
||||
#include <Disks/StoragePolicy.h>
|
||||
#include <Storages/MergeTree/MergeList.h>
|
||||
#include <Storages/MergeTree/checkDataPart.h>
|
||||
#include <Processors/Pipe.h>
|
||||
|
@ -27,7 +27,6 @@
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreePartHeader.h>
|
||||
#include <Storages/VirtualColumnUtils.h>
|
||||
|
||||
#include <Disks/StoragePolicy.h>
|
||||
|
||||
#include <Databases/IDatabase.h>
|
||||
|
||||
|
@ -23,6 +23,7 @@ const char * auto_contributors[] {
|
||||
"Alexander Burmak",
|
||||
"Alexander Ermolaev",
|
||||
"Alexander GQ Gerasiov",
|
||||
"Alexander Gololobov",
|
||||
"Alexander Kazakov",
|
||||
"Alexander Kozhikhov",
|
||||
"Alexander Krasheninnikov",
|
||||
@ -43,6 +44,7 @@ const char * auto_contributors[] {
|
||||
"Alexandr Krasheninnikov",
|
||||
"Alexandr Orlov",
|
||||
"Alexandra Latysheva",
|
||||
"Alexandre Snarskii",
|
||||
"Alexei Averchenko",
|
||||
"Alexey",
|
||||
"Alexey Arno",
|
||||
@ -143,6 +145,7 @@ const char * auto_contributors[] {
|
||||
"CurtizJ",
|
||||
"Daniel Bershatsky",
|
||||
"Daniel Dao",
|
||||
"Daniel Qin",
|
||||
"Danila Kutenin",
|
||||
"Dao Minh Thuc",
|
||||
"Daria Mozhaeva",
|
||||
@ -309,7 +312,9 @@ const char * auto_contributors[] {
|
||||
"Marek Vavrusa",
|
||||
"Marek Vavruša",
|
||||
"Marek Vavruša",
|
||||
"Mariano Benítez Mulet",
|
||||
"Mark Andreev",
|
||||
"Mark Frost",
|
||||
"Mark Papadakis",
|
||||
"Maroun Maroun",
|
||||
"Marsel Arduanov",
|
||||
@ -422,6 +427,7 @@ const char * auto_contributors[] {
|
||||
"Rafael David Tinoco",
|
||||
"Ramazan Polat",
|
||||
"Ravengg",
|
||||
"RegulusZ",
|
||||
"Reilee",
|
||||
"Reto Kromer",
|
||||
"Ri",
|
||||
@ -482,9 +488,11 @@ const char * auto_contributors[] {
|
||||
"Tangaev",
|
||||
"Tema Novikov",
|
||||
"The-Alchemist",
|
||||
"TiunovNN",
|
||||
"Tobias Adamson",
|
||||
"Tom Bombadil",
|
||||
"Tsarkova Anastasia",
|
||||
"TszkitLo40",
|
||||
"Ubuntu",
|
||||
"Ubus",
|
||||
"UnamedRus",
|
||||
@ -556,6 +564,7 @@ const char * auto_contributors[] {
|
||||
"Yury Stankevich",
|
||||
"Zhichang Yu",
|
||||
"Zhipeng",
|
||||
"Zoran Pandovski",
|
||||
"a.palagashvili",
|
||||
"abdrakhmanov",
|
||||
"abyss7",
|
||||
@ -571,6 +580,7 @@ const char * auto_contributors[] {
|
||||
"alex.lvxin",
|
||||
"alexander kozhikhov",
|
||||
"alexey-milovidov",
|
||||
"alfredlu",
|
||||
"amoschen",
|
||||
"amudong",
|
||||
"ana-uvarova",
|
||||
@ -588,14 +598,17 @@ const char * auto_contributors[] {
|
||||
"avsharapov",
|
||||
"awesomeleo",
|
||||
"benamazing",
|
||||
"benbiti",
|
||||
"bgranvea",
|
||||
"bharatnc",
|
||||
"blazerer",
|
||||
"bluebirddm",
|
||||
"bo zeng",
|
||||
"bobrovskij artemij",
|
||||
"booknouse",
|
||||
"bseng",
|
||||
"cekc",
|
||||
"centos7",
|
||||
"champtar",
|
||||
"chang.chen",
|
||||
"chengy8934",
|
||||
@ -606,6 +619,7 @@ const char * auto_contributors[] {
|
||||
"comunodi",
|
||||
"coraxster",
|
||||
"damozhaeva",
|
||||
"dankondr",
|
||||
"daoready",
|
||||
"dasmfm",
|
||||
"davydovska",
|
||||
@ -627,6 +641,7 @@ const char * auto_contributors[] {
|
||||
"elBroom",
|
||||
"elenaspb2019",
|
||||
"emakarov",
|
||||
"emhlbmc",
|
||||
"emironyuk",
|
||||
"evtan",
|
||||
"exprmntr",
|
||||
@ -673,6 +688,7 @@ const char * auto_contributors[] {
|
||||
"javi santana",
|
||||
"jetgm",
|
||||
"jianmei zhang",
|
||||
"jyz0309",
|
||||
"kmeaw",
|
||||
"koshachy",
|
||||
"kreuzerkrieg",
|
||||
@ -779,7 +795,9 @@ const char * auto_contributors[] {
|
||||
"taiyang-li",
|
||||
"tao jiang",
|
||||
"tavplubix",
|
||||
"templarzq",
|
||||
"tiger.yan",
|
||||
"tison",
|
||||
"topvisor",
|
||||
"tyrionhuang",
|
||||
"ubuntu",
|
||||
@ -800,6 +818,7 @@ const char * auto_contributors[] {
|
||||
"weeds085490",
|
||||
"xPoSx",
|
||||
"yangshuai",
|
||||
"ygrek",
|
||||
"yhgcn",
|
||||
"ylchou",
|
||||
"yonesko",
|
||||
|
@ -68,6 +68,14 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_)
{"recompression_ttl_info.expression", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"recompression_ttl_info.min", std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())},
{"recompression_ttl_info.max", std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())},

{"group_by_ttl_info.expression", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"group_by_ttl_info.min", std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())},
{"group_by_ttl_info.max", std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())},

{"rows_where_ttl_info.expression", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"rows_where_ttl_info.min", std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())},
{"rows_where_ttl_info.max", std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())}
}
)
{
@ -181,6 +189,8 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns_, const Sto
columns_[i++]->insert(queryToString(part->default_codec->getCodecDesc()));

add_ttl_info_map(part->ttl_infos.recompression_ttl);
add_ttl_info_map(part->ttl_infos.group_by_ttl);
add_ttl_info_map(part->ttl_infos.rows_where_ttl);

/// _state column should be the latest.
if (has_state_column)

@ -7,6 +7,7 @@
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/StorageMaterializeMySQL.h>
|
||||
#include <Storages/VirtualColumnUtils.h>
|
||||
#include <Access/ContextAccess.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
@ -119,6 +120,13 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, const
|
||||
|
||||
String engine_name = storage->getName();
|
||||
|
||||
#if USE_MYSQL
|
||||
if (auto * proxy = dynamic_cast<StorageMaterializeMySQL *>(storage.get()))
|
||||
{
|
||||
auto nested = proxy->getNested();
|
||||
storage.swap(nested);
|
||||
}
|
||||
#endif
|
||||
if (!dynamic_cast<MergeTreeData *>(storage.get()))
|
||||
continue;
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <Disks/StoragePolicy.h>
|
||||
#include <Disks/IStoragePolicy.h>
|
||||
#include <Processors/Sources/SourceWithProgress.h>
|
||||
#include <Processors/Pipe.h>
|
||||
#include <DataTypes/DataTypeUUID.h>
|
||||
|
@ -1,15 +1,21 @@
|
||||
#include <Storages/TTLDescription.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
#include <Interpreters/TreeRewriter.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
#include <Interpreters/addTypeConversionToAST.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTTTLElement.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTAssignment.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
@ -77,6 +83,24 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin
|
||||
}
|
||||
}
|
||||
|
||||
class FindAggregateFunctionData
|
||||
{
|
||||
public:
|
||||
using TypeToVisit = ASTFunction;
|
||||
bool has_aggregate_function = false;
|
||||
|
||||
void visit(const ASTFunction & func, ASTPtr &)
|
||||
{
|
||||
/// Do not throw if found aggregate function inside another aggregate function,
|
||||
/// because it will be checked, while creating expressions.
|
||||
if (AggregateFunctionFactory::instance().isAggregateFunctionName(func.name))
|
||||
has_aggregate_function = true;
|
||||
}
|
||||
};
|
||||
|
||||
using FindAggregateFunctionFinderMatcher = OneTypeMatcher<FindAggregateFunctionData>;
|
||||
using FindAggregateFunctionVisitor = InDepthNodeVisitor<FindAggregateFunctionFinderMatcher, true>;
|
||||
|
||||
}
|
||||
|
||||
TTLDescription::TTLDescription(const TTLDescription & other)
|
||||
@ -182,11 +206,8 @@ TTLDescription TTLDescription::getTTLFromAST(
|
||||
if (ttl_element->group_by_key.size() > pk_columns.size())
|
||||
throw Exception("TTL Expression GROUP BY key should be a prefix of primary key", ErrorCodes::BAD_TTL_EXPRESSION);
|
||||
|
||||
NameSet primary_key_columns_set(pk_columns.begin(), pk_columns.end());
|
||||
NameSet aggregation_columns_set;
|
||||
|
||||
for (const auto & column : primary_key.expression->getRequiredColumns())
|
||||
primary_key_columns_set.insert(column);
|
||||
NameSet used_primary_key_columns_set;
|
||||
|
||||
for (size_t i = 0; i < ttl_element->group_by_key.size(); ++i)
|
||||
{
|
||||
@ -194,61 +215,54 @@ TTLDescription TTLDescription::getTTLFromAST(
|
||||
throw Exception(
|
||||
"TTL Expression GROUP BY key should be a prefix of primary key",
|
||||
ErrorCodes::BAD_TTL_EXPRESSION);
|
||||
|
||||
used_primary_key_columns_set.insert(pk_columns[i]);
|
||||
}
|
||||
|
||||
for (const auto & [name, value] : ttl_element->group_by_aggregations)
|
||||
std::vector<std::pair<String, ASTPtr>> aggregations;
|
||||
for (const auto & ast : ttl_element->group_by_assignments)
|
||||
{
|
||||
if (primary_key_columns_set.count(name))
|
||||
throw Exception(
|
||||
"Can not set custom aggregation for column in primary key in TTL Expression",
|
||||
ErrorCodes::BAD_TTL_EXPRESSION);
|
||||
const auto assignment = ast->as<const ASTAssignment &>();
|
||||
auto expression = assignment.expression();
|
||||
|
||||
aggregation_columns_set.insert(name);
|
||||
FindAggregateFunctionVisitor::Data data{false};
|
||||
FindAggregateFunctionVisitor(data).visit(expression);
|
||||
|
||||
if (!data.has_aggregate_function)
|
||||
throw Exception(ErrorCodes::BAD_TTL_EXPRESSION,
|
||||
"Invalid expression for assignment of column {}. Should contain an aggregate function", assignment.column_name);
|
||||
|
||||
expression = addTypeConversionToAST(std::move(expression), columns.getPhysical(assignment.column_name).type->getName());
|
||||
aggregations.emplace_back(assignment.column_name, std::move(expression));
|
||||
aggregation_columns_set.insert(assignment.column_name);
|
||||
}
|
||||
|
||||
if (aggregation_columns_set.size() != ttl_element->group_by_aggregations.size())
|
||||
if (aggregation_columns_set.size() != ttl_element->group_by_assignments.size())
|
||||
throw Exception(
|
||||
"Multiple aggregations set for one column in TTL Expression",
|
||||
ErrorCodes::BAD_TTL_EXPRESSION);
|
||||
|
||||
|
||||
result.group_by_keys = Names(pk_columns.begin(), pk_columns.begin() + ttl_element->group_by_key.size());
|
||||
|
||||
auto aggregations = ttl_element->group_by_aggregations;
|
||||
const auto & primary_key_expressions = primary_key.expression_list_ast->children;
|
||||
|
||||
for (size_t i = 0; i < pk_columns.size(); ++i)
|
||||
/// Wrap with 'any' aggregate function primary key columns,
|
||||
/// which are not in 'GROUP BY' key and was not set explicitly.
|
||||
/// The separate step, because not all primary key columns are ordinary columns.
|
||||
for (size_t i = ttl_element->group_by_key.size(); i < primary_key_expressions.size(); ++i)
|
||||
{
|
||||
ASTPtr value = primary_key.expression_list_ast->children[i]->clone();
|
||||
|
||||
if (i >= ttl_element->group_by_key.size())
|
||||
if (!aggregation_columns_set.count(pk_columns[i]))
|
||||
{
|
||||
ASTPtr value_max = makeASTFunction("max", value->clone());
|
||||
aggregations.emplace_back(value->getColumnName(), std::move(value_max));
|
||||
}
|
||||
|
||||
if (value->as<ASTFunction>())
|
||||
{
|
||||
auto syntax_result = TreeRewriter(context).analyze(value, columns.getAllPhysical(), {}, {}, true);
|
||||
auto expr_actions = ExpressionAnalyzer(value, syntax_result, context).getActions(false);
|
||||
for (const auto & column : expr_actions->getRequiredColumns())
|
||||
{
|
||||
if (i < ttl_element->group_by_key.size())
|
||||
{
|
||||
ASTPtr expr = makeASTFunction("any", std::make_shared<ASTIdentifier>(column));
|
||||
aggregations.emplace_back(column, std::move(expr));
|
||||
}
|
||||
else
|
||||
{
|
||||
ASTPtr expr = makeASTFunction("argMax", std::make_shared<ASTIdentifier>(column), value->clone());
|
||||
aggregations.emplace_back(column, std::move(expr));
|
||||
}
|
||||
}
|
||||
ASTPtr expr = makeASTFunction("any", primary_key_expressions[i]->clone());
|
||||
aggregations.emplace_back(pk_columns[i], std::move(expr));
|
||||
aggregation_columns_set.insert(pk_columns[i]);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto & column : columns.getAllPhysical())
|
||||
/// Wrap with 'any' aggregate function other columns, which was not set explicitly.
|
||||
for (const auto & column : columns.getOrdinary())
|
||||
{
|
||||
if (!primary_key_columns_set.count(column.name) && !aggregation_columns_set.count(column.name))
|
||||
if (!aggregation_columns_set.count(column.name) && !used_primary_key_columns_set.count(column.name))
|
||||
{
|
||||
ASTPtr expr = makeASTFunction("any", std::make_shared<ASTIdentifier>(column.name));
|
||||
aggregations.emplace_back(column.name, std::move(expr));
|
||||
@ -280,8 +294,6 @@ TTLDescription TTLDescription::getTTLFromAST(
|
||||
}
|
||||
|
||||
checkTTLExpression(result.expression, result.result_column);
|
||||
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -289,8 +301,10 @@ TTLDescription TTLDescription::getTTLFromAST(
|
||||
TTLTableDescription::TTLTableDescription(const TTLTableDescription & other)
|
||||
: definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr)
|
||||
, rows_ttl(other.rows_ttl)
|
||||
, rows_where_ttl(other.rows_where_ttl)
|
||||
, move_ttl(other.move_ttl)
|
||||
, recompression_ttl(other.recompression_ttl)
|
||||
, group_by_ttl(other.group_by_ttl)
|
||||
{
|
||||
}
|
||||
|
||||
@ -305,8 +319,10 @@ TTLTableDescription & TTLTableDescription::operator=(const TTLTableDescription &
|
||||
definition_ast.reset();
|
||||
|
||||
rows_ttl = other.rows_ttl;
|
||||
rows_where_ttl = other.rows_where_ttl;
|
||||
move_ttl = other.move_ttl;
|
||||
recompression_ttl = other.recompression_ttl;
|
||||
group_by_ttl = other.group_by_ttl;
|
||||
|
||||
return *this;
|
||||
}
|
||||
@ -323,21 +339,33 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST(
|
||||
|
||||
result.definition_ast = definition_ast->clone();
|
||||
|
||||
bool seen_delete_ttl = false;
|
||||
bool have_unconditional_delete_ttl = false;
|
||||
for (const auto & ttl_element_ptr : definition_ast->children)
|
||||
{
|
||||
auto ttl = TTLDescription::getTTLFromAST(ttl_element_ptr, columns, context, primary_key);
|
||||
if (ttl.mode == TTLMode::DELETE || ttl.mode == TTLMode::GROUP_BY)
|
||||
if (ttl.mode == TTLMode::DELETE)
|
||||
{
|
||||
if (seen_delete_ttl)
|
||||
throw Exception("More than one DELETE TTL expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION);
|
||||
result.rows_ttl = ttl;
|
||||
seen_delete_ttl = true;
|
||||
if (!ttl.where_expression)
|
||||
{
|
||||
if (have_unconditional_delete_ttl)
|
||||
throw Exception("More than one DELETE TTL expression without WHERE expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION);
|
||||
|
||||
have_unconditional_delete_ttl = true;
|
||||
result.rows_ttl = ttl;
|
||||
}
|
||||
else
|
||||
{
|
||||
result.rows_where_ttl.emplace_back(std::move(ttl));
|
||||
}
|
||||
}
|
||||
else if (ttl.mode == TTLMode::RECOMPRESS)
|
||||
{
|
||||
result.recompression_ttl.emplace_back(std::move(ttl));
|
||||
}
|
||||
else if (ttl.mode == TTLMode::GROUP_BY)
|
||||
{
|
||||
result.group_by_ttl.emplace_back(std::move(ttl));
|
||||
}
|
||||
else
|
||||
{
|
||||
result.move_ttl.emplace_back(std::move(ttl));
|
||||
|