Merge remote-tracking branch 'origin/master' into rocksdb_metacache

taiyang-li 2022-01-04 10:00:34 +08:00
commit 3f6d830536
474 changed files with 16487 additions and 2657 deletions

.github/CODEOWNERS
View File

@ -1,3 +1 @@
docs/* @ClickHouse/docs
docs/zh/* @ClickHouse/docs-zh
website/* @ClickHouse/docs

View File

@ -1,30 +1,29 @@
option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES})
option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY
"Set to FALSE to use system Azure SDK instead of bundled (OFF currently not implemented)"
ON)
if (ENABLE_AZURE_BLOB_STORAGE)
option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY
"Set to FALSE to use system Azure SDK instead of bundled (OFF currently not implemented)"
ON)
set(USE_AZURE_BLOB_STORAGE 1)
set(AZURE_BLOB_STORAGE_LIBRARY azure_sdk)
if ((NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/sdk"
OR NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/cmake-modules")
AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (WARNING "submodule contrib/azure is missing. to fix try run: \n git submodule update --init")
set(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY OFF)
set(USE_AZURE_BLOB_STORAGE 0)
endif ()
if (NOT USE_INTERNAL_SSL_LIBRARY AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (FATAL_ERROR "Currently Blob Storage support can be built only with internal SSL library")
endif()
if (NOT USE_INTERNAL_CURL AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (FATAL_ERROR "Currently Blob Storage support can be built only with internal curl library")
endif()
endif()
if ((NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/sdk"
OR NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/cmake-modules")
AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (WARNING "submodule contrib/azure is missing. to fix try run: \n git submodule update --init")
set(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY OFF)
set(USE_AZURE_BLOB_STORAGE 0)
endif ()
if (NOT USE_INTERNAL_SSL_LIBRARY AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (FATAL_ERROR "Currently Blob Storage support can be built only with internal SSL library")
endif()
if (NOT USE_INTERNAL_CURL AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (FATAL_ERROR "Currently Blob Storage support can be built only with internal curl library")
endif()
if (USE_AZURE_BLOB_STORAGE)
message (STATUS "Using Azure Blob Storage - ${USE_AZURE_BLOB_STORAGE}")
endif()
message (STATUS "Using Azure Blob Storage - ${USE_AZURE_BLOB_STORAGE}")

View File

@ -31,6 +31,7 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}")
set(LAUNCHER ${CCACHE_FOUND})
# debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is
# filled from the debian/changelog or current time.
@ -39,13 +40,8 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
# of the manifest, which do not allow to use previous cache,
# - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__
#
# So for:
# - 4.2+ does not require any sloppiness
# - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required")
set(LAUNCHER ${CCACHE_FOUND})
elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
# Exclude SOURCE_DATE_EPOCH env for ccache versions between [4.0, 4.2).
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0" AND CCACHE_VERSION VERSION_LESS "4.2")
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND})
endif()

View File

@ -17,6 +17,8 @@ $ docker run -d --name some-clickhouse-server --ulimit nofile=262144:262144 clic
By default ClickHouse will be accessible only via docker network. See the [networking section below](#networking).
By default, the server instance started above runs as the `default` user without a password.
### connect to it from a native client
```bash
$ docker run -it --rm --link some-clickhouse-server:clickhouse-server clickhouse/clickhouse-client --host clickhouse-server

View File

@ -52,9 +52,21 @@ function clone
}
function wget_with_retry
{
for _ in 1 2 3 4; do
if wget -nv -nd -c "$1";then
return 0
else
sleep 0.5
fi
done
return 1
}
function download
{
wget -nv -nd -c "$BINARY_URL_TO_DOWNLOAD"
wget_with_retry "$BINARY_URL_TO_DOWNLOAD"
chmod +x clickhouse
ln -s ./clickhouse ./clickhouse-server

View File

@ -77,7 +77,7 @@ Tables are accessed via schema name and table name at the same time:
``` sql
CREATE DATABASE database1
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_tables_list = 'schema1.table1,schema2.table2,schema1.table3';
SETTINGS materialized_postgresql_tables_list = 'schema1.table1,schema2.table2,schema1.table3',
materialized_postgresql_tables_list_with_schema = 1;
SELECT * FROM database1.`schema1.table1`;

View File

@ -189,7 +189,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
|libhdfs3\_conf | "" |
### Limitations {#limitations}
* `hadoop_security_kerberos_ticket_cache_path` and `libhdfs3_conf` can be global only, not user specific
* `hadoop_security_kerberos_ticket_cache_path` and `libhdfs3_conf` can be global only, not user specific
## Kerberos support {#kerberos-support}

View File

@ -11,6 +11,7 @@ Questions:
- [Which ClickHouse version to use in production?](../../faq/operations/production.md)
- [Is it possible to delete old records from a ClickHouse table?](../../faq/operations/delete-old-data.md)
- [Does ClickHouse support multi-region replication?](../../faq/operations/multi-region-replication.md)
!!! info "Dont see what you were looking for?"
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.

View File

@ -0,0 +1,13 @@
---
title: Does ClickHouse support multi-region replication?
toc_hidden: true
toc_priority: 30
---
# Does ClickHouse support multi-region replication? {#does-clickhouse-support-multi-region-replication}
The short answer is "yes". However, we recommend keeping latency between all regions/datacenters in the two-digit range; otherwise, write performance will suffer, since every write goes through a distributed consensus protocol. For example, replication between US coasts will likely work fine, but replication between the US and Europe likely won't.
Configuration-wise there is no difference compared to single-region replication: simply use hosts located in different locations for the replicas.
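For illustration, a replicated table is declared identically whether its replicas are in one region or several; only the hosts behind the cluster differ. A minimal sketch, assuming a hypothetical `cross_region_cluster` and the usual `{shard}`/`{replica}` macros:
``` sql
-- Replicas in different regions are configured at the cluster level;
-- the table definition itself does not change.
CREATE TABLE events ON CLUSTER cross_region_cluster
(
    ts DateTime,
    user_id UInt64
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/events', '{replica}')
ORDER BY (user_id, ts);
```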
For more information, see [full article on data replication](../../engines/table-engines/mergetree-family/replication.md).

View File

@ -142,6 +142,12 @@ On Gentoo, you can just use `emerge clickhouse` to install ClickHouse from sourc
To start the server as a daemon, run:
``` bash
$ sudo clickhouse start
```
There are also other ways to run ClickHouse:
``` bash
$ sudo service clickhouse-server start
```
@ -152,6 +158,12 @@ If you do not have `service` command, run as
$ sudo /etc/init.d/clickhouse-server start
```
If you have the `systemctl` command, run as
``` bash
$ sudo systemctl start clickhouse-server.service
```
See the logs in the `/var/log/clickhouse-server/` directory.
If the server does not start, check the configurations in the file `/etc/clickhouse-server/config.xml`.

View File

@ -9,6 +9,8 @@ The HTTP interface lets you use ClickHouse on any platform from any programming
By default, `clickhouse-server` listens for HTTP on port 8123 (this can be changed in the config).
Sometimes, the `curl` command is not available on user operating systems. On Ubuntu or Debian, run `sudo apt install curl`. Please refer to this [documentation](https://curl.se/download.html) to install it before running the examples.
If you make a `GET /` request without parameters, it returns the 200 response code and the string defined in [http_server_default_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response), by default “Ok.” (with a line feed at the end)
``` bash
@ -186,7 +188,7 @@ $ echo "SELECT 1" | gzip -c | \
```
``` bash
# Receiving compressed data from the server
# Receiving compressed data archive from the server
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" \
-H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3'
$ zcat result.gz
@ -195,6 +197,15 @@ $ zcat result.gz
2
```
```bash
# Receiving compressed data from the server and using gunzip to decompress it
$ curl -sS "http://localhost:8123/?enable_http_compression=1" \
-H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 3' | gunzip -
0
1
2
```
## Default Database {#default-database}
You can use the database URL parameter or the X-ClickHouse-Database header to specify the default database.
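For example, with the default database set to `system`, a bare table name such as `numbers` resolves to `system.numbers`; both forms below are equivalent (a sketch):
``` bash
# Via the URL parameter
$ echo 'SELECT number FROM numbers LIMIT 3' | curl 'http://localhost:8123/?database=system' --data-binary @-
# Via the header
$ echo 'SELECT number FROM numbers LIMIT 3' | curl -H 'X-ClickHouse-Database: system' 'http://localhost:8123/' --data-binary @-
```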
@ -424,10 +435,10 @@ Next are the configuration methods for different `type`.
The `query` value is a predefined query of `predefined_query_handler`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a mandatory configuration.
The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully.
!!! note "Warning"
To keep the default `handlers` such as `query`, `play`, `ping`, use the `<defaults/>` rule.
To keep the default `handlers` such as `query`, `play`, `ping`, use the `<defaults/>` rule.
Example:
@ -451,9 +462,9 @@ Example:
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2'
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2'
1
max_alter_threads 2
max_final_threads 2
```
!!! note "caution"
@ -465,7 +476,7 @@ In `dynamic_query_handler`, the query is written in the form of param of the HTT
ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query`. It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.
To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` and `queries` whether the settings were set successfully.
To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` and queries whether the settings were set successfully.
Example:
@ -484,9 +495,9 @@ Example:
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_alter_threads=2&param_name_1=max_threads&param_name_2=max_alter_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_final_threads=2&param_name_1=max_threads&param_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
max_threads 1
max_alter_threads 2
max_final_threads 2
```
### static {#static}

View File

@ -60,8 +60,10 @@ toc_title: Adopters
| <a href="https://www.exness.com/" class="favicon">Exness</a> | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
| <a href="https://www.eventbunker.io/" class="favicon">EventBunker.io</a> | Serverless Data Processing | — | — | — | [Tweet, April 2021](https://twitter.com/Halil_D_/status/1379839133472985091) |
| <a href="https://fastnetmon.com/" class="favicon">FastNetMon</a> | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) |
| <a href="https://www.firebolt.io/" class="favicon">Firebolt</a> | Analytics | Main product | - | - | [YouTube Tech Talk](https://www.youtube.com/watch?v=9rW9uEJ15tU) |
| <a href="https://www.flipkart.com/" class="favicon">Flipkart</a> | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) |
| <a href="https://fun.co/rp" class="favicon">FunCorp</a> | Games | | — | 14 bn records/day as of Jan 2021 | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
| <a href="https://futurragroup.com/" class="favicon">Futurra Group</a> | Analytics | — | — | — | [Article in Russian, December 2021](https://dou.ua/forums/topic/35587/) |
| <a href="https://geniee.co.jp" class="favicon">Geniee</a> | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
| <a href="https://www.genotek.ru/" class="favicon">Genotek</a> | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) |
| <a href="https://gigapipe.com/" class="favicon">Gigapipe</a> | Managed ClickHouse | Main product | — | — | [Official website](https://gigapipe.com/) |
@ -70,6 +72,7 @@ toc_title: Adopters
| <a href="https://www.grouparoo.com" class="favicon">Grouparoo</a> | Data Warehouse Integrations | Main product | — | — | [Official Website, November 2021](https://www.grouparoo.com/integrations) |
| <a href="https://www.huya.com/" class="favicon">HUYA</a> | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
| <a href="https://www.hydrolix.io/" class="favicon">Hydrolix</a> | Cloud data platform | Main product | — | — | [Documentation](https://docs.hydrolix.io/guide/query) |
| <a href="https://hystax.com" class="favicon">Hystax</a> | Cloud Operations | Observability Analytics | - | - | [Blog](https://hystax.com/clickhouse-for-real-time-cost-saving-analytics-how-to-stop-hammering-screws-and-use-an-electric-screwdriver/) |
| <a href="https://www.the-ica.com/" class="favicon">ICA</a> | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) |
| <a href="https://www.idealista.com" class="favicon">Idealista</a> | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.com/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
| <a href="https://infobaleen.com" class="favicon">Infobaleen</a> | AI markting tool | Analytics | — | — | [Official site](https://infobaleen.com) |
@ -81,14 +84,18 @@ toc_title: Adopters
| <a href="https://ippon.tech" class="favicon">Ippon Technologies</a> | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) |
| <a href="https://www.ivi.ru/" class="favicon">Ivi</a> | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) |
| <a href="https://jinshuju.net" class="favicon">Jinshuju 金数据</a> | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
| <a href="https://jitsu.com" class="favicon">Jitsu</a> | Cloud Software | Data Pipeline | — | — | [Documentation](https://jitsu.com/docs/destinations-configuration/clickhouse-destination), [Hacker News](https://news.ycombinator.com/item?id=29106082) |
| <a href="https://jitsu.com" class="favicon">Jitsu</a> | Cloud Software | Data Pipeline | — | — | [Documentation](https://jitsu.com/docs/destinations-configuration/clickhouse-destination), [Hacker News post](https://news.ycombinator.com/item?id=29106082) |
| <a href="https://juicefs.com/" class="favicon">JuiceFS</a> | Storage | Shopping Cart | - | - | [Blog](https://juicefs.com/blog/en/posts/shopee-clickhouse-with-juicefs/) |
| <a href="https://www.kakaocorp.com/" class="favicon">kakaocorp</a> | Internet company | — | — | — | [if(kakao)2020](https://tv.kakao.com/channel/3693125/cliplink/414129353), [if(kakao)2021](https://if.kakao.com/session/24) |
| <a href="https://www.kodiakdata.com/" class="favicon">Kodiak Data</a> | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
| <a href="https://kontur.ru" class="favicon">Kontur</a> | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
| <a href="https://www.kuaishou.com/" class="favicon">Kuaishou</a> | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.com/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) |
| <a href="https://www.kgk-global.com/en/" class="favicon">KGK Global</a> | Vehicle monitoring | — | — | — | [Press release, June 2021](https://zoom.cnews.ru/news/item/530921) |
| <a href="https://www.lancom-systems.com/" class="favicon">LANCOM Systems</a> | Network Solutions | Traffic analysis | - | - | [ClickHouse Operator for Kubernetes](https://www.lancom-systems.com/), [Hacker News post] (https://news.ycombinator.com/item?id=29413660) |
| <a href="https://www.lbl.gov" class="favicon">Lawrence Berkeley National Laboratory</a> | Research | Traffic analysis | 5 servers | 55 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) |
| <a href="https://www.lever.co/" class="favicon">Lever</a> | Talent Management | Recruiting | - | - | [Hacker News post](https://news.ycombinator.com/item?id=29558544) |
| <a href="https://lifestreet.com/" class="favicon">LifeStreet</a> | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
| <a href="https://lookforsale.ru/" class="favicon">Lookforsale</a> | E-Commerce | — | — | — | [Job Posting, December 2021](https://telegram.me/javascript_jobs/587318) |
| <a href="https://mcs.mail.ru/" class="favicon">Mail.ru Cloud Solutions</a> | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
| <a href="https://maxilect.com/" class="favicon">MAXILECT</a> | Ad Tech, Blockchain, ML, AI | — | — | — | [Job advertisement, 2021](https://www.linkedin.com/feed/update/urn:li:activity:6780842017229430784/) |
| <a href="https://tech.mymarilyn.ru" class="favicon">Marilyn</a> | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) |
@ -106,6 +113,7 @@ toc_title: Adopters
| <a href="https://ok.ru" class="favicon">Ok.ru</a> | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, October 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) |
| <a href="https://omnicomm.ru/" class="favicon">Omnicomm</a> | Transportation Monitoring | — | — | — | [Facebook post, October 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) |
| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitoring and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
| <a href="https://opensee.io/" class="favicon">Opensee</a> | Financial Analytics | Main product | - | - | [Blog](https://opensee.io/news/from-moscow-to-wall-street-the-remarkable-journey-of-clickhouse/) |
| <a href="https://www.opentargets.org/" class="favicon">Open Targets</a> | Genome Research | Genome Search | — | — | [Tweet, October 2021](https://twitter.com/OpenTargets/status/1452570865342758913?s=20), [Blog](https://blog.opentargets.org/graphql/) |
| <a href="https://corp.ozon.com/" class="favicon">OZON</a> | E-commerce | — | — | — | [Official website](https://job.ozon.ru/vacancy/razrabotchik-clickhouse-ekspluatatsiya-40991870/) |
| <a href="https://panelbear.com/" class="favicon">Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) |
@ -118,6 +126,7 @@ toc_title: Adopters
| <a href="https://prana-system.com/en/" class="favicon">PRANA</a> | Industrial predictive analytics | Main product | — | — | [News (russian), Feb 2021](https://habr.com/en/news/t/541392/) |
| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
| <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
| <a href="https://rvision.pro/en/" class="favicon">R-Vision</a> | Information Security | — | — | — | [Article in Russian, December 2021](https://www.anti-malware.ru/reviews/R-Vision-SENSE-15) |
| <a href="https://www.rbinternational.com/" class="favicon">Raiffeisenbank</a> | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) |
| <a href="https://rambler.ru" class="favicon">Rambler</a> | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
| <a href="https://replicahq.com" class="favicon">Replica</a> | Urban Planning | Analytics | — | — | [Job advertisement](https://boards.greenhouse.io/replica/jobs/5547732002?gh_jid=5547732002) |
@ -153,6 +162,7 @@ toc_title: Adopters
| <a href="https://www.tinybird.co/" class="favicon">Tinybird</a> | Real-time Data Products | Data processing | — | — | [Official website](https://www.tinybird.co/) |
| <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a> | AD network | — | 300 servers in Europe/US | 1.8 PiB, 700 000 insert rps (as of 2021) | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
| <a href="https://www.uber.com" class="favicon">Uber</a> | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/uber.pdf) |
| <a href="https://usetech.com/" class="favicon">UseTech</a> | Software Development | — | — | — | [Job Posting, December 2021](https://vk.com/wall136266658_2418) |
| <a href="https://hello.utmstat.com/" class="favicon">UTMSTAT</a> | Analytics | Main product | — | — | [Blog post, June 2020](https://vc.ru/tribuna/133956-striming-dannyh-iz-servisa-skvoznoy-analitiki-v-clickhouse) |
| <a href="https://vercel.com/" class="favicon">Vercel</a> | Traffic and Performance Analytics | — | — | — | Direct reference, October 2021 |
| <a href="https://vk.com" class="favicon">VKontakte</a> | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
@ -168,7 +178,8 @@ toc_title: Adopters
| <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
| <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/datalens.pdf) |
| <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Macin product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
| <a href="https://www.yellowfinbi.com" class="favicon"><COMPANYNAME></a> | Analytics | Main product | - | - | [Integration](https://www.yellowfinbi.com/campaign/yellowfin-9-whats-new#el-30219e0e) |
| <a href="https://www.yotascale.com/" class="favicon">Yotascale</a> | Cloud | Data pipeline | — | 2 bn records/day | [LinkedIn (Accomplishments)](https://www.linkedin.com/in/adilsaleem/) |
| <a href="https://www.your-analytics.org/" class="favicon">Your Analytics</a> | Product Analytics | Main Product | — | - | [Tweet, November 2021](https://twitter.com/mikenikles/status/1459737241165565953) |
| <a href="https://zagravagames.com/en/" class="favicon">Zagrava Trading</a> | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) |
@ -178,9 +189,5 @@ toc_title: Adopters
| <a href="https://promo.croc.ru/digitalworker" class="favicon">Цифровой Рабочий</a> | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) |
| <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
| <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
| <a href="https://futurragroup.com/" class="favicon">Futurra Group</a> | Analytics | — | — | — | [Article in Russian, December 2021](https://dou.ua/forums/topic/35587/) |
| <a href="https://usetech.com/" class="favicon">UseTech</a> | Software Development | — | — | — | [Job Posting, December 2021](https://vk.com/wall136266658_2418) |
| <a href="https://lookforsale.ru/" class="favicon">Lookforsale</a> | E-Commerce | — | — | — | [Job Posting, December 2021](https://telegram.me/javascript_jobs/587318) |
| <a href="https://rvision.pro/en/" class="favicon">R-Vision</a> | Information Security | — | — | — | [Article in Russian, December 2021](https://www.anti-malware.ru/reviews/R-Vision-SENSE-15) |
[Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->

View File

@ -3,14 +3,14 @@ toc_priority: 66
toc_title: ClickHouse Keeper
---
# [pre-production] ClickHouse Keeper
# [pre-production] ClickHouse Keeper {#clickHouse-keeper}
ClickHouse server uses [ZooKeeper](https://zookeeper.apache.org/) coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is an alternative coordination system compatible with ZooKeeper.
!!! warning "Warning"
This feature is currently in the pre-production stage. We test it in our CI and on small internal installations.
## Implementation details
## Implementation details {#implementation-details}
ZooKeeper is one of the first well-known open-source coordination systems. It's implemented in Java and has quite a simple and powerful data model. ZooKeeper's coordination algorithm, called ZAB (ZooKeeper Atomic Broadcast), doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper, ClickHouse Keeper is written in C++ and uses the [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm provides linearizability for reads and writes and has several open-source implementations in different languages.
@ -21,7 +21,7 @@ ClickHouse Keeper supports Access Control List (ACL) the same way as [ZooKeeper]
!!! info "Note"
External integrations are not supported.
## Configuration
## Configuration {#configuration}
ClickHouse Keeper can be used as a standalone replacement for ZooKeeper or as an internal part of the ClickHouse server, but in both cases configuration is almost the same `.xml` file. The main ClickHouse Keeper configuration tag is `<keeper_server>`. Keeper configuration has the following parameters:
@ -102,7 +102,7 @@ Examples of configuration for quorum with three nodes can be found in [integrati
</keeper_server>
```
## How to run
## How to run {#how-to-run}
ClickHouse Keeper is bundled into the ClickHouse server package; just add the configuration of `<keeper_server>` and start the ClickHouse server as always. If you want to run standalone ClickHouse Keeper, you can start it in a similar way with:
@ -110,13 +110,14 @@ ClickHouse Keeper is bundled into the ClickHouse server package, just add config
clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon
```
## Four Letter Word Commands
## Four Letter Word Commands {#four-letter-word-commands}
ClickHouse Keeper also provides 4lw commands which are almost the same as in ZooKeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on the server and connections respectively.
The 4lw commands have a whitelist configuration `four_letter_word_white_list`, whose default value is "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro".
You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port.
```
echo mntr | nc localhost 9181
```
@ -296,7 +297,7 @@ Sessions with Ephemerals (1):
/clickhouse/task_queue/ddl
```
## [experimental] Migration from ZooKeeper
## [experimental] Migration from ZooKeeper {#migration-from-zookeeper}
Seamless migration from ZooKeeper to ClickHouse Keeper is impossible: you have to stop your ZooKeeper cluster, convert the data, and start ClickHouse Keeper. The `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to a ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration:
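For instance, the conversion step, run against the stopped ZooKeeper leader's data, might look like this (a sketch; all paths are illustrative):
``` bash
# Convert ZooKeeper logs and snapshots into a ClickHouse Keeper snapshot
clickhouse-keeper-converter \
    --zookeeper-logs-dir /var/lib/zookeeper/version-2 \
    --zookeeper-snapshots-dir /var/lib/zookeeper/version-2 \
    --output-dir /var/lib/clickhouse/coordination/snapshots
```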

View File

@ -672,7 +672,8 @@ On hosts with low RAM and swap, you possibly need setting `max_server_memory_usa
## max_concurrent_queries {#max-concurrent-queries}
The maximum number of simultaneously processed queries related to MergeTree table. Queries may be limited by other settings: [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries).
The maximum number of simultaneously processed queries related to MergeTree tables.
Queries may be limited by other settings: [max_concurrent_insert_queries](#max-concurrent-insert-queries), [max_concurrent_select_queries](#max-concurrent-select-queries), [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries).
!!! info "Note"
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
@ -688,6 +689,42 @@ Possible values:
<max_concurrent_queries>100</max_concurrent_queries>
```
## max_concurrent_insert_queries {#max-concurrent-insert-queries}
The maximum number of simultaneously processed insert queries.
!!! info "Note"
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
Possible values:
- Positive integer.
- 0 — Disabled.
**Example**
``` xml
<max_concurrent_insert_queries>100</max_concurrent_insert_queries>
```
## max_concurrent_select_queries {#max-concurrent-select-queries}
The maximum number of simultaneously processed select queries.
!!! info "Note"
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
Possible values:
- Positive integer.
- 0 — Disabled.
**Example**
``` xml
<max_concurrent_select_queries>100</max_concurrent_select_queries>
```
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
The maximum number of simultaneously processed queries related to MergeTree tables per user.

View File

@ -3154,6 +3154,12 @@ Possible values:
Default value: `0`.
!!! warning "Warning"
Nullable primary key usually indicates bad design. It is forbidden in almost all main stream DBMS. The feature is mainly for [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) and is not heavily tested. Use with care.
!!! warning "Warning"
Do not enable this feature in version `<= 21.8`. It's not properly implemented and may lead to server crash.
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
@ -4149,3 +4155,20 @@ Default value: `''`.
Sets the character that is interpreted as a suffix after the result set for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Default value: `''`.
## shutdown_wait_unfinished_queries
Enables or disables waiting for unfinished queries when the server shuts down.
Possible values:
- 0 — Disabled.
- 1 — Enabled. The wait time equals the `shutdown_wait_unfinished` config value.
Default value: 0.
## shutdown_wait_unfinished
The waiting time in seconds for currently handled connections when the server shuts down.
Default value: 5.
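A sketch of how the two settings combine, assuming they sit in the server configuration like the other shutdown-related options (values are illustrative):
``` xml
<!-- Wait up to 10 seconds for running queries to finish before shutting down -->
<shutdown_wait_unfinished_queries>1</shutdown_wait_unfinished_queries>
<shutdown_wait_unfinished>10</shutdown_wait_unfinished>
```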

View File

@ -6,7 +6,7 @@ You can use this table to get information similar to the [DESCRIBE TABLE](../../
Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in `system.columns` only in the session where they were created. They are shown with an empty `database` field.
Columns:
The `system.columns` table contains the following columns (the column type is shown in brackets):
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
@ -86,21 +86,4 @@ numeric_scale: ᴺᵁᴸᴸ
datetime_precision: ᴺᵁᴸᴸ
```
The `system.columns` table contains the following columns (the column type is shown in brackets):
- `database` (String) — Database name.
- `table` (String) — Table name.
- `name` (String) — Column name.
- `type` (String) — Column type.
- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined.
- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined.
- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes.
- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes.
- `marks_bytes` (UInt64) — The size of marks, in bytes.
- `comment` (String) — Comment on the column, or an empty string if it is not defined.
- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression.
- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression.
- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression.
- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression.
[Original article](https://clickhouse.com/docs/en/operations/system-tables/columns) <!--hide-->

View File

@ -35,7 +35,7 @@ SELECT * FROM system.metrics LIMIT 10
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
[Original article](https://clickhouse.com/docs/en/operations/system-tables/metrics) <!--hide-->

View File

@ -122,7 +122,12 @@ Setting fields:
- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` — The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
- `command_termination_timeout` — executable script should contain main read-write loop. After dictionary is destroyed, pipe is closed, and executable file will have `command_termination_timeout` seconds to shutdown, before ClickHouse will send SIGTERM signal to child process. Specified in seconds. Default value is 10. Optional parameter.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder. Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `0`. Optional parameter.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
That dictionary source can be configured only via XML configuration. Creating dictionaries with an executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
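A minimal sketch of such a source in XML; the script name is hypothetical and, with `execute_direct` = `1`, is assumed to live in the `user_scripts` folder:
``` xml
<source>
    <executable>
        <command>my_dictionary.py</command>
        <format>TabSeparated</format>
        <implicit_key>true</implicit_key>
        <execute_direct>1</execute_direct>
    </executable>
</source>
```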
@ -150,10 +155,14 @@ Setting fields:
- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` — The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
- `pool_size` — Size of pool. If 0 is specified as `pool_size` then there is no pool size restrictions.
- `command_termination_timeout`Executable pool script should contain main read-write loop. After dictionary is destroyed, pipe is closed, and executable file will have `command_termination_timeout` seconds to shutdown, before ClickHouse will send SIGTERM signal to child process. Specified in seconds. Default value is 10. Optional parameter.
- `pool_size` — Size of pool. If 0 is specified as `pool_size` then there is no pool size restrictions. Default value is `16`.
- `command_termination_timeout`executable script should contain main read-write loop. After dictionary is destroyed, pipe is closed, and executable file will have `command_termination_timeout` seconds to shutdown, before ClickHouse will send SIGTERM signal to child process. Specified in seconds. Default value is 10. Optional parameter.
- `max_command_execution_time` — Maximum executable script command execution time for processing block of data. Specified in seconds. Default value is 10. Optional parameter.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false. Optional parameter.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder. Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
That dictionary source can be configured only via XML configuration. Creating dictionaries with an executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
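Similarly, a sketch of an `executable_pool` source (script name hypothetical):
``` xml
<source>
    <executable_pool>
        <command>my_pool_dictionary.py</command>
        <format>TabSeparated</format>
        <pool_size>8</pool_size>
        <command_termination_timeout>10</command_termination_timeout>
    </executable_pool>
</source>
```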

View File

@ -73,26 +73,74 @@ User defined function configurations are searched relative to the path specified
A function configuration contains the following settings:
- `name` - a function name.
- `command` - a command or a script to execute.
- `command` - the script name to execute, or the command if `execute_direct` is false.
- `argument` - argument description with the `type` of an argument. Each argument is described in a separate setting.
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
- `return_type` - the type of a returned value.
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `pool_size` - the size of a command pool. Optional. Default value is `16`.
- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder. Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter.
The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively; that is, after processing a chunk of arguments it must wait for the next chunk.
**Example**
Creating `test_function` using XML configuration:
```
Creating `test_function` using XML configuration.
File test_function.xml.
```xml
<functions>
<function>
<type>executable</type>
<name>test_function</name>
<name>test_function_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
</argument>
<format>TabSeparated</format>
<command>test_function.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function.py`.
```python
#!/usr/bin/python3
import sys
if __name__ == '__main__':
    for line in sys.stdin:
        print("Value " + line, end='')
        sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_python(toUInt64(2));
```
Result:
``` text
┌─test_function_python(2)─┐
│ Value 2                 │
└─────────────────────────┘
```
Creating `test_function_sum`, manually setting `execute_direct` to `0`, using XML configuration.
File test_function.xml.
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum</name>
<return_type>UInt64</return_type>
<argument>
<type>UInt64</type>
@ -102,7 +150,7 @@ Creating `test_function` using XML configuration:
</argument>
<format>TabSeparated</format>
<command>cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table"</command>
<lifetime>0</lifetime>
<execute_direct>0</execute_direct>
</function>
</functions>
```
@ -110,15 +158,15 @@ Creating `test_function` using XML configuration:
Query:
``` sql
SELECT test_function(toUInt64(2), toUInt64(2));
SELECT test_function_sum(2, 2);
```
Result:
``` text
┌─test_function(toUInt64(2), toUInt64(2))─┐
│                                       4 │
└─────────────────────────────────────────┘
┌─test_function_sum(2, 2)─┐
│                       4 │
└─────────────────────────┘
```

View File

@ -351,8 +351,6 @@ Checks whether the string matches the `pattern` regular expression. A `re2` regu
Returns 0 if it does not match, or 1 if it matches.
Note that the backslash symbol (`\`) is used for escaping in the regular expression. The same symbol is used for escaping in string literals. So in order to escape the symbol in a regular expression, you must write two backslashes (\\) in a string literal.
The regular expression works with the string as if it were a set of bytes. The regular expression can't contain null bytes.
For patterns to search for substrings in a string, it is better to use LIKE or position, since they work much faster.
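For instance, escaping a literal dot looks like this (a sketch):
``` sql
-- The regex metacharacter '.' is escaped as \. in the pattern,
-- which is written as '\\.' inside a string literal
SELECT match('example.com', 'example\\.com'); -- returns 1
SELECT match('exampleXcom', 'example\\.com'); -- returns 0
```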

View File

@ -9,11 +9,12 @@ The following operations with [projections](../../../engines/table-engines/merge
- `ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` - Adds projection description to tables metadata.
- `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk.
- `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
- `ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
- `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description.
- `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
The commands `ADD`, `DROP` and `CLEAR` are lightweight in the sense that they only change metadata or remove files.
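A quick sketch of the lifecycle, with hypothetical table and projection names:
``` sql
ALTER TABLE visits ADD PROJECTION p_by_user (SELECT user_id, count() GROUP BY user_id); -- metadata only
ALTER TABLE visits MATERIALIZE PROJECTION p_by_user IN PARTITION 202201;                -- mutation
ALTER TABLE visits CLEAR PROJECTION p_by_user IN PARTITION 202201;                      -- drops files, keeps description
ALTER TABLE visits DROP PROJECTION p_by_user;                                           -- removes description and files
```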

View File

@ -10,7 +10,7 @@ Creates a new [external dictionary](../../../sql-reference/dictionaries/external
**Syntax**
``` sql
CREATE DICTIONARY [OR REPLACE][IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
CREATE [OR REPLACE] DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
(
key1 type1 [DEFAULT|EXPRESSION expr1] [IS_OBJECT_ID],
key2 type2 [DEFAULT|EXPRESSION expr2],

View File

@ -21,7 +21,7 @@ GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.ta
- `user` — ClickHouse user account.
The `WITH GRANT OPTION` clause grants `user` or `role` permission to execute the `GRANT` query. Users can grant privileges of the same scope they have, or narrower.
The `WITH REPLACE OPTION` clause replace old privileges by new privileges for the `user` or `role`, if not specified it is append privileges.
The `WITH REPLACE OPTION` clause replaces old privileges with new privileges for the `user` or `role`; if it is not specified, privileges are appended.
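For example, with a hypothetical account `john`, the second statement below replaces rather than extends the first, leaving `john` with only `INSERT` on `db.table`:
``` sql
GRANT SELECT ON db.* TO john WITH GRANT OPTION;
GRANT INSERT ON db.table TO john WITH REPLACE OPTION;
```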
## Assigning Role Syntax {#assign-role-syntax}
@ -33,7 +33,7 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US
- `user` — ClickHouse user account.
The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`.
The `WITH REPLACE OPTION` clause replace old roles by new role for the `user` or `role`, if not specified it is append roles.
The `WITH REPLACE OPTION` clause replaces old roles with the new role for the `user` or `role`; if it is not specified, roles are appended.
## Usage {#grant-usage}

View File

@ -0,0 +1 @@
../../../en/faq/operations/multi-region-replication.md

View File

@ -397,7 +397,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
The `<query>` value is a predefined query of `<predefined_query_handler>`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a mandatory configuration.
The following example defines the values of the `max_threads` and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
The following example defines the values of the `max_threads` and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully.
Example:
@ -420,9 +420,9 @@ $ curl -v 'http://localhost:8123/predefined_query'
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2'
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2'
1
max_alter_threads 2
max_final_threads 2
```
!!! note "注意"
@ -434,7 +434,7 @@ max_alter_threads 2
ClickHouse extracts and executes the value corresponding to `<query_param_name>` in the URL of the HTTP request. The default value of `<query_param_name>` is `/query`. It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.
To experiment with this functionality, the example defines the values of max_threads and max_alter_threads and queries whether the settings were set successfully.
To experiment with this functionality, the example defines the values of max_threads and max_final_threads and queries whether the settings were set successfully.
Example:
@ -452,9 +452,9 @@ max_alter_threads 2
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_alter_threads=2&param_name_1=max_threads&param_name_2=max_alter_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_final_threads=2&param_name_1=max_threads&param_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
max_threads 1
max_alter_threads 2
max_final_threads 2
```
## Static {#static}

View File

@ -40,7 +40,7 @@ ClickHouse does not work or build on 32-bit
Run in the terminal:
git clone git@github.com:ClickHouse/ClickHouse.git
git clone git@github.com:your_github_username/ClickHouse.git
cd ClickHouse
Replace the first occurrence of the word `ClickHouse` in the git command with the name of your GitHub account.

View File

@ -5,7 +5,7 @@ toc_title: HDFS
# HDFS {#table_engines-hdfs}
Manages data in HDFS. This engine is similar to the [File](../special/file.md#table_engines-file) and [URL](../special/url.md#table_engines-url) engines.
This engine provides integration with the [Apache Hadoop](https://ru.wikipedia.org/wiki/Hadoop) ecosystem, allowing data in HDFS to be managed via ClickHouse. It is similar to the [File](../special/file.md#table_engines-file) and [URL](../special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
## Engine Usage {#usage}
@ -13,9 +13,11 @@ toc_title: HDFS
ENGINE = HDFS(URI, format)
```
The `URI` parameter must contain the full URI of the file in HDFS.
**Engine Parameters**
The `URI` parameter must contain the full URI of the file in HDFS. The file-path part of the URI may contain globs, in which case the table is read-only.
The `format` parameter must be one that ClickHouse can use both in `INSERT` and in `SELECT` queries. For the full list of supported formats, see [Formats](../../../interfaces/formats.md#formats).
The file-path part of the URI may contain globs, in which case the table is read-only.
**Example:**
@ -67,12 +69,12 @@ SELECT * FROM hdfs_engine_table LIMIT 2
1. Suppose we have several files with the following URIs in HDFS:
- 'hdfs://hdfs1:9000/some_dir/some_file_1'
- 'hdfs://hdfs1:9000/some_dir/some_file_2'
- 'hdfs://hdfs1:9000/some_dir/some_file_3'
- 'hdfs://hdfs1:9000/another_dir/some_file_1'
- 'hdfs://hdfs1:9000/another_dir/some_file_2'
- 'hdfs://hdfs1:9000/another_dir/some_file_3'
- 'hdfs://hdfs1:9000/some_dir/some_file_1'
- 'hdfs://hdfs1:9000/some_dir/some_file_2'
- 'hdfs://hdfs1:9000/some_dir/some_file_3'
- 'hdfs://hdfs1:9000/another_dir/some_file_1'
- 'hdfs://hdfs1:9000/another_dir/some_file_2'
- 'hdfs://hdfs1:9000/another_dir/some_file_3'
1. There are several ways to create a table consisting of these six files:
@ -128,6 +130,7 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9
| **parameter** | **default** |
| - | - |
| rpc\_client\_connect\_tcpnodelay | true |
| dfs\_client\_read\_shortcircuit | true |
| output\_replace-datanode-on-failure | true |
@ -177,22 +180,23 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9
#### Extended parameters for ClickHouse {#clickhouse-extras}
| **parameter** | **default** |
| - | - |
|hadoop\_kerberos\_keytab | "" |
|hadoop\_kerberos\_principal | "" |
|hadoop\_kerberos\_kinit\_command | kinit |
### Ограничения {#limitations}
* hadoop\_security\_kerberos\_ticket\_cache\_path могут быть определены только на глобальном уровне
* `hadoop_security_kerberos_ticket_cache_path` и `libhdfs3_conf` могут быть определены только на глобальном, а не на пользовательском уровне
## Поддержка Kerberos {#kerberos-support}
Если hadoop\_security\_authentication параметр имеет значение 'kerberos', ClickHouse аутентифицируется с помощью Kerberos.
[Расширенные параметры](#clickhouse-extras) и hadoop\_security\_kerberos\_ticket\_cache\_path помогают сделать это.
Если параметр `hadoop_security_authentication` имеет значение `kerberos`, ClickHouse аутентифицируется с помощью Kerberos.
[Расширенные параметры](#clickhouse-extras) и `hadoop_security_kerberos_ticket_cache_path` помогают сделать это.
Обратите внимание что из-за ограничений libhdfs3 поддерживается только устаревший метод аутентификации,
коммуникация с узлами данных не защищена SASL (HADOOP\_SECURE\_DN\_USER надежный показатель такого
подхода к безопасности). Используйте tests/integration/test\_storage\_kerberized\_hdfs/hdfs_configs/bootstrap.sh для примера настроек.
коммуникация с узлами данных не защищена SASL (`HADOOP_SECURE_DN_USER` надежный показатель такого
подхода к безопасности). Используйте `tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh` для примера настроек.
Если hadoop\_kerberos\_keytab, hadoop\_kerberos\_principal или hadoop\_kerberos\_kinit\_command указаны в настройках, kinit будет вызван. hadoop\_kerberos\_keytab и hadoop\_kerberos\_principal обязательны в этом случае. Необходимо также будет установить kinit и файлы конфигурации krb5.
Если `hadoop_kerberos_keytab`, `hadoop_kerberos_principal` или `hadoop_kerberos_kinit_command` указаны в настройках, `kinit` будет вызван. `hadoop_kerberos_keytab` и `hadoop_kerberos_principal` обязательны в этом случае. Необходимо также будет установить `kinit` и файлы конфигурации krb5.
## Virtual columns {#virtual-columns}

View File

@ -191,5 +191,5 @@ ClickHouse может поддерживать учетные данные Kerbe
**See also**
- [Virtual columns](index.md#table_engines-virtual_columns)
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
- [background_message_broker_schedule_pool_size](../../../operations/settings/settings.md#background_message_broker_schedule_pool_size)

View File

@ -0,0 +1 @@
../../../en/faq/operations/multi-region-replication.md

View File

@ -422,7 +422,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
The `query` value is a predefined query of `predefined_query_handler`, which ClickHouse executes when an HTTP request matches and returns the query result. It is a mandatory setting.
The following example defines the [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were applied successfully.
!!! note "Warning"
    To keep the default `handlers` such as `query`, `play`, and `ping`, add the `<defaults/>` rule.
@ -449,9 +449,9 @@ $ curl -v 'http://localhost:8123/predefined_query'
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2'
1
max_final_threads 2
```
!!! note "Warning"
@ -463,7 +463,7 @@ max_alter_threads 2
ClickHouse extracts and executes the value corresponding to `query_param_name` in the HTTP request URL. The default value of `query_param_name` is `/query`. It is an optional setting; if it is absent from the configuration file, the parameter is not passed in.
To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` and queries whether the settings were applied successfully.
Example:
@ -482,9 +482,9 @@ ClickHouse извлекает и выполняет значение, соотв
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_final_threads=2&param_name_1=max_threads&param_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
max_threads 1
max_final_threads 2
```
### static {#static}

View File

@ -3,14 +3,14 @@ toc_priority: 66
toc_title: ClickHouse Keeper
---
# [pre-production] ClickHouse Keeper {#clickHouse-keeper}
The ClickHouse server uses the [ZooKeeper](https://zookeeper.apache.org/) coordination service for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) query execution. ClickHouse Keeper is an alternative coordination service compatible with ZooKeeper.
!!! warning "Warning"
    ClickHouse Keeper is in pre-production and is being tested in ClickHouse CI and on several internal installations.
## Implementation details {#implementation-details}
ZooKeeper is one of the first well-known open-source coordination services. It is implemented in Java and has a fairly simple yet powerful data model. ZooKeeper's coordination algorithm, ZAB (ZooKeeper Atomic Broadcast), does not provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper, ClickHouse Keeper is written in C++ and uses the [RAFT](https://raft.github.io/) algorithm ([implementation](https://github.com/eBay/NuRaft)). This algorithm allows linearizability for reads and writes and has several open-source implementations in different languages.
@ -21,7 +21,7 @@ ZooKeeper — один из первых широко известных сер
!!! info "Note"
    External integrations are not supported.
## Configuration {#configuration}
ClickHouse Keeper can be used as a standalone replacement for ZooKeeper or as an internal part of the ClickHouse server; in both cases the configuration is an `.xml` file. The main ClickHouse Keeper configuration tag is `<keeper_server>`. Configuration parameters:
@ -54,6 +54,7 @@ ClickHouse Keeper может использоваться как равноце
- `auto_forwarding` — Allow forwarding write requests from followers to the leader (default: true).
- `shutdown_timeout` — Time to wait for internal connections to finish and to shut down, in milliseconds (default: 5000).
- `startup_timeout` — Time after which the server terminates if it has not connected to the other quorum participants, in milliseconds (default: 30000).
- `four_letter_word_white_list` — White list of allowed 4-letter-word commands (default: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro").
The quorum configuration is located in `<keeper_server>.<raft_configuration>` and contains a description of the servers.
@ -101,7 +102,7 @@ ClickHouse Keeper может использоваться как равноце
</keeper_server>
```
## How to run {#how-to-run}
ClickHouse Keeper is bundled into the `clickhouse-server` package: just add the `<keeper_server>` configuration and start the ClickHouse server as usual. If you want to run ClickHouse Keeper standalone, you can start it in a similar way:
@ -109,7 +110,195 @@ ClickHouse Keeper входит в пакет `clickhouse-server`, просто
clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon
```
## Four-letter-word commands {#four-letter-word-commands}
ClickHouse Keeper also supports 4-letter-word commands, which are almost the same as in ZooKeeper. Each command consists of four letters, for example `mntr`, `stat`, etc. A few interesting commands: `stat` gives general information about the server and connected clients, while `srvr` and `cons` give extended details about the server and connections respectively.
The 4lw commands have a white-list parameter, `four_letter_word_white_list`, whose default value is "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro".
You can send the commands to ClickHouse Keeper via telnet or nc, on the client port.
```
echo mntr | nc localhost 9181
```
The detailed list of 4-letter-word commands follows:
- `ruok`: Tests whether the server is running in a non-error state. The server will respond `imok` if it is running; otherwise it will not respond at all. A response of `imok` does not necessarily mean the server has joined the quorum, just that the server process is active and bound to the specified client port. Use `stat` for details on the quorum state and client connection information.
```
imok
```
- `mntr`: Outputs a list of variables used for monitoring the health of the cluster.
```
zk_version v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
zk_avg_latency 0
zk_max_latency 0
zk_min_latency 0
zk_packets_received 68
zk_packets_sent 68
zk_num_alive_connections 1
zk_outstanding_requests 0
zk_server_state leader
zk_znode_count 4
zk_watch_count 1
zk_ephemerals_count 0
zk_approximate_data_size 723
zk_open_file_descriptor_count 310
zk_max_file_descriptor_count 10240
zk_followers 0
zk_synced_followers 0
```
- `srvr`: Outputs information about the server: its version, role in the quorum, etc.
```
ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
Latency min/avg/max: 0/0/0
Received: 2
Sent : 2
Connections: 1
Outstanding: 0
Zxid: 34
Mode: leader
Node count: 4
```
- `stat`: Outputs brief details about the server and connected clients.
```
ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
Clients:
192.168.1.1:52852(recved=0,sent=0)
192.168.1.1:52042(recved=24,sent=48)
Latency min/avg/max: 0/0/0
Received: 4
Sent : 4
Connections: 1
Outstanding: 0
Zxid: 36
Mode: leader
Node count: 4
```
- `srst`: Resets server statistics. The command affects the output of `srvr`, `mntr`, and `stat`.
```
Server stats reset.
```
- `conf`: Outputs detailed information about the server configuration.
```
server_id=1
tcp_port=2181
four_letter_word_white_list=*
log_storage_path=./coordination/logs
snapshot_storage_path=./coordination/snapshots
max_requests_batch_size=100
session_timeout_ms=30000
operation_timeout_ms=10000
dead_session_check_period_ms=500
heart_beat_interval_ms=500
election_timeout_lower_bound_ms=1000
election_timeout_upper_bound_ms=2000
reserved_log_items=1000000000000000
snapshot_distance=10000
auto_forwarding=true
shutdown_timeout=5000
startup_timeout=240000
raft_logs_level=information
snapshots_to_keep=3
rotate_log_storage_interval=100000
stale_log_gap=10000
fresh_log_gap=200
max_requests_batch_size=100
quorum_reads=false
force_sync=false
compress_logs=true
compress_snapshots_with_zstd_format=true
configuration_change_tries_count=20
```
- `cons`: Outputs full connection/session details for all clients connected to this server, including information on the number of packets received/sent, session id, operation latencies, the last operation performed, etc.
```
192.168.1.1:52163(recved=0,sent=0,sid=0xffffffffffffffff,lop=NA,est=1636454787393,to=30000,lzxid=0xffffffffffffffff,lresp=0,llat=0,minlat=0,avglat=0,maxlat=0)
192.168.1.1:52042(recved=9,sent=18,sid=0x0000000000000001,lop=List,est=1636454739887,to=30000,lcxid=0x0000000000000005,lzxid=0x0000000000000005,lresp=1636454739892,llat=0,minlat=0,avglat=0,maxlat=0)
```
- `crst`: Resets connection/session statistics for all connections.
```
Connection stats reset.
```
- `envi`: Outputs detailed information about the server environment.
```
Environment:
clickhouse.keeper.version=v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
host.name=ZBMAC-C02D4054M.local
os.name=Darwin
os.arch=x86_64
os.version=19.6.0
cpu.count=12
user.name=root
user.home=/Users/JackyWoo/
user.dir=/Users/JackyWoo/project/jd/clickhouse/cmake-build-debug/programs/
user.tmp=/var/folders/b4/smbq5mfj7578f2jzwn602tt40000gn/T/
```
- `dirs`: Shows the total size of the snapshot and log files, in bytes.
```
snapshot_dir_size: 0
log_dir_size: 3875
```
- `isro`: Tests whether the server is running in read-only mode. The server responds `ro` if it is in read-only mode, or `rw` if it is not.
```
rw
```
- `wchs`: Shows brief information about the watches on the server.
```
1 connections watching 1 paths
Total watches:1
```
- `wchc`: Shows detailed information about the watches on the server, broken down by session. This outputs a list of sessions (connections) with their associated watched paths. Note that depending on the number of watches this operation may be expensive (i.e. impact server performance); use it carefully.
```
0x0000000000000001
/clickhouse/task_queue/ddl
```
- `wchp`: Shows detailed information about the watches on the server, broken down by path. This outputs a list of paths (znodes) with their associated sessions. Note that depending on the number of watches this operation may be expensive (i.e. impact server performance); use it carefully.
```
/clickhouse/task_queue/ddl
0x0000000000000001
```
- `dump`: Lists the outstanding sessions and ephemeral nodes. This works only on the leader.
```
Sessions dump (2):
0x0000000000000001
0x0000000000000002
Sessions with Ephemerals (1):
0x0000000000000001
/clickhouse/task_queue/ddl
```
## [experimental] Migration from ZooKeeper {#migration-from-zookeeper}
A seamless migration from ZooKeeper to ClickHouse Keeper is impossible: you have to stop the ZooKeeper cluster, convert the data, and start ClickHouse Keeper. The `clickhouse-keeper-converter` utility converts ZooKeeper logs and snapshots into a ClickHouse Keeper snapshot. It has been verified only for ZooKeeper versions above 3.4. Migration requires the following steps:

View File

@ -1641,18 +1641,19 @@ SELECT * FROM table_with_enum_column_for_csv_insert;
`INSERT` succeeds only if ClickHouse managed to write the data to `insert_quorum` replicas without error within `insert_quorum_timeout`. If for any reason the number of replicas with successful writes does not reach `insert_quorum`, the write is considered failed and ClickHouse deletes the inserted block from all replicas it has already written data to.
When `insert_quorum_parallel` is disabled, all replicas in the quorum are consistent, i.e. they contain the data of all previous `INSERT` queries (the `INSERT` sequence is linearized). When reading data written with `insert_quorum` and with `insert_quorum_parallel` disabled, you can enable sequential consistency for `SELECT` queries with [select_sequential_consistency](#settings-select_sequential_consistency).
ClickHouse throws an exception:
- If the number of available replicas at the time of the query is less than `insert_quorum`.
- When `insert_quorum_parallel` is disabled, if an attempt is made to write data while the previous block has not yet been inserted into `insert_quorum` replicas (several parallel `INSERT` queries). This can happen if a user issues another `INSERT` into the same table before the previous one with `insert_quorum` has completed.
See also:
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
- [select_sequential_consistency](#settings-select_sequential_consistency)
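A minimal sketch of a quorum write (it assumes a ReplicatedMergeTree table named `replicated_table` with at least two replicas; both names are placeholders):

``` sql
SET insert_quorum = 2, insert_quorum_timeout = 600000;
-- The INSERT succeeds only after 2 replicas confirm the write within the timeout.
INSERT INTO replicated_table VALUES (1, 'a');
```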
## insert_quorum_timeout {#settings-insert_quorum_timeout}
@ -1664,11 +1665,29 @@ ClickHouse генерирует исключение
See also:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
- [select_sequential_consistency](#settings-select_sequential_consistency)
## insert_quorum_parallel {#settings-insert_quorum_parallel}
Enables or disables parallelism for quorum `INSERT` queries. When enabled, several quorum `INSERT` queries can run concurrently without waiting for each other. When disabled, concurrent quorum writes to the same table are rejected (only one of them proceeds).
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 1.
See also:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [select_sequential_consistency](#settings-select_sequential_consistency)
## select_sequential_consistency {#settings-select_sequential_consistency}
Enables or disables sequential consistency for `SELECT` queries. It requires `insert_quorum_parallel` to be disabled (it is enabled by default) and `insert_quorum` to be enabled.
Possible values:
@ -1681,10 +1700,13 @@ ClickHouse генерирует исключение
When sequential consistency is enabled, ClickHouse lets the client execute a `SELECT` query only against replicas that contain the data of all previous `INSERT` queries executed with `insert_quorum`. If the client refers to a partial replica, ClickHouse throws an exception. The `SELECT` query will not include data that has not yet been written to the quorum of replicas.
If `insert_quorum_parallel` is enabled (the default), `select_sequential_consistency` does not work. This is because parallel `INSERT` queries can be written to different sets of quorum replicas, so there is no guarantee that a given replica has received all the writes.
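A sketch of a sequentially consistent read under these constraints (again assuming a hypothetical ReplicatedMergeTree table `replicated_table`; note that `insert_quorum_parallel` must be disabled first):

``` sql
SET insert_quorum = 2, insert_quorum_parallel = 0;
INSERT INTO replicated_table VALUES (2, 'b');

SET select_sequential_consistency = 1;
-- Throws an exception on a replica that has not yet received all quorum INSERTs.
SELECT count() FROM replicated_table;
```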
See also:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
## insert_deduplicate {#settings-insert-deduplicate}

View File

@ -3,10 +3,10 @@ toc_priority: 67
toc_title: NLP
---
# [experimental] Natural language processing functions {#nlp-functions}
!!! warning "Warning"
    Natural language processing functions are currently experimental. To use them, enable the setting `allow_experimental_nlp_functions = 1`.
## stem {#stem}
@ -84,7 +84,7 @@ SELECT lemmatize('en', 'wolves');
Finds synonyms of a given word. There are two types of synonym extensions: `plain` and `wordnet`.
For the `plain` extension type, you must provide a path to a simple text file where each line corresponds to one set of synonyms. Words on a line must be separated by spaces or tabs.
For the `wordnet` extension type, you must provide a path to a WordNet thesaurus. The thesaurus must contain a WordNet sense index.
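A hedged usage sketch (it assumes an extension named `list` has already been configured on the server; the extension name and the word are placeholders):

``` sql
SELECT synonyms('list', 'important');
```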

View File

@ -21,7 +21,7 @@ GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.ta
- `user` — A ClickHouse user.
`WITH GRANT OPTION` allows the user or role to execute the `GRANT` query. A user can grant only privileges they hold themselves, of the same or narrower scope.
`WITH REPLACE OPTION` replaces all of the old privileges of the `user` or `role` with the new privileges; if it is not specified, the new privileges are added to the old ones.
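As an illustration (the user name `john` and the `db.*` scope are hypothetical), the following replaces whatever privileges `john` previously held with `SELECT` only:

``` sql
GRANT SELECT ON db.* TO john WITH REPLACE OPTION;
```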
## Role assignment syntax {#assign-role-syntax}
@ -34,7 +34,7 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US
- `user` — A ClickHouse user.
`WITH ADMIN OPTION` grants the [ADMIN OPTION](#admin-option-privilege) privilege to the user or role.
`WITH REPLACE OPTION` replaces all of the old roles of the `user` or `role` with the new roles; if it is not specified, the new roles are added to the old ones.
## Usage {#grant-usage}

View File

@ -95,7 +95,7 @@ def build_for_lang(lang, args):
site_dir=site_dir,
strict=True,
theme=theme_cfg,
copyright='©2016–2022 ClickHouse, Inc.',
use_directory_urls=True,
repo_name='ClickHouse/ClickHouse',
repo_url='https://github.com/ClickHouse/ClickHouse/',

View File

@ -0,0 +1 @@
../../../en/faq/operations/multi-region-replication.md

View File

@ -407,7 +407,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
The `query` value is a predefined query of `predefined_query_handler`, which ClickHouse executes when an HTTP request matches and returns the query result. It is a mandatory configuration.
The following example defines the [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully.
Example:
@ -430,9 +430,9 @@ $ curl -v 'http://localhost:8123/predefined_query'
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2'
1
max_final_threads 2
```
!!! note "Warning"
@ -444,7 +444,7 @@ max_alter_threads 2
ClickHouse extracts and executes the value corresponding to `query_param_name` in the HTTP request URL. The default value of `query_param_name` is `/query`. It is an optional configuration; if it is absent from the configuration file, the parameter is not passed in.
To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` and queries whether the settings were set successfully.
Example:
@ -462,9 +462,9 @@ ClickHouse提取并执行与HTTP请求URL中的`query_param_name`值对应的值
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_final_threads=2&param_name_1=max_threads&param_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
max_threads 1
max_final_threads 2
```
### static {#static}

View File

@ -1,29 +1,89 @@
# system.columns {#system-columns}
This system table contains information about the columns in all tables.
You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for multiple tables at once.
Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in `system.columns` only in the sessions where they were created, and their `database` field is shown as empty.
The `system.columns` table contains the following columns (the column type is shown in brackets):
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `name` ([String](../../sql-reference/data-types/string.md)) — Column name.
- `type` ([String](../../sql-reference/data-types/string.md)) — Column type.
- `position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of the column in the table, starting from 1.
- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined.
- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined.
- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the compressed data, in bytes.
- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the decompressed data, in bytes.
- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the marks, in bytes.
- `comment` ([String](../../sql-reference/data-types/string.md)) — Comment on the column, or an empty string if it is not defined.
- `is_in_partition_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag indicating whether the column is in the partition expression.
- `is_in_sorting_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag indicating whether the column is in the sorting key expression.
- `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag indicating whether the column is in the primary key expression.
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag indicating whether the column is in the sampling key expression.
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Name of the compression codec.
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse it makes sense only for the `FixedString` data type; otherwise `NULL` is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Precision of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is the bit width for integer types and the decimal precision for `Decimal` types; otherwise `NULL` is returned.
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Base of the number system in which the precision of approximate numeric data, exact numeric data, integer data, or monetary data is expressed. In ClickHouse it is 2 for integer types and 10 for `Decimal` types; otherwise `NULL` is returned.
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it makes sense only for `Decimal` types; otherwise `NULL` is returned.
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of the `DateTime64` data type. For other data types, `NULL` is returned.
**Example**
```sql
SELECT * FROM system.columns LIMIT 2 FORMAT Vertical;
```
```text
Row 1:
──────
database: INFORMATION_SCHEMA
table: COLUMNS
name: table_catalog
type: String
position: 1
default_kind:
default_expression:
data_compressed_bytes: 0
data_uncompressed_bytes: 0
marks_bytes: 0
comment:
is_in_partition_key: 0
is_in_sorting_key: 0
is_in_primary_key: 0
is_in_sampling_key: 0
compression_codec:
character_octet_length: ᴺᵁᴸᴸ
numeric_precision: ᴺᵁᴸᴸ
numeric_precision_radix: ᴺᵁᴸᴸ
numeric_scale: ᴺᵁᴸᴸ
datetime_precision: ᴺᵁᴸᴸ
Row 2:
──────
database: INFORMATION_SCHEMA
table: COLUMNS
name: table_schema
type: String
position: 2
default_kind:
default_expression:
data_compressed_bytes: 0
data_uncompressed_bytes: 0
marks_bytes: 0
comment:
is_in_partition_key: 0
is_in_sorting_key: 0
is_in_primary_key: 0
is_in_sampling_key: 0
compression_codec:
character_octet_length: ᴺᵁᴸᴸ
numeric_precision: ᴺᵁᴸᴸ
numeric_precision_radix: ᴺᵁᴸᴸ
numeric_scale: ᴺᵁᴸᴸ
datetime_precision: ᴺᵁᴸᴸ
```
[Original article](https://clickhouse.com/docs/zh/operations/system-tables/columns) <!--hide-->

View File

@ -1,15 +1,10 @@
# system.contributors {#system-contributors}
This system table contains information about contributors. The order is random at query execution time.
Columns:
- `name` (String) — Contributor (author) name from the git log.
**Example**
@ -32,7 +27,7 @@ SELECT * FROM system.contributors LIMIT 10
└──────────────────┘
```
To find yourself in the table, use this query:
``` sql
SELECT * FROM system.contributors WHERE name = 'Olga Khvostikova'
@ -43,3 +38,5 @@ SELECT * FROM system.contributors WHERE name = 'Olga Khvostikova'
│ Olga Khvostikova │
└──────────────────┘
```
[Original article](https://clickhouse.com/docs/zh/operations/system-tables/contributors) <!--hide-->

View File

@ -1,14 +1,11 @@
# system.detached_parts {#system_tables-detached_parts}
Contains information about detached parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. The `reason` column specifies why the part was detached.
For user-detached parts, the reason is empty. Such parts can be attached with the [ALTER TABLE ATTACH PARTITION\|PART](../../sql-reference/statements/alter/partition.md#alter_attach-partition) command.
For the description of the other columns, see [system.parts](../../operations/system-tables/parts.md#system_tables-parts).
If a part name is invalid, the values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../../sql-reference/statements/alter/partition.md#alter_drop-detached).
[Original article](https://clickhouse.com/docs/zh/operations/system-tables/detached_parts) <!--hide-->

View File

@ -1,19 +1,14 @@
# system.metrics {#system_tables-metrics}
Contains metrics that can be calculated instantly or have a current value, e.g. the number of simultaneously processed queries or the current replica delay. This table is always up to date.
Columns:
- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value.
- `description` ([String](../../sql-reference/data-types/string.md)) — Metric description.
For the list of supported metrics, see the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse.
**Example**
@ -38,7 +33,7 @@ SELECT * FROM system.metrics LIMIT 10
**See also**
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains the events that have occurred.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains the history of metric values from the `system.metrics` and `system.events` tables.
- [Monitoring](../../operations/monitoring.md) — Basic concepts of ClickHouse monitoring.

View File

@ -1,12 +1,32 @@
# system.numbers {#system-numbers}
This table contains a single UInt64 column named `number`, holding almost all natural numbers starting from zero.
You can use this table for tests, or for brute-force searches.
Reads from this table are not parallelized.
**Example**
```sql
:) SELECT * FROM system.numbers LIMIT 10;
```
```text
┌─number─┐
│ 0 │
│ 1 │
│ 2 │
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└────────┘
10 rows in set. Elapsed: 0.001 sec.
```
[Original article](https://clickhouse.com/docs/zh/operations/system-tables/numbers) <!--hide-->

View File

@ -1,85 +1,167 @@
# system.parts {#system_tables-parts}
Contains information about parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.
Each row describes one data part.
Columns:
- `partition` ([String](../../sql-reference/data-types/string.md)) – Partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query.

    Formats:

    - `YYYYMM` for automatic partitioning by month.
    - `any_string` when partitioning manually; a string in any other format.

- `name` ([String](../../sql-reference/data-types/string.md)) – Name of the data part.
- `part_type` ([String](../../sql-reference/data-types/string.md)) — Storage format of the data part.

    Possible values:

    - `Wide` — each column is stored in a separate file in the filesystem.
    - `Compact` — all columns are stored in one file in the filesystem.

    The data storage format is controlled by the `min_bytes_for_wide_part` and `min_rows_for_wide_part` settings of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table.

- `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag indicating whether the data part is active. If a data part is active, it is used by the table; otherwise it is deleted. Inactive data parts remain after merging.
- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192). Not applicable to adaptive granularity.
- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Number of rows.
- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of all the data part files, in bytes.
- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of the compressed data in the data part. Auxiliary files (for example, files with marks) are not included.
- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of the uncompressed data in the data part. Auxiliary files (for example, files with marks) are not included.
- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Size of the file with marks.
- `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of the compressed secondary-index data in the data part. Auxiliary files (for example, files with marks) are not included.
- `secondary_indices_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of the uncompressed secondary-index data in the data part. Auxiliary files (for example, files with marks) are not included.
- `secondary_indices_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Size of the file with marks for secondary indices.
- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – Time when the directory with the data part was modified. This usually corresponds to the time when the data part was created.
- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – Time when the data part became inactive.
- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) – Number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
- `min_date` ([Date](../../sql-reference/data-types/date.md)) – Minimum value of the date key in the data part.
- `max_date` ([Date](../../sql-reference/data-types/date.md)) – Maximum value of the date key in the data part.
- `min_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – Minimum value of the date-and-time key in the data part.
- `max_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – Maximum value of the date-and-time key in the data part.
- `partition_id` ([String](../../sql-reference/data-types/string.md)) – ID of the partition.
- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Minimum number of the data parts that make up the current part after merging.
- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Maximum number of the data parts that make up the current part after merging.
- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) – Depth of the merge tree. A value of 0 means the part was created by an insert rather than by merging other parts.
- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Number used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Amount of memory (in bytes) used by primary key values.
- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Amount of memory (in bytes) reserved for primary key values.
- `is_frozen` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag showing that a partition data backup exists: 1, the backup exists; 0, it does not. For more details, see [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md#alter_freeze-partition).
- `database` ([String](../../sql-reference/data-types/string.md)) – Name of the database.
- `table` ([String](../../sql-reference/data-types/string.md)) – Name of the table.
- `engine` ([String](../../sql-reference/data-types/string.md)) – Name of the table engine, without parameters.
- `path` ([String](../../sql-reference/data-types/string.md)) – Absolute path to the folder with the data part files.
- `disk` ([String](../../sql-reference/data-types/string.md)) – Name of the disk that stores the data part.
- `hash_of_all_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of the compressed files.
- `hash_of_uncompressed_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of the uncompressed files (files with marks, the index file, etc.).
- `uncompressed_hash_of_compressed_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of the data in the compressed files as if they were uncompressed.
- `delete_ttl_info_min` ([DateTime](../../sql-reference/data-types/datetime.md)) — Minimum value of the date-and-time key for [TTL DELETE rules](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `delete_ttl_info_max` ([DateTime](../../sql-reference/data-types/datetime.md)) — Maximum value of the date-and-time key for [TTL DELETE rules](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).

!!! note "Warning"
    The `move_ttl_info.expression` array is kept mostly for backward compatibility; the simplest way to check `TTL MOVE` rules now is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.

- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date-and-time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `move_ttl_info.max` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date-and-time values. Each element describes the maximum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Alias for `bytes_on_disk`.
- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Alias for `marks_bytes`.
**Example**
``` sql
SELECT * FROM system.parts LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
partition: tuple()
name: all_1_4_1_6
part_type: Wide
active: 1
marks: 2
rows: 6
bytes_on_disk: 310
data_compressed_bytes: 157
data_uncompressed_bytes: 91
secondary_indices_compressed_bytes: 58
secondary_indices_uncompressed_bytes: 6
secondary_indices_marks_bytes: 48
marks_bytes: 144
modification_time: 2020-06-18 13:01:49
remove_time: 1970-01-01 00:00:00
refcount: 1
min_date: 1970-01-01
max_date: 1970-01-01
min_time: 1970-01-01 00:00:00
max_time: 1970-01-01 00:00:00
partition_id: all
min_block_number: 1
max_block_number: 4
level: 1
data_version: 6
primary_key_bytes_in_memory: 8
primary_key_bytes_in_memory_allocated: 64
is_frozen: 0
database: default
table: months
engine: MergeTree
disk_name: default
path: /var/lib/clickhouse/data/default/months/all_1_4_1_6/
hash_of_all_files: 2d0657a16d9430824d35e327fcbd87bf
hash_of_uncompressed_files: 84950cc30ba867c77a408ae21332ba29
uncompressed_hash_of_compressed_files: 1ad78f1c6843bbfb99a2c931abe7df7d
delete_ttl_info_min: 1970-01-01 00:00:00
delete_ttl_info_max: 1970-01-01 00:00:00
move_ttl_info.expression: []
move_ttl_info.min: []
move_ttl_info.max: []
```
**See also**
- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
- [TTL for columns and tables](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl)
[Original article](https://clickhouse.com/docs/zh/operations/system-tables/parts) <!--hide-->

View File

@ -1,27 +1,22 @@
# system.settings {#system-tables-system-settings}
Contains information about the session settings of the current user.
Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name.
- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value.
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the setting was changed from its default value.
- `description` ([String](../../sql-reference/data-types/string.md)) — Short description of the setting.
- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if one is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if one is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting:
    - `0` — The current user can change the setting.
    - `1` — The current user cannot change the setting.
**Example**
The following example shows how to get information about settings whose names contain `min_i`.
``` sql
SELECT *
@ -37,10 +32,10 @@ WHERE name LIKE '%min_i%'
└─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘
```
Using `WHERE changed` can be useful, for example, when you want to check:
- Whether settings in configuration files are loaded correctly and are in use.
- Settings that changed in the current session.
<!-- -->
@ -52,4 +47,6 @@ SELECT * FROM system.settings WHERE changed AND name='load_balancing'
- [Settings](../../operations/settings/index.md#session-settings-intro)
- [Permissions for queries](../../operations/settings/permissions-for-queries.md#settings_readonly)
- [Constraints on settings](../../operations/settings/constraints-on-settings.md)
[Original article](https://clickhouse.com/docs/zh/operations/system-tables/settings) <!--hide-->

View File

@ -12,4 +12,4 @@ toc_priority: 107
Calculates the Pearson correlation coefficient: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.
!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works more slowly but provides a lower computational error.
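A small sketch comparing the two variants on inline data (the `values` table function is used here only to supply sample rows):

``` sql
SELECT corr(x, y), corrStable(x, y)
FROM values('x Float64, y Float64', (1, 2), (2, 4), (3, 5.5), (4, 8));
```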

View File

@ -12,4 +12,4 @@ covarPop(x, y)
Calculates the value of `Σ((x - x̅)(y - y̅)) / n`.
!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works more slowly but provides a lower computational error.

View File

@ -14,4 +14,4 @@ covarSamp(x, y)
Returns Float64. When `n <= 1`, returns +∞.
!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works more slowly but provides a lower computational error.

View File

@ -46,7 +46,7 @@ quantileTiming(level)(expr)
Type: `Float32`.
!!! note "Note"
    If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to distinguish such cases from cases that result in zero. See the [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values.
**Example**

View File

@ -48,7 +48,7 @@ quantileTimingWeighted(level)(expr, weight)
Type: `Float32`.
!!! note "Note"
    If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to distinguish such cases from cases that result in zero. See the [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values.
**Example**

View File

@ -4,7 +4,7 @@ toc_priority: 30
# stddevPop {#stddevpop}
The result is equal to the square root of [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md).
!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works more slowly but provides a lower computational error.

View File

@ -7,4 +7,4 @@ toc_priority: 31
The result is equal to the square root of [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md).
!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works more slowly but provides a lower computational error.

View File

@ -9,4 +9,4 @@ toc_priority: 32
In other words, it computes the dispersion of a set of values. Returns `Float64`.
!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works more slowly but provides a lower computational error.

View File

@ -11,5 +11,5 @@ toc_priority: 33
Returns `Float64`. When `n <= 1`, returns `+∞`.
!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works more slowly but provides a lower computational error.

View File

@ -1 +0,0 @@
../../../../en/sql-reference/statements/create/function.md

View File

@ -0,0 +1,60 @@
---
toc_priority: 38
toc_title: FUNCTION
---
# CREATE FUNCTION {#create-function}
Creates a user-defined function from a lambda expression. The expression must consist of function parameters, constants, operators, or other function calls.
**Syntax**
```sql
CREATE FUNCTION name AS (parameter0, ...) -> expression
```
A function can have an arbitrary number of parameters.
There are a few restrictions:
- The function name must be unique among user-defined and system functions.
- Recursive functions are not allowed.
- All variables used by a function must be specified in its parameter list.
If any restriction is violated, an exception is raised.
**Example**
Query:
```sql
CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;
SELECT number, linear_equation(number, 2, 1) FROM numbers(3);
```
Result:
``` text
┌─number─┬─plus(multiply(2, number), 1)─┐
│ 0 │ 1 │
│ 1 │ 3 │
│ 2 │ 5 │
└────────┴──────────────────────────────┘
```
In the following query, a [conditional function](../../../sql-reference/functions/conditional-functions.md) is called inside a user-defined function:
```sql
CREATE FUNCTION parity_str AS (n) -> if(n % 2, 'odd', 'even');
SELECT number, parity_str(number) FROM numbers(3);
```
Result:
``` text
┌─number─┬─if(modulo(number, 2), 'odd', 'even')─┐
│ 0 │ even │
│ 1 │ odd │
│ 2 │ even │
└────────┴──────────────────────────────────────┘
```

View File

@ -342,6 +342,9 @@ private:
}
}
/// Now we don't block the Ctrl+C signal and second signal will terminate the program without waiting.
interrupt_listener.unblock();
pool.wait();
total_watch.stop();
@ -586,7 +589,6 @@ public:
#ifndef __clang__
#pragma GCC optimize("-fno-var-tracking-assignments")
#endif
#pragma GCC diagnostic ignored "-Wmissing-declarations"
int mainEntryClickHouseBenchmark(int argc, char ** argv)
{

View File

@ -313,11 +313,11 @@ void LocalServer::cleanup()
std::string LocalServer::getInitialCreateTableQuery()
{
if (!config().has("table-structure"))
if (!config().has("table-structure") && !config().has("table-file"))
return {};
auto table_name = backQuoteIfNeed(config().getString("table-name", "table"));
auto table_structure = config().getString("table-structure");
auto table_structure = config().getString("table-structure", "auto");
auto data_format = backQuoteIfNeed(config().getString("table-data-format", "TSV"));
String table_file;
@ -332,7 +332,12 @@ std::string LocalServer::getInitialCreateTableQuery()
table_file = quoteString(config().getString("table-file"));
}
if (table_structure == "auto")
    table_structure = "";
else
    table_structure = "(" + table_structure + ")";

return fmt::format("CREATE TABLE {} {} ENGINE = File({}, {});",
table_name, table_structure, data_format, table_file);
}
@ -422,7 +427,7 @@ try
#else
is_interactive = stdin_is_a_tty
&& (config().hasOption("interactive")
|| (!config().has("query") && !config().has("table-structure") && queries_files.empty()));
|| (!config().has("query") && !config().has("table-structure") && queries_files.empty() && !config().has("table-file")));
#endif
if (!is_interactive)
{

View File

@ -937,6 +937,12 @@ if (ThreadFuzzer::instance().isEffective())
if (config->has("max_concurrent_queries"))
global_context->getProcessList().setMaxSize(config->getInt("max_concurrent_queries", 0));
if (config->has("max_concurrent_insert_queries"))
global_context->getProcessList().setMaxInsertQueriesAmount(config->getInt("max_concurrent_insert_queries", 0));
if (config->has("max_concurrent_select_queries"))
global_context->getProcessList().setMaxSelectQueriesAmount(config->getInt("max_concurrent_select_queries", 0));
if (config->has("keeper_server"))
global_context->updateKeeperConfiguration(*config);

View File

@ -160,6 +160,7 @@ enum class AccessType
M(SYSTEM_FLUSH_DISTRIBUTED, "FLUSH DISTRIBUTED", TABLE, SYSTEM_FLUSH) \
M(SYSTEM_FLUSH_LOGS, "FLUSH LOGS", GLOBAL, SYSTEM_FLUSH) \
M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \
M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \
M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \
\
M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\

View File

@ -0,0 +1,20 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionNothing.h>
#include <AggregateFunctions/FactoryHelpers.h>
namespace DB
{
struct Settings;
void registerAggregateFunctionNothing(AggregateFunctionFactory & factory)
{
factory.registerFunction("nothing", [](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionNothing>(argument_types, parameters);
});
}
}

View File

@ -4,6 +4,8 @@
#include <DataTypes/DataTypeNothing.h>
#include <Columns/IColumn.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
namespace DB
@ -26,7 +28,7 @@ public:
DataTypePtr getReturnType() const override
{
return argument_types.empty() ? std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>()) : argument_types.front();
}
bool allocatesMemoryInArena() const override { return false; }
@ -62,12 +64,16 @@ public:
{
}
void serialize(ConstAggregateDataPtr __restrict, WriteBuffer & buf, std::optional<size_t>) const override
{
writeChar('\0', buf);
}
void deserialize(AggregateDataPtr, ReadBuffer & buf, std::optional<size_t>, Arena *) const override
{
[[maybe_unused]] char symbol;
readChar(symbol, buf);
assert(symbol == '\0');
}
void insertResultInto(AggregateDataPtr, IColumn & to, Arena *) const override


@ -50,6 +50,7 @@ void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &);
void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &);
void registerAggregateFunctionSingleValueOrNull(AggregateFunctionFactory &);
void registerAggregateFunctionSequenceNextNode(AggregateFunctionFactory &);
void registerAggregateFunctionNothing(AggregateFunctionFactory &);
void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory &);
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
@ -114,6 +115,7 @@ void registerAggregateFunctions()
registerAggregateFunctionSequenceNextNode(factory);
registerAggregateFunctionWelchTTest(factory);
registerAggregateFunctionStudentTTest(factory);
registerAggregateFunctionNothing(factory);
registerAggregateFunctionSingleValueOrNull(factory);
registerAggregateFunctionIntervalLengthSum(factory);
registerAggregateFunctionExponentialMovingAverage(factory);


@ -529,6 +529,14 @@ if (USE_BZIP2)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BZIP2_INCLUDE_DIR})
endif()
if(USE_SIMDJSON)
dbms_target_link_libraries(PRIVATE simdjson)
endif()
if(USE_RAPIDJSON)
dbms_target_include_directories(SYSTEM PRIVATE ${RAPIDJSON_INCLUDE_DIR})
endif()
dbms_target_link_libraries(PUBLIC consistent-hashing)
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")
@ -562,6 +570,7 @@ if (ENABLE_TESTS AND USE_GTEST)
clickhouse_storages_system
dbms
clickhouse_common_zookeeper
clickhouse_common_config
string_utils)
add_check(unit_tests_dbms)


@ -800,7 +800,7 @@ void ClientBase::onProfileEvents(Block & block)
if (rows == 0)
return;
if (progress_indication.print_hardware_utilization)
if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS)
{
const auto & array_thread_id = typeid_cast<const ColumnUInt64 &>(*block.getByName("thread_id").column).getData();
const auto & names = typeid_cast<const ColumnString &>(*block.getByName("name").column);
@ -834,25 +834,25 @@ void ClientBase::onProfileEvents(Block & block)
}
auto elapsed_time = profile_events.watch.elapsedMicroseconds();
progress_indication.updateThreadEventData(thread_times, elapsed_time);
}
if (profile_events.print)
{
if (profile_events.watch.elapsedMilliseconds() >= profile_events.delay_ms)
if (profile_events.print)
{
initLogsOutputStream();
progress_indication.clearProgressOutput();
logs_out_stream->writeProfileEvents(block);
logs_out_stream->flush();
if (profile_events.watch.elapsedMilliseconds() >= profile_events.delay_ms)
{
initLogsOutputStream();
progress_indication.clearProgressOutput();
logs_out_stream->writeProfileEvents(block);
logs_out_stream->flush();
profile_events.last_block = {};
}
else
{
incrementProfileEventsBlock(profile_events.last_block, block);
profile_events.last_block = {};
}
else
{
incrementProfileEventsBlock(profile_events.last_block, block);
}
}
profile_events.watch.restart();
}
profile_events.watch.restart();
}
@ -1909,8 +1909,6 @@ void ClientBase::init(int argc, char ** argv)
Poco::Logger::root().setLevel(options["log-level"].as<std::string>());
if (options.count("server_logs_file"))
server_logs_file = options["server_logs_file"].as<std::string>();
if (options.count("hardware-utilization"))
progress_indication.print_hardware_utilization = true;
query_processing_stage = QueryProcessingStage::fromString(options["stage"].as<std::string>());
profile_events.print = options.count("print-profile-events");


@ -4,6 +4,7 @@ set (SRCS
configReadClient.cpp
ConfigReloader.cpp
YAMLParser.cpp
ConfigHelper.cpp
)
add_library(clickhouse_common_config ${SRCS})


@ -0,0 +1,23 @@
#include <Common/Config/ConfigHelper.h>
#include <Poco/Util/AbstractConfiguration.h>
namespace DB
{
namespace ConfigHelper
{
bool getBool(const Poco::Util::AbstractConfiguration & config, const std::string & key, bool default_, bool empty_as)
{
if (!config.has(key))
return default_;
Poco::Util::AbstractConfiguration::Keys sub_keys;
config.keys(key, sub_keys);
if (sub_keys.empty() && config.getString(key).empty())
return empty_as;
return config.getBool(key, default_);
}
}
}


@ -0,0 +1,18 @@
#pragma once
namespace Poco
{
namespace Util
{
class AbstractConfiguration;
}
}
namespace DB::ConfigHelper
{
/// The behavior is like `config.getBool(key, default_)`,
/// except when the tag is empty (aka. self-closing), `empty_as` will be used instead of throwing Poco::Exception.
bool getBool(const Poco::Util::AbstractConfiguration & config, const std::string & key, bool default_, bool empty_as);
}
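A usage sketch under stated assumptions (the key name is invented; `config` is an already-loaded AbstractConfiguration):
/// <use_cache/> in the config: returns empty_as (true below) instead of throwing;
/// <use_cache>0</use_cache>: behaves like config.getBool() and returns false.
bool use_cache = DB::ConfigHelper::getBool(config, "use_cache", /* default_ */ false, /* empty_as */ true);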


@ -63,8 +63,10 @@
M(MaxDDLEntryID, "Max processed DDL entry of DDLWorker.") \
M(MaxPushedDDLEntryID, "Max DDL entry of DDLWorker that pushed to zookeeper.") \
M(PartsTemporary, "The part is generating now, it is not in data_parts list.") \
M(PartsPreCommitted, "The part is in data_parts, but not used for SELECTs.") \
M(PartsCommitted, "Active data part, used by current and upcoming SELECTs.") \
M(PartsPreCommitted, "Deprecated. See PartsPreActive.") \
M(PartsCommitted, "Deprecated. See PartsActive.") \
M(PartsPreActive, "The part is in data_parts, but not used for SELECTs.") \
M(PartsActive, "Active data part, used by current and upcoming SELECTs.") \
M(PartsOutdated, "Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.") \
M(PartsDeleting, "Not active data part with identity refcounter, it is deleting right now by a cleaner.") \
M(PartsDeleteOnDestroy, "Part was moved to another disk and should be deleted in own destructor.") \


@ -174,6 +174,20 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_)
{
years_months_lut[year_months_lut_index] = first_day_of_last_month;
}
/// Fill saturated LUT.
{
ssize_t day = DATE_LUT_SIZE - 1;
for (; day >= 0; --day)
{
if (lut[day].date >= 0)
lut_saturated[day] = lut[day];
else
break;
}
for (; day >= 0; --day)
lut_saturated[day] = lut_saturated[day + 1];
}
}
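Informal reading of the fill above: the first loop copies every slot whose date is at or after the epoch, and the second back-fills all earlier slots with the first valid entry, so any pre-1970 day saturates to 1970-01-01.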


@ -61,6 +61,8 @@ private:
// has to be a separate type to support overloading
// TODO: make sure that any arithmetic on LUTIndex actually results in valid LUTIndex.
STRONG_TYPEDEF(UInt32, LUTIndex)
// Same as above but select different function overloads for zero saturation.
STRONG_TYPEDEF(UInt32, LUTIndexWithSaturation)
template <typename T>
friend inline LUTIndex operator+(const LUTIndex & index, const T v)
@ -182,6 +184,9 @@ private:
/// In comparison to std::vector, plain array is cheaper by one indirection.
Values lut[DATE_LUT_SIZE + 1];
/// Same as above but with dates < 1970-01-01 saturated to 1970-01-01.
Values lut_saturated[DATE_LUT_SIZE + 1];
/// Year number after DATE_LUT_MIN_YEAR -> LUTIndex in lut for start of year.
LUTIndex years_lut[DATE_LUT_YEARS];
@ -278,19 +283,39 @@ public:
auto getOffsetAtStartOfEpoch() const { return offset_at_start_of_epoch; }
auto getTimeOffsetAtStartOfLUT() const { return offset_at_start_of_lut; }
auto getDayNumOffsetEpoch() const { return daynum_offset_epoch; }
static auto getDayNumOffsetEpoch() { return daynum_offset_epoch; }
/// All functions below are thread-safe; arguments are not checked.
inline ExtendedDayNum toDayNum(ExtendedDayNum d) const
static ExtendedDayNum toDayNum(ExtendedDayNum d)
{
return d;
}
template <typename DateOrTime>
inline ExtendedDayNum toDayNum(DateOrTime v) const
static UInt32 saturateMinus(UInt32 x, UInt32 y)
{
return ExtendedDayNum{static_cast<ExtendedDayNum::UnderlyingType>(toLUTIndex(v).toUnderType() - daynum_offset_epoch)};
UInt32 res = x - y;
res &= -Int32(res <= x);
return res;
}
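/// Hand trace of the branchless saturation above (illustrative values):
/// saturateMinus(7, 5): res = 2, (res <= x) holds, the mask is all ones, res stays 2;
/// saturateMinus(5, 7): res wraps to 0xFFFFFFFE, (res <= x) fails, the mask is 0, res becomes 0.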
static ExtendedDayNum toDayNum(LUTIndex d)
{
return ExtendedDayNum{static_cast<ExtendedDayNum::UnderlyingType>(d.toUnderType() - daynum_offset_epoch)};
}
static DayNum toDayNum(LUTIndexWithSaturation d)
{
return DayNum{static_cast<DayNum::UnderlyingType>(saturateMinus(d.toUnderType(), daynum_offset_epoch))};
}
template <typename DateOrTime>
inline auto toDayNum(DateOrTime v) const
{
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return DayNum{static_cast<DayNum::UnderlyingType>(saturateMinus(toLUTIndex(v).toUnderType(), daynum_offset_epoch))};
else
return ExtendedDayNum{static_cast<ExtendedDayNum::UnderlyingType>(toLUTIndex(v).toUnderType() - daynum_offset_epoch)};
}
/// Round down to start of monday.
@ -298,14 +323,20 @@ public:
inline Time toFirstDayOfWeek(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
return lut[i - (lut[i].day_of_week - 1)].date;
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return lut_saturated[i - (lut[i].day_of_week - 1)].date;
else
return lut[i - (lut[i].day_of_week - 1)].date;
}
template <typename DateOrTime>
inline ExtendedDayNum toFirstDayNumOfWeek(DateOrTime v) const
inline auto toFirstDayNumOfWeek(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
return toDayNum(i - (lut[i].day_of_week - 1));
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return toDayNum(LUTIndexWithSaturation(i - (lut[i].day_of_week - 1)));
else
return toDayNum(LUTIndex(i - (lut[i].day_of_week - 1)));
}
/// Round down to start of month.
@ -313,21 +344,30 @@ public:
inline Time toFirstDayOfMonth(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
return lut[i - (lut[i].day_of_month - 1)].date;
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return lut_saturated[i - (lut[i].day_of_month - 1)].date;
else
return lut[i - (lut[i].day_of_month - 1)].date;
}
template <typename DateOrTime>
inline ExtendedDayNum toFirstDayNumOfMonth(DateOrTime v) const
inline auto toFirstDayNumOfMonth(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
return toDayNum(i - (lut[i].day_of_month - 1));
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return toDayNum(LUTIndexWithSaturation(i - (lut[i].day_of_month - 1)));
else
return toDayNum(LUTIndex(i - (lut[i].day_of_month - 1)));
}
/// Round down to start of quarter.
template <typename DateOrTime>
inline ExtendedDayNum toFirstDayNumOfQuarter(DateOrTime v) const
inline auto toFirstDayNumOfQuarter(DateOrTime v) const
{
return toDayNum(toFirstDayOfQuarterIndex(v));
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return toDayNum(LUTIndexWithSaturation(toFirstDayOfQuarterIndex(v)));
else
return toDayNum(LUTIndex(toFirstDayOfQuarterIndex(v)));
}
template <typename DateOrTime>
@ -365,9 +405,12 @@ public:
}
template <typename DateOrTime>
inline ExtendedDayNum toFirstDayNumOfYear(DateOrTime v) const
inline auto toFirstDayNumOfYear(DateOrTime v) const
{
return toDayNum(toFirstDayNumOfYearIndex(v));
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return toDayNum(LUTIndexWithSaturation(toFirstDayNumOfYearIndex(v)));
else
return toDayNum(LUTIndex(toFirstDayNumOfYearIndex(v)));
}
inline Time toFirstDayOfNextMonth(Time t) const
@ -514,11 +557,17 @@ public:
* because the same calendar day starts/ends at different timestamps in different time zones)
*/
inline Time fromDayNum(DayNum d) const { return lut[toLUTIndex(d)].date; }
inline Time fromDayNum(DayNum d) const { return lut_saturated[toLUTIndex(d)].date; }
inline Time fromDayNum(ExtendedDayNum d) const { return lut[toLUTIndex(d)].date; }
template <typename DateOrTime>
inline Time toDate(DateOrTime v) const { return lut[toLUTIndex(v)].date; }
inline Time toDate(DateOrTime v) const
{
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return lut_saturated[toLUTIndex(v)].date;
else
return lut[toLUTIndex(v)].date;
}
template <typename DateOrTime>
inline unsigned toMonth(DateOrTime v) const { return lut[toLUTIndex(v)].month; }
@ -581,9 +630,12 @@ public:
}
template <typename DateOrTime>
inline ExtendedDayNum toFirstDayNumOfISOYear(DateOrTime v) const
inline auto toFirstDayNumOfISOYear(DateOrTime v) const
{
return toDayNum(toFirstDayNumOfISOYearIndex(v));
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return toDayNum(LUTIndexWithSaturation(toFirstDayNumOfISOYearIndex(v)));
else
return toDayNum(LUTIndex(toFirstDayNumOfISOYearIndex(v)));
}
inline Time toFirstDayOfISOYear(Time t) const
@ -596,7 +648,7 @@ public:
template <typename DateOrTime>
inline unsigned toISOWeek(DateOrTime v) const
{
return 1 + (toFirstDayNumOfWeek(v) - toFirstDayNumOfISOYear(v)) / 7;
return 1 + (toFirstDayNumOfWeek(v) - toDayNum(toFirstDayNumOfISOYearIndex(v))) / 7;
}
/*
@ -662,7 +714,7 @@ public:
{
if (!week_year_mode && ((first_weekday_mode && weekday != 0) || (!first_weekday_mode && weekday >= 4)))
return yw;
week_year_mode = 1;
week_year_mode = true;
(yw.first)--;
first_daynr -= (days = calc_days_in_year(yw.first));
weekday = (weekday + 53 * 7 - days) % 7;
@ -724,7 +776,7 @@ public:
/// Get first day of week with week_mode, return Sunday or Monday
template <typename DateOrTime>
inline ExtendedDayNum toFirstDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
inline auto toFirstDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
{
bool monday_first_mode = week_mode & static_cast<UInt8>(WeekModeFlag::MONDAY_FIRST);
if (monday_first_mode)
@ -733,7 +785,10 @@ public:
}
else
{
return (toDayOfWeek(v) != 7) ? ExtendedDayNum(v - toDayOfWeek(v)) : toDayNum(v);
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return (toDayOfWeek(v) != 7) ? DayNum(saturateMinus(v, toDayOfWeek(v))) : toDayNum(v);
else
return (toDayOfWeek(v) != 7) ? ExtendedDayNum(v - toDayOfWeek(v)) : toDayNum(v);
}
}
@ -809,7 +864,7 @@ public:
}
template <typename DateOrTime>
inline ExtendedDayNum toStartOfYearInterval(DateOrTime v, UInt64 years) const
inline auto toStartOfYearInterval(DateOrTime v, UInt64 years) const
{
if (years == 1)
return toFirstDayNumOfYear(v);
@ -822,39 +877,59 @@ public:
if (unlikely(year < DATE_LUT_MIN_YEAR))
year = DATE_LUT_MIN_YEAR;
return toDayNum(years_lut[year - DATE_LUT_MIN_YEAR]);
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return toDayNum(LUTIndexWithSaturation(years_lut[year - DATE_LUT_MIN_YEAR]));
else
return toDayNum(years_lut[year - DATE_LUT_MIN_YEAR]);
}
inline ExtendedDayNum toStartOfQuarterInterval(ExtendedDayNum d, UInt64 quarters) const
template <typename Date,
typename = std::enable_if_t<std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>>>
inline auto toStartOfQuarterInterval(Date d, UInt64 quarters) const
{
if (quarters == 1)
return toFirstDayNumOfQuarter(d);
return toStartOfMonthInterval(d, quarters * 3);
}
inline ExtendedDayNum toStartOfMonthInterval(ExtendedDayNum d, UInt64 months) const
template <typename Date,
typename = std::enable_if_t<std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>>>
inline auto toStartOfMonthInterval(Date d, UInt64 months) const
{
if (months == 1)
return toFirstDayNumOfMonth(d);
const Values & values = lut[toLUTIndex(d)];
UInt32 month_total_index = (values.year - DATE_LUT_MIN_YEAR) * 12 + values.month - 1;
return toDayNum(years_months_lut[month_total_index / months * months]);
if constexpr (std::is_same_v<Date, DayNum>)
return toDayNum(LUTIndexWithSaturation(years_months_lut[month_total_index / months * months]));
else
return toDayNum(years_months_lut[month_total_index / months * months]);
}
inline ExtendedDayNum toStartOfWeekInterval(ExtendedDayNum d, UInt64 weeks) const
template <typename Date,
typename = std::enable_if_t<std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>>>
inline auto toStartOfWeekInterval(Date d, UInt64 weeks) const
{
if (weeks == 1)
return toFirstDayNumOfWeek(d);
UInt64 days = weeks * 7;
// January 1st 1970 was a Thursday, so we need this 4-day offset to make weeks start on Monday.
return ExtendedDayNum(4 + (d - 4) / days * days);
if constexpr (std::is_same_v<Date, DayNum>)
return DayNum(4 + (d - 4) / days * days);
else
return ExtendedDayNum(4 + (d - 4) / days * days);
}
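/// Hand-worked check (day 0 = 1970-01-01, so day 4 is Monday 1970-01-05): with weeks = 2,
/// d = 17 (Sunday 1970-01-18) gives 4 + (17 - 4) / 14 * 14 = 4, i.e. Monday 1970-01-05,
/// while d = 18 (Monday 1970-01-19) maps to itself.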
inline Time toStartOfDayInterval(ExtendedDayNum d, UInt64 days) const
template <typename Date,
typename = std::enable_if_t<std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>>>
inline Time toStartOfDayInterval(Date d, UInt64 days) const
{
if (days == 1)
return toDate(d);
return lut[toLUTIndex(ExtendedDayNum(d / days * days))].date;
if constexpr (std::is_same_v<Date, DayNum>)
return lut_saturated[toLUTIndex(ExtendedDayNum(d / days * days))].date;
else
return lut[toLUTIndex(ExtendedDayNum(d / days * days))].date;
}
inline Time toStartOfHourInterval(Time t, UInt64 hours) const
@ -1140,7 +1215,11 @@ public:
/// If the resulting month has fewer days than the source month, saturation can happen.
/// Example: 31 Aug + 1 month = 30 Sep.
inline Time NO_SANITIZE_UNDEFINED addMonths(Time t, Int64 delta) const
template <
typename DateTime,
typename
= std::enable_if_t<std::is_same_v<DateTime, UInt32> || std::is_same_v<DateTime, Int64> || std::is_same_v<DateTime, time_t>>>
inline Time NO_SANITIZE_UNDEFINED addMonths(DateTime t, Int64 delta) const
{
const auto result_day = addMonthsIndex(t, delta);
@ -1154,20 +1233,28 @@ public:
if (time >= lut[result_day].time_at_offset_change())
time -= lut[result_day].amount_of_offset_change();
return lut[result_day].date + time;
auto res = lut[result_day].date + time;
if constexpr (std::is_same_v<DateTime, UInt32>)
{
/// Common compiler should generate branchless code for this saturation operation.
return res <= 0 ? 0 : res;
}
else
return res;
}
inline ExtendedDayNum NO_SANITIZE_UNDEFINED addMonths(ExtendedDayNum d, Int64 delta) const
template <typename Date,
typename = std::enable_if_t<std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>>>
inline auto NO_SANITIZE_UNDEFINED addMonths(Date d, Int64 delta) const
{
return toDayNum(addMonthsIndex(d, delta));
if constexpr (std::is_same_v<Date, DayNum>)
return toDayNum(LUTIndexWithSaturation(addMonthsIndex(d, delta)));
else
return toDayNum(addMonthsIndex(d, delta));
}
inline Time NO_SANITIZE_UNDEFINED addQuarters(Time t, Int32 delta) const
{
return addMonths(t, static_cast<Int64>(delta) * 3);
}
inline ExtendedDayNum addQuarters(ExtendedDayNum d, Int32 delta) const
template <typename DateOrTime>
inline auto addQuarters(DateOrTime d, Int32 delta) const
{
return addMonths(d, static_cast<Int64>(delta) * 3);
}
@ -1189,7 +1276,11 @@ public:
}
/// Saturation can occur if 29 Feb is mapped to non-leap year.
inline Time addYears(Time t, Int64 delta) const
template <
typename DateTime,
typename
= std::enable_if_t<std::is_same_v<DateTime, UInt32> || std::is_same_v<DateTime, Int64> || std::is_same_v<DateTime, time_t>>>
inline Time addYears(DateTime t, Int64 delta) const
{
auto result_day = addYearsIndex(t, delta);
@ -1203,12 +1294,24 @@ public:
if (time >= lut[result_day].time_at_offset_change())
time -= lut[result_day].amount_of_offset_change();
return lut[result_day].date + time;
auto res = lut[result_day].date + time;
if constexpr (std::is_same_v<DateTime, UInt32>)
{
/// Common compiler should generate branchless code for this saturation operation.
return res <= 0 ? 0 : res;
}
else
return res;
}
inline ExtendedDayNum addYears(ExtendedDayNum d, Int64 delta) const
template <typename Date,
typename = std::enable_if_t<std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>>>
inline auto addYears(Date d, Int64 delta) const
{
return toDayNum(addYearsIndex(d, delta));
if constexpr (std::is_same_v<Date, DayNum>)
return toDayNum(LUTIndexWithSaturation(addYearsIndex(d, delta)));
else
return toDayNum(addYearsIndex(d, delta));
}


@ -603,6 +603,8 @@
M(632, UNEXPECTED_DATA_AFTER_PARSED_VALUE) \
M(633, QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW) \
M(634, MONGODB_ERROR) \
M(635, CANNOT_POLL) \
M(636, CANNOT_EXTRACT_TABLE_STRUCTURE) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \


@ -64,6 +64,18 @@ public:
setImpl(key, mapped, lock);
}
void remove(const Key & key)
{
std::lock_guard lock(mutex);
auto it = cells.find(key);
if (it == cells.end())
return;
auto & cell = it->second;
current_size -= cell.size;
queue.erase(cell.queue_iterator);
cells.erase(it);
}
/// If the value for the key is in the cache, returns it. If it is not, calls load_func() to
/// produce it, saves the result in the cache and returns it.
/// Only one of several concurrent threads calling getOrSet() will call load_func(),


@ -0,0 +1,392 @@
#pragma once
#include <atomic>
#include <chrono>
#include <list>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <unordered_set>
#include <base/logger_useful.h>
namespace DB
{
template <typename T>
struct TrivialLRUResourceCacheWeightFunction
{
size_t operator()(const T &) const { return 1; }
};
/**
* Similar to implementation in LRUCache.h, but with the difference that keys can
* only be evicted when they are releasable. Release state is controlled by this implementation.
* get() and getOrSet() return a Holder for the actual value; the Holder calls release() in its destructor.
*
* Warning (!): This implementation is in development, not to be used.
*/
template <typename TKey,
typename TMapped,
typename WeightFunction = TrivialLRUResourceCacheWeightFunction<TMapped>,
typename HashFunction = std::hash<TKey>>
class LRUResourceCache
{
public:
using Key = TKey;
using Mapped = TMapped;
using MappedPtr = std::shared_ptr<Mapped>;
class MappedHolder
{
public:
MappedHolder(LRUResourceCache * cache_, const Key & key_, MappedPtr value_)
: cache(cache_), key(key_), val(value_) {}
~MappedHolder() { cache->release(key); }
Mapped & value() { return *val; }
protected:
LRUResourceCache * cache;
Key key;
MappedPtr val;
};
using MappedHolderPtr = std::unique_ptr<MappedHolder>;
explicit LRUResourceCache(size_t max_weight_, size_t max_element_size_ = 0)
: max_weight(max_weight_), max_element_size(max_element_size_) {}
MappedHolderPtr get(const Key & key)
{
auto mapped_ptr = getImpl(key);
if (!mapped_ptr)
return nullptr;
return std::make_unique<MappedHolder>(this, key, mapped_ptr);
}
template <typename LoadFunc>
MappedHolderPtr getOrSet(const Key & key, LoadFunc && load_func)
{
auto mapped_ptr = getImpl(key, load_func);
if (!mapped_ptr)
return nullptr;
return std::make_unique<MappedHolder>(this, key, mapped_ptr);
}
// If the key's reference_count = 0, delete it immediately.
// Otherwise, mark it expired (not visible to get()), and delete when refcount is 0.
void tryRemove(const Key & key)
{
std::lock_guard lock(mutex);
auto it = cells.find(key);
if (it == cells.end())
return;
auto & cell = it->second;
if (cell.reference_count == 0)
{
queue.erase(cell.queue_iterator);
current_weight -= cell.weight;
cells.erase(it);
}
else
cell.expired = true;
}
size_t weight()
{
std::lock_guard lock(mutex);
return current_weight;
}
size_t size()
{
std::lock_guard lock(mutex);
return cells.size();
}
void getStats(size_t & out_hits, size_t & out_misses, size_t & out_evict_count) const
{
out_hits = hits;
out_misses = misses;
out_evict_count = evict_count;
}
private:
mutable std::mutex mutex;
using LRUQueue = std::list<Key>;
using LRUQueueIterator = typename LRUQueue::iterator;
struct Cell
{
MappedPtr value;
size_t weight = 0;
LRUQueueIterator queue_iterator;
size_t reference_count = 0;
bool expired = false;
};
using Cells = std::unordered_map<Key, Cell, HashFunction>;
Cells cells;
LRUQueue queue;
size_t current_weight = 0;
size_t max_weight = 0;
size_t max_element_size = 0;
/// Represents pending insertion attempt.
struct InsertToken
{
explicit InsertToken(LRUResourceCache & cache_) : cache(cache_) { }
std::mutex mutex;
bool cleaned_up = false; /// Protected by the token mutex
MappedPtr value; /// Protected by the token mutex
LRUResourceCache & cache;
size_t refcount = 0; /// Protected by the cache mutex
};
using InsertTokenById = std::unordered_map<Key, std::shared_ptr<InsertToken>, HashFunction>;
/// This class is responsible for removing used insert tokens from the insert_tokens map.
/// Among several concurrent threads the first successful one is responsible for removal. But if they all
/// fail, then the last one is responsible.
struct InsertTokenHolder
{
const Key * key = nullptr;
std::shared_ptr<InsertToken> token;
bool cleaned_up = false;
InsertTokenHolder() = default;
void
acquire(const Key * key_, const std::shared_ptr<InsertToken> & token_, [[maybe_unused]] std::lock_guard<std::mutex> & cache_lock)
{
key = key_;
token = token_;
++token->refcount;
}
void cleanup([[maybe_unused]] std::lock_guard<std::mutex> & token_lock, [[maybe_unused]] std::lock_guard<std::mutex> & cache_lock)
{
token->cache.insert_tokens.erase(*key);
token->cleaned_up = true;
cleaned_up = true;
}
~InsertTokenHolder()
{
if (!token)
return;
if (cleaned_up)
return;
std::lock_guard token_lock(token->mutex);
if (token->cleaned_up)
return;
std::lock_guard cache_lock(token->cache.mutex);
--token->refcount;
if (token->refcount == 0)
cleanup(token_lock, cache_lock);
}
};
friend struct InsertTokenHolder;
InsertTokenById insert_tokens;
WeightFunction weight_function;
std::atomic<size_t> hits{0};
std::atomic<size_t> misses{0};
std::atomic<size_t> evict_count{0};
/// Returns nullptr when there is no more space for the new value or the old value is still in use.
template <typename LoadFunc>
MappedPtr getImpl(const Key & key, LoadFunc && load_func)
{
InsertTokenHolder token_holder;
{
std::lock_guard lock(mutex);
auto it = cells.find(key);
if (it != cells.end())
{
if (!it->second.expired)
{
++hits;
it->second.reference_count += 1;
queue.splice(queue.end(), queue, it->second.queue_iterator);
return it->second.value;
}
else if (it->second.reference_count > 0)
return nullptr;
else
{
// should not reach here
LOG_ERROR(&Poco::Logger::get("LRUResourceCache"), "element is in invalid status.");
abort();
}
}
++misses;
auto & token = insert_tokens[key];
if (!token)
token = std::make_shared<InsertToken>(*this);
token_holder.acquire(&key, token, lock);
}
auto * token = token_holder.token.get();
std::lock_guard token_lock(token->mutex);
token_holder.cleaned_up = token->cleaned_up;
if (!token->value)
token->value = load_func();
std::lock_guard lock(mutex);
auto token_it = insert_tokens.find(key);
Cell * cell_ptr = nullptr;
if (token_it != insert_tokens.end() && token_it->second.get() == token)
{
cell_ptr = set(key, token->value);
}
else
{
auto cell_it = cells.find(key);
if (cell_it != cells.end() && !cell_it->second.expired)
{
cell_ptr = &cell_it->second;
}
}
if (!token->cleaned_up)
token_holder.cleanup(token_lock, lock);
if (cell_ptr)
{
queue.splice(queue.end(), queue, cell_ptr->queue_iterator);
cell_ptr->reference_count++;
return cell_ptr->value;
}
return nullptr;
}
MappedPtr getImpl(const Key & key)
{
std::lock_guard lock(mutex);
auto it = cells.find(key);
if (it == cells.end() || it->second.expired)
{
++misses;
return nullptr;
}
++hits;
it->second.reference_count += 1;
queue.splice(queue.end(), queue, it->second.queue_iterator);
return it->second.value;
}
// mark a reference is released
void release(const Key & key)
{
std::lock_guard lock(mutex);
auto it = cells.find(key);
if (it == cells.end() || it->second.reference_count == 0)
{
LOG_ERROR(&Poco::Logger::get("LRUResourceCache"), "try to release an invalid element");
abort();
}
auto & cell = it->second;
cell.reference_count -= 1;
if (cell.expired && cell.reference_count == 0)
{
queue.erase(cell.queue_iterator);
current_weight -= cell.weight;
cells.erase(it);
}
}
InsertToken * acquireInsertToken(const Key & key)
{
auto & token = insert_tokens[key];
token.reference_count += 1;
return &token;
}
void releaseInsertToken(const Key & key)
{
auto it = insert_tokens.find(key);
if (it != insert_tokens.end())
{
it->second.reference_count -= 1;
if (it->second.reference_count == 0)
insert_tokens.erase(it);
}
}
// key mustn't be in the cache
Cell * set(const Key & insert_key, MappedPtr value)
{
auto weight = value ? weight_function(*value) : 0;
auto queue_size = cells.size() + 1;
auto loss_weight = 0;
auto is_overflow = [&] {
return current_weight + weight - loss_weight > max_weight || (max_element_size != 0 && queue_size > max_element_size);
};
auto key_it = queue.begin();
std::unordered_set<Key, HashFunction> to_release_keys;
while (is_overflow() && queue_size > 1 && key_it != queue.end())
{
const Key & key = *key_it;
auto cell_it = cells.find(key);
if (cell_it == cells.end())
{
LOG_ERROR(&Poco::Logger::get("LRUResourceCache"), "LRUResourceCache became inconsistent. There must be a bug in it.");
abort();
}
auto & cell = cell_it->second;
if (cell.reference_count == 0)
{
loss_weight += cell.weight;
queue_size -= 1;
to_release_keys.insert(key);
}
++key_it;
}
if (is_overflow())
return nullptr;
if (loss_weight > current_weight + weight)
{
LOG_ERROR(&Poco::Logger::get("LRUResourceCache"), "LRUResourceCache became inconsistent. There must be a bug in it.");
abort();
}
for (auto & key : to_release_keys)
{
auto & cell = cells[key];
queue.erase(cell.queue_iterator);
cells.erase(key);
++evict_count;
}
current_weight = current_weight + weight - loss_weight;
auto & new_cell = cells[insert_key];
new_cell.value = value;
new_cell.weight = weight;
new_cell.queue_iterator = queue.insert(queue.end(), insert_key);
return &new_cell;
}
};
}


@ -16,7 +16,7 @@ namespace
{
constexpr UInt64 ALL_THREADS = 0;
UInt64 calculateCoresNumber(DB::ThreadIdToTimeMap times, UInt64 elapsed)
double calculateCPUUsage(DB::ThreadIdToTimeMap times, UInt64 elapsed)
{
auto accumulated = std::accumulate(times.begin(), times.end(), 0,
[](Int64 acc, const auto & elem)
@ -25,7 +25,7 @@ namespace
return acc;
return acc + elem.second.time();
});
return (static_cast<UInt64>(accumulated) + elapsed - 1) / elapsed;
return static_cast<double>(accumulated) / elapsed;
}
}
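/// Informal note: the result is cores' worth of CPU time rather than a rounded-up core count, e.g.
/// two threads each consuming 1s of CPU over 1s of wall time give 2000000us / 1000000us = 2.0.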
@ -53,7 +53,7 @@ void ProgressIndication::resetProgress()
show_progress_bar = false;
written_progress_chars = 0;
write_progress_on_update = false;
host_active_cores.clear();
host_cpu_usage.clear();
thread_data.clear();
}
@ -81,8 +81,7 @@ void ProgressIndication::updateThreadEventData(HostToThreadTimesMap & new_thread
{
for (auto & new_host_map : new_thread_data)
{
auto new_cores = calculateCoresNumber(new_host_map.second, elapsed_time);
host_active_cores[new_host_map.first] = new_cores;
host_cpu_usage[new_host_map.first] = calculateCPUUsage(new_host_map.second, elapsed_time);
thread_data[new_host_map.first] = std::move(new_host_map.second);
}
}
@ -96,13 +95,12 @@ size_t ProgressIndication::getUsedThreadsCount() const
});
}
UInt64 ProgressIndication::getApproximateCoresNumber() const
double ProgressIndication::getCPUUsage() const
{
return std::accumulate(host_active_cores.cbegin(), host_active_cores.cend(), 0,
[](UInt64 acc, auto const & elem)
{
return acc + elem.second;
});
double res = 0;
for (const auto & elem : host_cpu_usage)
res += elem.second;
return res;
}
ProgressIndication::MemoryUsage ProgressIndication::getMemoryUsage() const
@ -116,6 +114,7 @@ ProgressIndication::MemoryUsage ProgressIndication::getMemoryUsage() const
// memory consumption it's enough to look for data with thread id 0.
if (auto it = host_data.second.find(ALL_THREADS); it != host_data.second.end())
host_usage = it->second.memory_usage;
return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage)};
});
}
@ -183,27 +182,29 @@ void ProgressIndication::writeProgress()
written_progress_chars = message.count() - prefix_size - (strlen(indicator) - 2); /// Don't count invisible output (escape sequences).
// If approximate cores number is known, display it.
auto cores_number = getApproximateCoresNumber();
/// Display resource usage if possible.
std::string profiling_msg;
if (cores_number != 0 && print_hardware_utilization)
double cpu_usage = getCPUUsage();
auto [memory_usage, max_host_usage] = getMemoryUsage();
if (cpu_usage > 0 || memory_usage > 0)
{
WriteBufferFromOwnString profiling_msg_builder;
// Calculated cores number may be not accurate
// so it's better to print min(threads, cores).
UInt64 threads_number = getUsedThreadsCount();
profiling_msg_builder << " Running " << threads_number << " threads on "
<< std::min(cores_number, threads_number) << " cores";
auto [memory_usage, max_host_usage] = getMemoryUsage();
if (memory_usage != 0)
profiling_msg_builder << " with " << formatReadableSizeWithDecimalSuffix(memory_usage) << " RAM used";
if (thread_data.size() > 1 && max_host_usage)
profiling_msg_builder << " total (per host max: " << formatReadableSizeWithDecimalSuffix(max_host_usage) << ")";
profiling_msg_builder << ".";
profiling_msg_builder << "(" << fmt::format("{:.1f}", cpu_usage) << " CPU";
if (memory_usage > 0)
profiling_msg_builder << ", " << formatReadableSizeWithDecimalSuffix(memory_usage) << " RAM";
if (max_host_usage < memory_usage)
profiling_msg_builder << ", " << formatReadableSizeWithDecimalSuffix(max_host_usage) << " max/host";
profiling_msg_builder << ")";
profiling_msg = profiling_msg_builder.str();
}
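/// With invented numbers (2.3 cores' worth of CPU, 1.23 GB total RAM, 800 MB max on one host),
/// the suffix would read roughly: (2.3 CPU, 1.23 GB RAM, 800.00 MB max/host).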
int64_t remaining_space = static_cast<int64_t>(terminal_width) - written_progress_chars;
/// If the approximate number of rows to process is known, we can display a progress bar and percentage.
if (progress.total_rows_to_read || progress.total_raw_bytes_to_read)
{
@ -230,14 +231,35 @@ void ProgressIndication::writeProgress()
if (show_progress_bar)
{
ssize_t width_of_progress_bar = static_cast<ssize_t>(terminal_width) - written_progress_chars - strlen(" 99%") - profiling_msg.length();
/// We will display profiling info only if there is enough space for it.
int64_t width_of_progress_bar = remaining_space - strlen(" 99%");
/// We need at least twice the space, because the message is displayed either
/// to the right of the progress bar or at the left, on top of it.
if (width_of_progress_bar <= 1 + 2 * static_cast<int64_t>(profiling_msg.size()))
profiling_msg.clear();
else
width_of_progress_bar -= profiling_msg.size();
if (width_of_progress_bar > 0)
{
std::string bar
= UnicodeBar::render(UnicodeBar::getWidth(current_count, 0, max_count, width_of_progress_bar));
size_t bar_width = UnicodeBar::getWidth(current_count, 0, max_count, width_of_progress_bar);
std::string bar = UnicodeBar::render(bar_width);
/// Render profiling_msg at the left, on top of the progress bar.
bool render_profiling_msg_at_left = current_count * 2 >= max_count;
if (!profiling_msg.empty() && render_profiling_msg_at_left)
message << "\033[30;42m" << profiling_msg << "\033[0m";
message << "\033[0;32m" << bar << "\033[0m";
if (width_of_progress_bar > static_cast<ssize_t>(bar.size() / UNICODE_BAR_CHAR_SIZE))
/// Whitespace after the progress bar.
if (width_of_progress_bar > static_cast<int64_t>(bar.size() / UNICODE_BAR_CHAR_SIZE))
message << std::string(width_of_progress_bar - bar.size() / UNICODE_BAR_CHAR_SIZE, ' ');
/// Render profiling_msg to the right, after the progress bar.
if (!profiling_msg.empty() && !render_profiling_msg_at_left)
message << "\033[2m" << profiling_msg << "\033[0m";
}
}
}
@ -245,8 +267,17 @@ void ProgressIndication::writeProgress()
/// Underestimate percentage a bit to avoid displaying 100%.
message << ' ' << (99 * current_count / max_count) << '%';
}
else
{
/// We can still display profiling info.
if (remaining_space >= static_cast<int64_t>(profiling_msg.size()))
{
if (remaining_space > static_cast<int64_t>(profiling_msg.size()))
message << std::string(remaining_space - profiling_msg.size(), ' ');
message << "\033[2m" << profiling_msg << "\033[0m";
}
}
message << profiling_msg;
message << CLEAR_TO_END_OF_LINE;
++increment;


@ -60,13 +60,10 @@ public:
void updateThreadEventData(HostToThreadTimesMap & new_thread_data, UInt64 elapsed_time);
bool print_hardware_utilization = false;
private:
size_t getUsedThreadsCount() const;
UInt64 getApproximateCoresNumber() const;
double getCPUUsage() const;
struct MemoryUsage
{
@ -93,7 +90,7 @@ private:
bool write_progress_on_update = false;
std::unordered_map<String, UInt64> host_active_cores;
std::unordered_map<String, double> host_cpu_usage;
HostToThreadTimesMap thread_data;
};


@ -65,14 +65,14 @@ ShellCommand::~ShellCommand()
size_t try_wait_timeout = config.terminate_in_destructor_strategy.wait_for_normal_exit_before_termination_seconds;
bool process_terminated_normally = tryWaitProcessWithTimeout(try_wait_timeout);
if (!process_terminated_normally)
{
LOG_TRACE(getLogger(), "Will kill shell command pid {} with SIGTERM", pid);
if (process_terminated_normally)
return;
int retcode = kill(pid, SIGTERM);
if (retcode != 0)
LOG_WARNING(getLogger(), "Cannot kill shell command pid {} errno '{}'", pid, errnoToString(retcode));
}
LOG_TRACE(getLogger(), "Will kill shell command pid {} with SIGTERM", pid);
int retcode = kill(pid, SIGTERM);
if (retcode != 0)
LOG_WARNING(getLogger(), "Cannot kill shell command pid {} errno '{}'", pid, errnoToString(retcode));
}
else
{
@ -91,7 +91,7 @@ bool ShellCommand::tryWaitProcessWithTimeout(size_t timeout_in_seconds)
{
int status = 0;
LOG_TRACE(getLogger(), "Try wait for shell command pid ({}) with timeout ({})", pid, timeout_in_seconds);
LOG_TRACE(getLogger(), "Try wait for shell command pid {} with timeout {}", pid, timeout_in_seconds);
wait_called = true;
struct timespec interval {.tv_sec = 1, .tv_nsec = 0};
@ -119,7 +119,9 @@ bool ShellCommand::tryWaitProcessWithTimeout(size_t timeout_in_seconds)
bool process_terminated_normally = (waitpid_res == pid);
if (process_terminated_normally)
{
return true;
}
else if (waitpid_res == 0)
{
--timeout_in_seconds;
@ -128,7 +130,9 @@ bool ShellCommand::tryWaitProcessWithTimeout(size_t timeout_in_seconds)
continue;
}
else if (waitpid_res == -1 && errno != EINTR)
{
return false;
}
}
return false;


@ -86,7 +86,7 @@ namespace
/// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object
void updateResources(std::string_view name, const void * address, SymbolIndex::Resources & resources)
void updateResources(ElfW(Addr) base_address, std::string_view object_name, std::string_view name, const void * address, SymbolIndex::Resources & resources)
{
const char * char_address = static_cast<const char *>(address);
@ -97,18 +97,23 @@ void updateResources(std::string_view name, const void * address, SymbolIndex::R
name = name.substr((name[0] == '_') + strlen("binary_"));
name = name.substr(0, name.size() - strlen("_start"));
resources.emplace(name, std::string_view{char_address, 0}); // NOLINT
resources.emplace(name, SymbolIndex::ResourcesBlob{
base_address,
object_name,
std::string_view{char_address, 0}, // NOLINT
});
}
else if (name.ends_with("_end"))
{
name = name.substr((name[0] == '_') + strlen("binary_"));
name = name.substr(0, name.size() - strlen("_end"));
if (auto it = resources.find(name); it != resources.end() && it->second.empty())
auto it = resources.find(name);
if (it != resources.end() && it->second.base_address == base_address && it->second.data.empty())
{
const char * start = it->second.data();
const char * start = it->second.data.data();
assert(char_address >= start);
it->second = std::string_view{start, static_cast<size_t>(char_address - start)};
it->second.data = std::string_view{start, static_cast<size_t>(char_address - start)};
}
}
}
@ -153,10 +158,12 @@ void collectSymbolsFromProgramHeaders(
size_t sym_cnt = 0;
for (const auto * it = dyn_begin; it->d_tag != DT_NULL; ++it)
{
ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr);
// TODO: this branch leads to invalid address of the hash table. Need further investigation.
// if (it->d_tag == DT_HASH)
// {
// const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr));
// const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address);
// sym_cnt = hash[1];
// break;
// }
@ -167,7 +174,7 @@ void collectSymbolsFromProgramHeaders(
const uint32_t * buckets = nullptr;
const uint32_t * hashval = nullptr;
const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr));
const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address);
buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4);
@ -196,9 +203,11 @@ void collectSymbolsFromProgramHeaders(
const char * strtab = nullptr;
for (const auto * it = dyn_begin; it->d_tag != DT_NULL; ++it)
{
ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr);
if (it->d_tag == DT_STRTAB)
{
strtab = reinterpret_cast<const char *>(correct_address(info->dlpi_addr, it->d_un.d_ptr));
strtab = reinterpret_cast<const char *>(base_address);
break;
}
}
@ -208,10 +217,12 @@ void collectSymbolsFromProgramHeaders(
for (const auto * it = dyn_begin; it->d_tag != DT_NULL; ++it)
{
ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr);
if (it->d_tag == DT_SYMTAB)
{
/* Get the pointer to the first entry of the symbol table */
const ElfW(Sym) * elf_sym = reinterpret_cast<const ElfW(Sym) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr));
const ElfW(Sym) * elf_sym = reinterpret_cast<const ElfW(Sym) *>(base_address);
/* Iterate over the symbol table */
for (ElfW(Word) sym_index = 0; sym_index < ElfW(Word)(sym_cnt); ++sym_index)
@ -236,7 +247,7 @@ void collectSymbolsFromProgramHeaders(
symbols.push_back(symbol);
/// But resources can be represented by a pair of empty symbols (indicating their boundaries).
updateResources(symbol.name, symbol.address_begin, resources);
updateResources(base_address, info->dlpi_name, symbol.name, symbol.address_begin, resources);
}
break;
@ -299,7 +310,7 @@ void collectSymbolsFromELFSymbolTable(
if (symbol_table_entry->st_size)
symbols.push_back(symbol);
updateResources(symbol.name, symbol.address_begin, resources);
updateResources(info->dlpi_addr, info->dlpi_name, symbol.name, symbol.address_begin, resources);
}
}


@ -51,7 +51,7 @@ public:
std::string_view getResource(String name) const
{
if (auto it = data.resources.find(name); it != data.resources.end())
return it->second;
return it->second.data;
return {};
}
@ -59,7 +59,17 @@ public:
String getBuildID() const { return data.build_id; }
String getBuildIDHex() const;
using Resources = std::unordered_map<std::string_view /* symbol name */, std::string_view /* blob */>;
struct ResourcesBlob
{
/// A symbol can be present in multiple shared objects;
/// base_address will be used to compare only symbols from the same SO.
ElfW(Addr) base_address;
/// Just a human-readable name of the SO.
std::string_view object_name;
/// Data blob.
std::string_view data;
};
using Resources = std::unordered_map<std::string_view /* symbol name */, ResourcesBlob>;
struct Data
{


@ -26,6 +26,7 @@ namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int BAD_ARGUMENTS;
}
}
@ -1133,4 +1134,54 @@ Coordination::RequestPtr makeCheckRequest(const std::string & path, int version)
return request;
}
std::string normalizeZooKeeperPath(std::string zookeeper_path, bool check_starts_with_slash, Poco::Logger * log)
{
if (!zookeeper_path.empty() && zookeeper_path.back() == '/')
zookeeper_path.resize(zookeeper_path.size() - 1);
/// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
if (!zookeeper_path.empty() && zookeeper_path.front() != '/')
{
/// Do not allow this for new tables, print warning for tables created in old versions
if (check_starts_with_slash)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "ZooKeeper path must start with '/', got '{}'", zookeeper_path);
if (log)
LOG_WARNING(log, "ZooKeeper path ('{}') does not start with '/'. It will not be supported in future releases");
zookeeper_path = "/" + zookeeper_path;
}
return zookeeper_path;
}
String extractZooKeeperName(const String & path)
{
static constexpr auto default_zookeeper_name = "default";
if (path.empty())
throw DB::Exception("ZooKeeper path should not be empty", DB::ErrorCodes::BAD_ARGUMENTS);
if (path[0] == '/')
return default_zookeeper_name;
auto pos = path.find(":/");
if (pos != String::npos && pos < path.find('/'))
{
auto zookeeper_name = path.substr(0, pos);
if (zookeeper_name.empty())
throw DB::Exception("Zookeeper path should start with '/' or '<auxiliary_zookeeper_name>:/'", DB::ErrorCodes::BAD_ARGUMENTS);
return zookeeper_name;
}
return default_zookeeper_name;
}
String extractZooKeeperPath(const String & path, bool check_starts_with_slash, Poco::Logger * log)
{
if (path.empty())
throw DB::Exception("ZooKeeper path should not be empty", DB::ErrorCodes::BAD_ARGUMENTS);
if (path[0] == '/')
return normalizeZooKeeperPath(path, check_starts_with_slash, log);
auto pos = path.find(":/");
if (pos != String::npos && pos < path.find('/'))
{
return normalizeZooKeeperPath(path.substr(pos + 1, String::npos), check_starts_with_slash, log);
}
return normalizeZooKeeperPath(path, check_starts_with_slash, log);
}
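/// Illustrative (invented) paths: "/clickhouse/tables/01/hits" keeps the "default" ZooKeeper and the
/// path unchanged; "aux1:/clickhouse/tables/01/hits" yields name "aux1" and path
/// "/clickhouse/tables/01/hits"; normalizeZooKeeperPath() strips a trailing '/'.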
}


@ -379,4 +379,11 @@ private:
};
using EphemeralNodeHolderPtr = EphemeralNodeHolder::Ptr;
String normalizeZooKeeperPath(std::string zookeeper_path, bool check_starts_with_slash, Poco::Logger * log = nullptr);
String extractZooKeeperName(const String & path);
String extractZooKeeperPath(const String & path, bool check_starts_with_slash, Poco::Logger * log = nullptr);
}


@ -0,0 +1,37 @@
#include <Common/Config/ConfigHelper.h>
#include <Poco/AutoPtr.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/DOM/DOMParser.h>
#include <gtest/gtest.h>
using namespace DB;
TEST(Common, ConfigHelperGetBool)
{
std::string xml(R"CONFIG(<clickhouse>
<zero_as_false>0</zero_as_false>
<one_as_true>1</one_as_true>
<yes_as_true>Yes</yes_as_true>
<empty_as_true_1/>
<empty_as_true_2></empty_as_true_2>
<has_empty_child_1><empty_child/></has_empty_child_1>
<has_empty_child_2><empty_child/><child>1</child></has_empty_child_2>
<has_child_1><child>1</child></has_child_1>
<has_child_2><child0>Yes</child0><child>1</child></has_child_2>
</clickhouse>)CONFIG");
Poco::XML::DOMParser dom_parser;
Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml);
Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);
EXPECT_EQ(ConfigHelper::getBool(*config, "zero_as_false", false, true), false);
EXPECT_EQ(ConfigHelper::getBool(*config, "one_as_true", false, true), true);
EXPECT_EQ(ConfigHelper::getBool(*config, "yes_as_true", false, true), true);
EXPECT_EQ(ConfigHelper::getBool(*config, "empty_as_true_1", false, true), true);
EXPECT_EQ(ConfigHelper::getBool(*config, "empty_as_true_2", false, true), true);
ASSERT_THROW(ConfigHelper::getBool(*config, "has_empty_child_1", false, true), Poco::Exception);
EXPECT_EQ(ConfigHelper::getBool(*config, "has_empty_child_2", false, true), true);
EXPECT_EQ(ConfigHelper::getBool(*config, "has_child_1", false, true), true);
ASSERT_THROW(ConfigHelper::getBool(*config, "has_child_2", false, true), Poco::Exception);
}


@ -0,0 +1,97 @@
#include <iomanip>
#include <iostream>
#include <gtest/gtest.h>
#include <Common/LRUCache.h>
TEST(LRUCache, set)
{
using SimpleLRUCache = DB::LRUCache<int, int>;
auto lru_cache = SimpleLRUCache(10, 10);
lru_cache.set(1, std::make_shared<int>(2));
lru_cache.set(2, std::make_shared<int>(3));
auto w = lru_cache.weight();
auto n = lru_cache.count();
ASSERT_EQ(w, 2);
ASSERT_EQ(n, 2);
}
TEST(LRUCache, update)
{
using SimpleLRUCache = DB::LRUCache<int, int>;
auto lru_cache = SimpleLRUCache(10, 10);
lru_cache.set(1, std::make_shared<int>(2));
lru_cache.set(1, std::make_shared<int>(3));
auto val = lru_cache.get(1);
ASSERT_TRUE(val != nullptr);
ASSERT_TRUE(*val == 3);
}
TEST(LRUCache, get)
{
using SimpleLRUCache = DB::LRUCache<int, int>;
auto lru_cache = SimpleLRUCache(10, 10);
lru_cache.set(1, std::make_shared<int>(2));
lru_cache.set(2, std::make_shared<int>(3));
SimpleLRUCache::MappedPtr value = lru_cache.get(1);
ASSERT_TRUE(value != nullptr);
ASSERT_EQ(*value, 2);
value = lru_cache.get(2);
ASSERT_TRUE(value != nullptr);
ASSERT_EQ(*value, 3);
}
struct ValueWeight
{
size_t operator()(const size_t & x) const { return x; }
};
TEST(LRUCache, evictOnSize)
{
using SimpleLRUCache = DB::LRUCache<int, size_t>;
auto lru_cache = SimpleLRUCache(20, 3);
lru_cache.set(1, std::make_shared<size_t>(2));
lru_cache.set(2, std::make_shared<size_t>(3));
lru_cache.set(3, std::make_shared<size_t>(4));
lru_cache.set(4, std::make_shared<size_t>(5));
auto n = lru_cache.count();
ASSERT_EQ(n, 3);
auto value = lru_cache.get(1);
ASSERT_TRUE(value == nullptr);
}
TEST(LRUCache, evictOnWeight)
{
using SimpleLRUCache = DB::LRUCache<int, size_t, std::hash<int>, ValueWeight>;
auto lru_cache = SimpleLRUCache(10, 10);
lru_cache.set(1, std::make_shared<size_t>(2));
lru_cache.set(2, std::make_shared<size_t>(3));
lru_cache.set(3, std::make_shared<size_t>(4));
lru_cache.set(4, std::make_shared<size_t>(5));
auto n = lru_cache.count();
ASSERT_EQ(n, 2);
auto w = lru_cache.weight();
ASSERT_EQ(w, 9);
auto value = lru_cache.get(1);
ASSERT_TRUE(value == nullptr);
value = lru_cache.get(2);
ASSERT_TRUE(value == nullptr);
}
TEST(LRUCache, getOrSet)
{
using SimpleLRUCache = DB::LRUCache<int, size_t, std::hash<int>, ValueWeight>;
auto lru_cache = SimpleLRUCache(10, 10);
size_t x = 10;
auto load_func = [&] { return std::make_shared<size_t>(x); };
auto [value, loaded] = lru_cache.getOrSet(1, load_func);
ASSERT_TRUE(value != nullptr);
ASSERT_TRUE(*value == 10);
}


@ -0,0 +1,270 @@
#include <iomanip>
#include <iostream>
#include <gtest/gtest.h>
#include <Common/LRUResourceCache.h>
TEST(LRUResourceCache, get)
{
using MyCache = DB::LRUResourceCache<int, int>;
auto mcache = MyCache(10, 10);
int x = 10;
auto load_int = [&] { return std::make_shared<int>(x); };
auto holder1 = mcache.getOrSet(1, load_int);
x = 11;
auto holder2 = mcache.getOrSet(2, load_int);
ASSERT_TRUE(holder2 != nullptr);
ASSERT_TRUE(holder2->value() == 11);
auto holder3 = mcache.get(1);
ASSERT_TRUE(holder3 != nullptr);
ASSERT_TRUE(holder3->value() == 10);
}
TEST(LRUResourceCache, remove)
{
using MyCache = DB::LRUResourceCache<int, int>;
auto mcache = MyCache(10, 10);
int x = 10;
auto load_int = [&] { return std::make_shared<int>(x); };
auto holder0 = mcache.getOrSet(1, load_int);
auto holder1 = mcache.getOrSet(1, load_int);
mcache.tryRemove(1);
holder0 = mcache.get(1);
ASSERT_TRUE(holder0 == nullptr);
auto n = mcache.size();
ASSERT_TRUE(n == 1);
holder0.reset();
holder1.reset();
n = mcache.size();
ASSERT_TRUE(n == 0);
}
struct MyWeight
{
size_t operator()(const int & x) const { return static_cast<size_t>(x); }
};
TEST(LRUResourceCache, evictOnWeight)
{
using MyCache = DB::LRUResourceCache<int, int, MyWeight>;
auto mcache = MyCache(5, 10);
int x = 2;
auto load_int = [&] { return std::make_shared<int>(x); };
auto holder1 = mcache.getOrSet(1, load_int);
holder1.reset();
auto holder2 = mcache.getOrSet(2, load_int);
holder2.reset();
x = 3;
auto holder3 = mcache.getOrSet(3, load_int);
ASSERT_TRUE(holder3 != nullptr);
auto w = mcache.weight();
ASSERT_EQ(w, 5);
auto n = mcache.size();
ASSERT_EQ(n, 2);
holder1 = mcache.get(1);
ASSERT_TRUE(holder1 == nullptr);
holder2 = mcache.get(2);
ASSERT_TRUE(holder2 != nullptr);
holder3 = mcache.get(3);
ASSERT_TRUE(holder3 != nullptr);
}
TEST(LRUResourceCache, evictOnWeightV2)
{
using MyCache = DB::LRUResourceCache<int, int, MyWeight>;
auto mcache = MyCache(5, 10);
int x = 2;
auto load_int = [&] { return std::make_shared<int>(x); };
auto holder1 = mcache.getOrSet(1, load_int);
holder1.reset();
auto holder2 = mcache.getOrSet(2, load_int);
holder2.reset();
holder1 = mcache.get(1);
holder1.reset();
x = 3;
auto holder3 = mcache.getOrSet(3, load_int);
ASSERT_TRUE(holder3 != nullptr);
auto w = mcache.weight();
ASSERT_EQ(w, 5);
auto n = mcache.size();
ASSERT_EQ(n, 2);
holder1 = mcache.get(1);
ASSERT_TRUE(holder1 != nullptr);
holder2 = mcache.get(2);
ASSERT_TRUE(holder2 == nullptr);
holder3 = mcache.get(3);
ASSERT_TRUE(holder3 != nullptr);
}
TEST(LRUResourceCache, evictOnWeightV3)
{
using MyCache = DB::LRUResourceCache<int, int, MyWeight>;
auto mcache = MyCache(5, 10);
int x = 2;
auto load_int = [&] { return std::make_shared<int>(x); };
auto holder1 = mcache.getOrSet(1, load_int);
holder1.reset();
auto holder2 = mcache.getOrSet(2, load_int);
holder2.reset();
holder1 = mcache.getOrSet(1, load_int);
holder1.reset();
x = 3;
auto holder3 = mcache.getOrSet(3, load_int);
ASSERT_TRUE(holder3 != nullptr);
auto w = mcache.weight();
ASSERT_EQ(w, 5);
auto n = mcache.size();
ASSERT_EQ(n, 2);
holder1 = mcache.get(1);
ASSERT_TRUE(holder1 != nullptr);
holder2 = mcache.get(2);
ASSERT_TRUE(holder2 == nullptr);
holder3 = mcache.get(3);
ASSERT_TRUE(holder3 != nullptr);
}
TEST(LRUResourceCache, evictOnSize)
{
using MyCache = DB::LRUResourceCache<int, int>;
auto mcache = MyCache(5, 2);
int x = 2;
auto load_int = [&] { return std::make_shared<int>(x); };
auto holder1 = mcache.getOrSet(1, load_int);
holder1.reset();
auto holder2 = mcache.getOrSet(2, load_int);
holder2.reset();
x = 3;
auto holder3 = mcache.getOrSet(3, load_int);
ASSERT_TRUE(holder3 != nullptr);
auto n = mcache.size();
ASSERT_EQ(n, 2);
auto w = mcache.weight();
ASSERT_EQ(w, 2);
holder1 = mcache.get(1);
ASSERT_TRUE(holder1 == nullptr);
holder2 = mcache.get(2);
ASSERT_TRUE(holder2 != nullptr);
holder3 = mcache.get(3);
ASSERT_TRUE(holder3 != nullptr);
}
TEST(LRUResourceCache, notEvictUsedElement)
{
using MyCache = DB::LRUResourceCache<int, int, MyWeight>;
auto mcache = MyCache(7, 10);
int x = 2;
auto load_int = [&] { return std::make_shared<int>(x); };
auto holder1 = mcache.getOrSet(1, load_int);
auto holder2 = mcache.getOrSet(2, load_int);
holder2.reset();
auto holder3 = mcache.getOrSet(3, load_int);
holder3.reset();
x = 3;
auto holder4 = mcache.getOrSet(4, load_int);
ASSERT_TRUE(holder4 != nullptr);
auto n = mcache.size();
ASSERT_EQ(n, 3);
auto w = mcache.weight();
ASSERT_EQ(w, 7);
holder1 = mcache.get(1);
ASSERT_TRUE(holder1 != nullptr);
holder2 = mcache.get(2);
ASSERT_TRUE(holder2 == nullptr);
holder3 = mcache.get(3);
ASSERT_TRUE(holder3 != nullptr);
holder4 = mcache.get(4);
ASSERT_TRUE(holder4 != nullptr);
}
TEST(LRUResourceCache, getFail)
{
using MyCache = DB::LRUResourceCache<int, int, MyWeight>;
auto mcache = MyCache(5, 10);
int x = 2;
auto load_int = [&] { return std::make_shared<int>(x); };
auto holder1 = mcache.getOrSet(1, load_int);
auto holder2 = mcache.getOrSet(2, load_int);
auto holder3 = mcache.getOrSet(3, load_int);
ASSERT_TRUE(holder3 == nullptr);
auto n = mcache.size();
ASSERT_EQ(n, 2);
auto w = mcache.weight();
ASSERT_EQ(w, 4);
holder1 = mcache.get(1);
ASSERT_TRUE(holder1 != nullptr);
holder2 = mcache.get(2);
ASSERT_TRUE(holder2 != nullptr);
holder3 = mcache.get(3);
ASSERT_TRUE(holder3 == nullptr);
}
TEST(LRUResourceCache, dupGet)
{
using MyCache = DB::LRUResourceCache<int, int, MyWeight>;
auto mcache = MyCache(20, 10);
int x = 2;
auto load_int = [&] { return std::make_shared<int>(x); };
auto holder1 = mcache.getOrSet(1, load_int);
holder1.reset();
x = 11;
holder1 = mcache.getOrSet(1, load_int);
ASSERT_TRUE(holder1 != nullptr);
auto n = mcache.size();
ASSERT_EQ(n, 1);
auto w = mcache.weight();
ASSERT_EQ(w, 2);
holder1 = mcache.get(1);
ASSERT_TRUE(holder1 != nullptr);
ASSERT_TRUE(holder1->value() == 2);
}
TEST(LRUResourceCache, reGet)
{
using MyCache = DB::LRUResourceCache<int, int, MyWeight>;
auto mcache = MyCache(20, 10);
int x = 2;
auto load_int = [&] { return std::make_shared<int>(x); };
auto holder1 = mcache.getOrSet(1, load_int);
mcache.tryRemove(1);
x = 11;
holder1.reset();
holder1 = mcache.getOrSet(1, load_int);
ASSERT_TRUE(holder1 != nullptr);
auto n = mcache.size();
ASSERT_EQ(n, 1);
auto w = mcache.weight();
ASSERT_EQ(w, 11);
holder1 = mcache.get(1);
ASSERT_TRUE(holder1 != nullptr);
ASSERT_TRUE(holder1->value() == 11);
}


@ -75,6 +75,17 @@ std::string checkAndGetSuperdigest(const String & user_and_digest)
return user_and_digest;
}
int32_t getValueOrMaxInt32AndLogWarning(uint64_t value, const std::string & name, Poco::Logger * log)
{
if (value > std::numeric_limits<int32_t>::max())
{
LOG_WARNING(log, "Got {} value for setting '{}' which is bigger than int32_t max value, lowering value to {}.", value, name, std::numeric_limits<int32_t>::max());
return std::numeric_limits<int32_t>::max();
}
return static_cast<int32_t>(value);
}
}
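/// E.g. (invented value): an operation_timeout_ms of 2000000000, doubled for auto_forwarding_req_timeout_,
/// gives 4000000000, which exceeds the int32_t max of 2147483647 and is clamped to it with a warning.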
KeeperServer::KeeperServer(
@ -134,18 +145,18 @@ void KeeperServer::startup()
}
nuraft::raft_params params;
params.heart_beat_interval_ = coordination_settings->heart_beat_interval_ms.totalMilliseconds();
params.election_timeout_lower_bound_ = coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds();
params.election_timeout_upper_bound_ = coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds();
params.reserved_log_items_ = coordination_settings->reserved_log_items;
params.snapshot_distance_ = coordination_settings->snapshot_distance;
params.stale_log_gap_ = coordination_settings->stale_log_gap;
params.fresh_log_gap_ = coordination_settings->fresh_log_gap;
params.client_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds();
params.heart_beat_interval_ = getValueOrMaxInt32AndLogWarning(coordination_settings->heart_beat_interval_ms.totalMilliseconds(), "heart_beat_interval_ms", log);
params.election_timeout_lower_bound_ = getValueOrMaxInt32AndLogWarning(coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds(), "election_timeout_lower_bound_ms", log);
params.election_timeout_upper_bound_ = getValueOrMaxInt32AndLogWarning(coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds(), "election_timeout_upper_bound_ms", log);
params.reserved_log_items_ = getValueOrMaxInt32AndLogWarning(coordination_settings->reserved_log_items, "reserved_log_items", log);
params.snapshot_distance_ = getValueOrMaxInt32AndLogWarning(coordination_settings->snapshot_distance, "snapshot_distance", log);
params.stale_log_gap_ = getValueOrMaxInt32AndLogWarning(coordination_settings->stale_log_gap, "stale_log_gap", log);
params.fresh_log_gap_ = getValueOrMaxInt32AndLogWarning(coordination_settings->fresh_log_gap, "fresh_log_gap", log);
params.client_req_timeout_ = getValueOrMaxInt32AndLogWarning(coordination_settings->operation_timeout_ms.totalMilliseconds(), "operation_timeout_ms", log);
params.auto_forwarding_ = coordination_settings->auto_forwarding;
params.auto_forwarding_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds() * 2;
params.max_append_size_ = coordination_settings->max_requests_batch_size;
params.auto_forwarding_req_timeout_ = std::max<uint64_t>(coordination_settings->operation_timeout_ms.totalMilliseconds() * 2, std::numeric_limits<int32_t>::max());
params.auto_forwarding_req_timeout_ = getValueOrMaxInt32AndLogWarning(coordination_settings->operation_timeout_ms.totalMilliseconds() * 2, "operation_timeout_ms", log);
params.max_append_size_ = getValueOrMaxInt32AndLogWarning(coordination_settings->max_requests_batch_size, "max_requests_batch_size", log);
params.return_method_ = nuraft::raft_params::async_handler;

View File

@ -91,8 +91,7 @@ static bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, c
static bool fixupACL(
const std::vector<Coordination::ACL> & request_acls,
const std::vector<KeeperStorage::AuthID> & current_ids,
std::vector<Coordination::ACL> & result_acls,
bool hash_acls)
std::vector<Coordination::ACL> & result_acls)
{
if (request_acls.empty())
return true;
@ -125,8 +124,6 @@ static bool fixupACL(
return false;
valid_found = true;
if (hash_acls)
new_acl.id = generateDigest(new_acl.id);
result_acls.push_back(new_acl);
}
}
@ -310,7 +307,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr
KeeperStorage::Node created_node;
Coordination::ACLs node_acls;
if (!fixupACL(request.acls, session_auth_ids, node_acls, !request.restored_from_zookeeper_log))
if (!fixupACL(request.acls, session_auth_ids, node_acls))
{
response.error = Coordination::Error::ZINVALIDACL;
return {response_ptr, {}};
@ -778,7 +775,7 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr
auto & session_auth_ids = storage.session_and_auth[session_id];
Coordination::ACLs node_acls;
if (!fixupACL(request.acls, session_auth_ids, node_acls, !request.restored_from_zookeeper_log))
if (!fixupACL(request.acls, session_auth_ids, node_acls))
{
response.error = Coordination::Error::ZINVALIDACL;
return {response_ptr, {}};

View File

@ -169,7 +169,7 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header,
processors.push_back(std::move(sink));
processors.push_back(std::move(exception_handling));
auto executor = std::make_shared<PipelineExecutor>(processors);
auto executor = std::make_shared<PipelineExecutor>(processors, getContext()->getProcessListElement());
executor->execute(/*num_threads = */ 1);
/// We are ready to receive the next file, for this we clear all the information received

View File

@ -46,7 +46,6 @@ class IColumn;
M(UInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. Values 0 or 1 mean that INSERT SELECT is not run in parallel. Higher values will lead to higher memory usage. Parallel INSERT SELECT has effect only if the SELECT part is run in parallel, see 'max_threads' setting.", 0) \
M(UInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \
M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \
M(MaxThreads, max_alter_threads, 0, "The maximum number of threads to execute the ALTER requests. By default, it is determined automatically.", 0) \
M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \
M(UInt64, max_distributed_connections, 1024, "The maximum number of connections for distributed processing of one query (should be greater than max_threads).", 0) \
M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "Which part of the query can be read into RAM for parsing (the remaining data for INSERT, if any, is read later)", 0) \
@ -572,7 +571,7 @@ class IColumn;
MAKE_OBSOLETE(M, UInt64, merge_tree_clear_old_temporary_directories_interval_seconds, 60) \
MAKE_OBSOLETE(M, UInt64, merge_tree_clear_old_parts_interval_seconds, 1) \
MAKE_OBSOLETE(M, UInt64, partial_merge_join_optimizations, 0) \
MAKE_OBSOLETE(M, MaxThreads, max_alter_threads, 0) \
/** The section above is for obsolete settings. Do not add anything there. */
@ -597,6 +596,8 @@ class IColumn;
M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \
M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \
M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \
M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum number of rows to read for automatic schema inference", 0) \
\
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \
M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \
@ -662,6 +663,7 @@ class IColumn;
M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \
\
M(EnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::EnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0)\
// End of FORMAT_FACTORY_SETTINGS
// Please add settings non-related to formats into the COMMON_SETTINGS above.

View File

@ -377,6 +377,8 @@ struct WhichDataType
constexpr bool isNullable() const { return idx == TypeIndex::Nullable; }
constexpr bool isFunction() const { return idx == TypeIndex::Function; }
constexpr bool isAggregateFunction() const { return idx == TypeIndex::AggregateFunction; }
constexpr bool isLowCardinality() const { return idx == TypeIndex::LowCardinality; }
};
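A usage sketch for the new predicate (column_type is a hypothetical DataTypePtr):

WhichDataType which(column_type);
if (which.isLowCardinality())
{
    /// e.g. unwrap the LowCardinality dictionary before dispatching on the nested type
}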
/// IDataType helpers (alternative for IDataType virtual methods with single point of truth)

View File

@ -0,0 +1,33 @@
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/hasNullable.h>
namespace DB
{
bool hasNullable(const DataTypePtr & type)
{
if (type->isNullable() || type->isLowCardinalityNullable())
return true;
if (const DataTypeArray * type_array = typeid_cast<const DataTypeArray *>(type.get()))
return hasNullable(type_array->getNestedType());
else if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(type.get()))
{
for (const auto & subtype : type_tuple->getElements())
{
if (hasNullable(subtype))
return true;
}
return false;
}
else if (const DataTypeMap * type_map = typeid_cast<const DataTypeMap *>(type.get()))
{
// Key type cannot be nullable. We only check value type.
return hasNullable(type_map->getValueType());
}
return false;
}
}
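A quick check of the recursion, assuming the usual DataType constructors:

auto type = std::make_shared<DB::DataTypeTuple>(DB::DataTypes{
    std::make_shared<DB::DataTypeArray>(
        std::make_shared<DB::DataTypeNullable>(std::make_shared<DB::DataTypeString>()))});
assert(DB::hasNullable(type));   /// Nullable found via Tuple -> Array -> Nullable(String)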

View File

@ -0,0 +1,10 @@
#pragma once
#include <DataTypes/IDataType.h>
namespace DB
{
bool hasNullable(const DataTypePtr & type);
}

View File

@ -29,10 +29,13 @@ namespace
return nullptr;
DictionaryStructure dictionary_structure = ExternalDictionariesLoader::getDictionaryStructure(*load_result.config);
auto comment = load_result.config->config->getString("dictionary.comment", "");
return StorageDictionary::create(
StorageID(database_name, load_result.name),
load_result.name,
dictionary_structure,
comment,
StorageDictionary::Location::DictionaryDatabase,
context);
}

View File

@ -76,10 +76,16 @@ std::pair<String, StoragePtr> createTableFromAST(
/// - the database has not been loaded yet;
/// - the code is simpler, since the query is already brought to a suitable form.
if (!ast_create_query.columns_list || !ast_create_query.columns_list->columns)
throw Exception("Missing definition of columns.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED);
columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true);
constraints = InterpreterCreateQuery::getConstraintsDescription(ast_create_query.columns_list->constraints);
{
if (!StorageFactory::instance().checkIfStorageSupportsSchemaInference(ast_create_query.storage->engine->name))
throw Exception("Missing definition of columns.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED);
/// Leave columns empty.
}
else
{
columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true);
constraints = InterpreterCreateQuery::getConstraintsDescription(ast_create_query.columns_list->constraints);
}
}
return

View File

@ -30,27 +30,33 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo
auto & ast_create_query = query->as<ASTCreateQuery &>();
bool has_structure = ast_create_query.columns_list && ast_create_query.columns_list->columns;
if (ast_create_query.as_table_function && !has_structure)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot alter table {} because it was created AS table function"
" and doesn't have structure in metadata", backQuote(ast_create_query.getTable()));
assert(has_structure);
ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns);
ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices);
ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(metadata.constraints);
ASTPtr new_projections = InterpreterCreateQuery::formatProjections(metadata.projections);
if (!has_structure && !ast_create_query.is_dictionary)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot alter table {}: metadata doesn't have structure", backQuote(ast_create_query.getTable()));
ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->projections, new_projections);
if (!ast_create_query.is_dictionary)
{
ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns);
ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices);
ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(metadata.constraints);
ASTPtr new_projections = InterpreterCreateQuery::formatProjections(metadata.projections);
ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->projections, new_projections);
}
if (metadata.select.select_query)
{
query->replace(ast_create_query.select, metadata.select.select_query);
}
/// MaterializedView is one type of CREATE query without storage.
/// MaterializedView and Dictionary are types of CREATE query without storage.
if (ast_create_query.storage)
{
ASTStorage & storage_ast = *ast_create_query.storage;

View File

@ -1,10 +1,16 @@
#include "ExecutableDictionarySource.h"
#include <filesystem>
#include <boost/algorithm/string/split.hpp>
#include <base/logger_useful.h>
#include <Common/LocalDateTime.h>
#include <Common/filesystemHelpers.h>
#include <Common/ShellCommand.h>
#include <Processors/Sources/ShellCommandSource.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Formats/formatBlock.h>
#include <Interpreters/Context.h>
@ -27,15 +33,46 @@ namespace ErrorCodes
extern const int UNSUPPORTED_METHOD;
}
namespace
{
void updateCommandIfNeeded(String & command, bool execute_direct, ContextPtr context)
{
if (!execute_direct)
return;
auto global_context = context->getGlobalContext();
auto user_scripts_path = global_context->getUserScriptsPath();
auto script_path = user_scripts_path + '/' + command;
if (!fileOrSymlinkPathStartsWith(script_path, user_scripts_path))
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Executable file {} must be inside user scripts folder {}",
command,
user_scripts_path);
if (!std::filesystem::exists(std::filesystem::path(script_path)))
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Executable file {} does not exist inside user scripts folder {}",
command,
user_scripts_path);
command = std::move(script_path);
}
}
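/// In effect, execute_direct confines commands to the server's user_scripts directory
/// (paths below are hypothetical):
///     command = "my_script.py"  -> "/var/lib/clickhouse/user_scripts/my_script.py" (must exist)
///     command = "../../bin/sh"  -> rejected: resolves outside the user_scripts folder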
ExecutableDictionarySource::ExecutableDictionarySource(
const DictionaryStructure & dict_struct_,
const Configuration & configuration_,
Block & sample_block_,
std::shared_ptr<ShellCommandSourceCoordinator> coordinator_,
ContextPtr context_)
: log(&Poco::Logger::get("ExecutableDictionarySource"))
, dict_struct(dict_struct_)
, configuration(configuration_)
, sample_block{sample_block_}
, sample_block(sample_block_)
, coordinator(std::move(coordinator_))
, context(context_)
{
/// Remove keys from sample_block for implicit_key dictionary because
@ -58,6 +95,7 @@ ExecutableDictionarySource::ExecutableDictionarySource(const ExecutableDictionar
, dict_struct(other.dict_struct)
, configuration(other.configuration)
, sample_block(other.sample_block)
, coordinator(other.coordinator)
, context(Context::createCopy(other.context))
{
}
@ -69,11 +107,11 @@ Pipe ExecutableDictionarySource::loadAll()
LOG_TRACE(log, "loadAll {}", toString());
ShellCommand::Config config(configuration.command);
auto process = ShellCommand::execute(config);
const auto & coordinator_configuration = coordinator->getConfiguration();
auto command = configuration.command;
updateCommandIfNeeded(command, coordinator_configuration.execute_direct, context);
Pipe pipe(std::make_unique<ShellCommandSource>(context, configuration.format, sample_block, std::move(process)));
return pipe;
return coordinator->createPipe(command, configuration.command_arguments, sample_block, context);
}
Pipe ExecutableDictionarySource::loadUpdatedAll()
@ -82,17 +120,32 @@ Pipe ExecutableDictionarySource::loadUpdatedAll()
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "ExecutableDictionarySource with implicit_key does not support loadUpdatedAll method");
time_t new_update_time = time(nullptr);
SCOPE_EXIT(update_time = new_update_time);
std::string command_with_update_field = configuration.command;
const auto & coordinator_configuration = coordinator->getConfiguration();
auto command = configuration.command;
updateCommandIfNeeded(command, coordinator_configuration.execute_direct, context);
auto command_arguments = configuration.command_arguments;
if (update_time)
command_with_update_field += " " + configuration.update_field + " " + DB::toString(LocalDateTime(update_time - configuration.update_lag));
{
auto update_difference = DB::toString(LocalDateTime(update_time - configuration.update_lag));
LOG_TRACE(log, "loadUpdatedAll {}", command_with_update_field);
ShellCommand::Config config(command_with_update_field);
auto process = ShellCommand::execute(config);
Pipe pipe(std::make_unique<ShellCommandSource>(context, configuration.format, sample_block, std::move(process)));
return pipe;
if (coordinator_configuration.execute_direct)
{
command_arguments.emplace_back(configuration.update_field);
command_arguments.emplace_back(std::move(update_difference));
}
else
{
command += ' ' + configuration.update_field + ' ' + update_difference;
}
}
update_time = new_update_time;
LOG_TRACE(log, "loadUpdatedAll {}", command);
return coordinator->createPipe(command, command_arguments, sample_block, context);
}
Pipe ExecutableDictionarySource::loadIds(const std::vector<UInt64> & ids)
@ -113,27 +166,17 @@ Pipe ExecutableDictionarySource::loadKeys(const Columns & key_columns, const std
Pipe ExecutableDictionarySource::getStreamForBlock(const Block & block)
{
ShellCommand::Config config(configuration.command);
auto process = ShellCommand::execute(config);
auto * process_in = &process->in;
const auto & coordinator_configuration = coordinator->getConfiguration();
String command = configuration.command;
updateCommandIfNeeded(command, coordinator_configuration.execute_direct, context);
ShellCommandSource::SendDataTask task = {[process_in, block, this]()
{
auto & out = *process_in;
auto source = std::make_shared<SourceFromSingleChunk>(block);
auto shell_input_pipe = Pipe(std::move(source));
if (configuration.send_chunk_header)
{
writeText(block.rows(), out);
writeChar('\n', out);
}
Pipes shell_input_pipes;
shell_input_pipes.emplace_back(std::move(shell_input_pipe));
auto output_format = context->getOutputFormat(configuration.format, out, block.cloneEmpty());
formatBlock(output_format, block);
out.close();
}};
std::vector<ShellCommandSource::SendDataTask> tasks = {std::move(task)};
Pipe pipe(std::make_unique<ShellCommandSource>(context, configuration.format, sample_block, std::move(process), std::move(tasks)));
auto pipe = coordinator->createPipe(command, configuration.command_arguments, std::move(shell_input_pipes), sample_block, context);
if (configuration.implicit_key)
pipe.addTransform(std::make_shared<TransformWithAdditionalColumns>(block, pipe.getHeader()));
@ -189,17 +232,40 @@ void registerDictionarySourceExecutable(DictionarySourceFactory & factory)
std::string settings_config_prefix = config_prefix + ".executable";
bool execute_direct = config.getBool(settings_config_prefix + ".execute_direct", false);
std::string command_value = config.getString(settings_config_prefix + ".command");
std::vector<String> command_arguments;
if (execute_direct)
{
boost::split(command_arguments, command_value, [](char c) { return c == ' '; });
command_value = std::move(command_arguments[0]);
command_arguments.erase(command_arguments.begin());
}
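/// e.g. a hypothetical config value "process_data.py --mode full" becomes
/// command_value = "process_data.py", command_arguments = {"--mode", "full"};
/// splitting on ' ' means individual arguments cannot themselves contain spaces.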
ExecutableDictionarySource::Configuration configuration
{
.command = config.getString(settings_config_prefix + ".command"),
.format = config.getString(settings_config_prefix + ".format"),
.command = std::move(command_value),
.command_arguments = std::move(command_arguments),
.update_field = config.getString(settings_config_prefix + ".update_field", ""),
.update_lag = config.getUInt64(settings_config_prefix + ".update_lag", 1),
.implicit_key = config.getBool(settings_config_prefix + ".implicit_key", false),
.send_chunk_header = config.getBool(settings_config_prefix + ".send_chunk_header", false)
};
return std::make_unique<ExecutableDictionarySource>(dict_struct, configuration, sample_block, context);
ShellCommandSourceCoordinator::Configuration shell_command_coordinator_configuration
{
.format = config.getString(settings_config_prefix + ".format"),
.command_termination_timeout_seconds = config.getUInt64(settings_config_prefix + ".command_termination_timeout", 10),
.command_read_timeout_milliseconds = config.getUInt64(settings_config_prefix + ".command_read_timeout", 10000),
.command_write_timeout_milliseconds = config.getUInt64(settings_config_prefix + ".command_write_timeout", 10000),
.is_executable_pool = false,
.send_chunk_header = config.getBool(settings_config_prefix + ".send_chunk_header", false),
.execute_direct = config.getBool(settings_config_prefix + ".execute_direct", false)
};
auto coordinator = std::make_shared<ShellCommandSourceCoordinator>(shell_command_coordinator_configuration);
return std::make_unique<ExecutableDictionarySource>(dict_struct, configuration, sample_block, std::move(coordinator), context);
};
factory.registerSource("executable", create_table_source);

View File

@ -7,6 +7,7 @@
#include <Dictionaries/IDictionarySource.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Processors/Sources/ShellCommandSource.h>
namespace DB
@ -20,20 +21,19 @@ public:
struct Configuration
{
std::string command;
std::string format;
std::vector<std::string> command_arguments;
std::string update_field;
UInt64 update_lag;
/// Implicit key means that the source script will return only values,
/// and the correspondence to the requested keys is determined implicitly - by the order of rows in the result.
bool implicit_key;
/// Send number_of_rows\n before sending chunk to process
bool send_chunk_header;
};
ExecutableDictionarySource(
const DictionaryStructure & dict_struct_,
const Configuration & configuration_,
Block & sample_block_,
std::shared_ptr<ShellCommandSourceCoordinator> coordinator_,
ContextPtr context_);
ExecutableDictionarySource(const ExecutableDictionarySource & other);
@ -69,6 +69,7 @@ private:
const DictionaryStructure dict_struct;
const Configuration configuration;
Block sample_block;
std::shared_ptr<ShellCommandSourceCoordinator> coordinator;
ContextPtr context;
};

View File

@ -1,14 +1,20 @@
#include "ExecutablePoolDictionarySource.h"
#include <filesystem>
#include <boost/algorithm/string/split.hpp>
#include <base/logger_useful.h>
#include <Common/LocalDateTime.h>
#include <Common/filesystemHelpers.h>
#include <Common/ShellCommand.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Sources/ShellCommandSource.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Formats/formatBlock.h>
#include <Interpreters/Context.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <Dictionaries/DictionarySourceFactory.h>
#include <Dictionaries/DictionarySourceHelpers.h>
@ -23,20 +29,19 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int DICTIONARY_ACCESS_DENIED;
extern const int UNSUPPORTED_METHOD;
extern const int TIMEOUT_EXCEEDED;
}
ExecutablePoolDictionarySource::ExecutablePoolDictionarySource(
const DictionaryStructure & dict_struct_,
const Configuration & configuration_,
Block & sample_block_,
std::shared_ptr<ShellCommandSourceCoordinator> coordinator_,
ContextPtr context_)
: dict_struct(dict_struct_)
, configuration(configuration_)
, sample_block(sample_block_)
, coordinator(std::move(coordinator_))
, context(context_)
/// If pool size == 0 then there are no size restrictions. Poco's max semaphore size is of integer type.
, process_pool(std::make_shared<ProcessPool>(configuration.pool_size == 0 ? std::numeric_limits<int>::max() : configuration.pool_size))
, log(&Poco::Logger::get("ExecutablePoolDictionarySource"))
{
/// Remove keys from sample_block for implicit_key dictionary because
@ -59,8 +64,8 @@ ExecutablePoolDictionarySource::ExecutablePoolDictionarySource(const ExecutableP
: dict_struct(other.dict_struct)
, configuration(other.configuration)
, sample_block(other.sample_block)
, coordinator(other.coordinator)
, context(Context::createCopy(other.context))
, process_pool(std::make_shared<ProcessPool>(configuration.pool_size))
, log(&Poco::Logger::get("ExecutablePoolDictionarySource"))
{
}
@ -93,41 +98,47 @@ Pipe ExecutablePoolDictionarySource::loadKeys(const Columns & key_columns, const
Pipe ExecutablePoolDictionarySource::getStreamForBlock(const Block & block)
{
std::unique_ptr<ShellCommand> process;
bool result = process_pool->tryBorrowObject(process, [this]()
String command = configuration.command;
const auto & coordinator_configuration = coordinator->getConfiguration();
if (coordinator_configuration.execute_direct)
{
ShellCommand::Config config(configuration.command);
config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, configuration.command_termination_timeout };
auto shell_command = ShellCommand::execute(config);
return shell_command;
}, configuration.max_command_execution_time * 10000);
auto global_context = context->getGlobalContext();
auto user_scripts_path = global_context->getUserScriptsPath();
auto script_path = user_scripts_path + '/' + command;
if (!result)
throw Exception(ErrorCodes::TIMEOUT_EXCEEDED,
"Could not get process from pool, max command execution timeout exceeded {} seconds",
configuration.max_command_execution_time);
if (!fileOrSymlinkPathStartsWith(script_path, user_scripts_path))
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Executable file {} must be inside user scripts folder {}",
command,
user_scripts_path);
size_t rows_to_read = block.rows();
auto * process_in = &process->in;
ShellCommandSource::SendDataTask task = [process_in, block, this]() mutable
{
auto & out = *process_in;
if (!std::filesystem::exists(std::filesystem::path(script_path)))
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Executable file {} does not exist inside user scripts folder {}",
command,
user_scripts_path);
if (configuration.send_chunk_header)
{
writeText(block.rows(), out);
writeChar('\n', out);
}
command = std::move(script_path);
}
auto output_format = context->getOutputFormat(configuration.format, out, block.cloneEmpty());
formatBlock(output_format, block);
};
std::vector<ShellCommandSource::SendDataTask> tasks = {std::move(task)};
auto source = std::make_shared<SourceFromSingleChunk>(block);
auto shell_input_pipe = Pipe(std::move(source));
ShellCommandSourceConfiguration command_configuration;
command_configuration.read_fixed_number_of_rows = true;
command_configuration.number_of_rows_to_read = rows_to_read;
Pipe pipe(std::make_unique<ShellCommandSource>(context, configuration.format, sample_block, std::move(process), std::move(tasks), command_configuration, process_pool));
command_configuration.number_of_rows_to_read = block.rows();
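/// A pooled process outlives a single request, so the source cannot read until EOF;
/// it must consume exactly block.rows() result rows before the command returns to the pool.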
Pipes shell_input_pipes;
shell_input_pipes.emplace_back(std::move(shell_input_pipe));
auto pipe = coordinator->createPipe(
command,
configuration.command_arguments,
std::move(shell_input_pipes),
sample_block,
context,
command_configuration);
if (configuration.implicit_key)
pipe.addTransform(std::make_shared<TransformWithAdditionalColumns>(block, pipe.getHeader()));
@ -157,7 +168,8 @@ DictionarySourcePtr ExecutablePoolDictionarySource::clone() const
std::string ExecutablePoolDictionarySource::toString() const
{
return "ExecutablePool size: " + std::to_string(configuration.pool_size) + " command: " + configuration.command;
size_t pool_size = coordinator->getConfiguration().pool_size;
return "ExecutablePool size: " + std::to_string(pool_size) + " command: " + configuration.command;
}
void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory)
@ -189,18 +201,40 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory)
if (max_execution_time_seconds != 0 && max_command_execution_time > max_execution_time_seconds)
max_command_execution_time = max_execution_time_seconds;
bool execute_direct = config.getBool(settings_config_prefix + ".execute_direct", false);
std::string command_value = config.getString(settings_config_prefix + ".command");
std::vector<String> command_arguments;
if (execute_direct)
{
boost::split(command_arguments, command_value, [](char c) { return c == ' '; });
command_value = std::move(command_arguments[0]);
command_arguments.erase(command_arguments.begin());
}
ExecutablePoolDictionarySource::Configuration configuration
{
.command = config.getString(settings_config_prefix + ".command"),
.format = config.getString(settings_config_prefix + ".format"),
.pool_size = config.getUInt64(settings_config_prefix + ".size"),
.command_termination_timeout = config.getUInt64(settings_config_prefix + ".command_termination_timeout", 10),
.max_command_execution_time = max_command_execution_time,
.command = std::move(command_value),
.command_arguments = std::move(command_arguments),
.implicit_key = config.getBool(settings_config_prefix + ".implicit_key", false),
.send_chunk_header = config.getBool(settings_config_prefix + ".send_chunk_header", false)
};
return std::make_unique<ExecutablePoolDictionarySource>(dict_struct, configuration, sample_block, context);
ShellCommandSourceCoordinator::Configuration shell_command_coordinator_configuration
{
.format = config.getString(settings_config_prefix + ".format"),
.command_termination_timeout_seconds = config.getUInt64(settings_config_prefix + ".command_termination_timeout", 10),
.command_read_timeout_milliseconds = config.getUInt64(settings_config_prefix + ".command_read_timeout", 10000),
.command_write_timeout_milliseconds = config.getUInt64(settings_config_prefix + ".command_write_timeout", 10000),
.pool_size = config.getUInt64(settings_config_prefix + ".pool_size", 16),
.max_command_execution_time_seconds = max_command_execution_time,
.is_executable_pool = true,
.send_chunk_header = config.getBool(settings_config_prefix + ".send_chunk_header", false),
.execute_direct = execute_direct
};
auto coordinator = std::make_shared<ShellCommandSourceCoordinator>(shell_command_coordinator_configuration);
return std::make_unique<ExecutablePoolDictionarySource>(dict_struct, configuration, sample_block, std::move(coordinator), context);
};
factory.registerSource("executable_pool", create_table_source);

View File

@ -28,21 +28,15 @@ public:
struct Configuration
{
String command;
String format;
size_t pool_size;
size_t command_termination_timeout;
size_t max_command_execution_time;
/// Implicit key means that the source script will return only values,
/// and the correspondence to the requested keys is determined implicitly - by the order of rows in the result.
std::vector<String> command_arguments;
bool implicit_key;
/// Send number_of_rows\n before sending chunk to process
bool send_chunk_header;
};
ExecutablePoolDictionarySource(
const DictionaryStructure & dict_struct_,
const Configuration & configuration_,
Block & sample_block_,
std::shared_ptr<ShellCommandSourceCoordinator> coordinator_,
ContextPtr context_);
ExecutablePoolDictionarySource(const ExecutablePoolDictionarySource & other);
@ -77,8 +71,8 @@ private:
const Configuration configuration;
Block sample_block;
std::shared_ptr<ShellCommandSourceCoordinator> coordinator;
ContextPtr context;
std::shared_ptr<ProcessPool> process_pool;
Poco::Logger * log;
};

View File

@ -591,7 +591,9 @@ void HashedDictionary<dictionary_key_type, sparse>::loadData()
}
}
else
{
updateData();
}
if (configuration.require_nonempty && 0 == element_count)
throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY,

View File

@ -7,6 +7,8 @@
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/IDataType.h>
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/join.hpp>
@ -26,6 +28,7 @@ namespace ErrorCodes
extern const int FILE_DOESNT_EXIST;
extern const int UNKNOWN_EXCEPTION;
extern const int INCORRECT_DATA;
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
}
capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaInfo & schema_info)
@ -427,6 +430,113 @@ void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Blo
}
}
template <typename ValueType>
static DataTypePtr getEnumDataTypeFromEnumerants(const capnp::EnumSchema::EnumerantList & enumerants)
{
std::vector<std::pair<String, ValueType>> values;
for (auto enumerant : enumerants)
values.emplace_back(enumerant.getProto().getName(), ValueType(enumerant.getOrdinal()));
return std::make_shared<DataTypeEnum<ValueType>>(std::move(values));
}
static DataTypePtr getEnumDataTypeFromEnumSchema(const capnp::EnumSchema & enum_schema)
{
auto enumerants = enum_schema.getEnumerants();
/// Enumerant ordinals are non-negative, so up to 127 values fit Int8 and up to 32767 fit Int16.
if (enumerants.size() < 128)
return getEnumDataTypeFromEnumerants<Int8>(enumerants);
if (enumerants.size() < 32768)
return getEnumDataTypeFromEnumerants<Int16>(enumerants);
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "ClickHouse supports only 8 and 16-bit Enums");
}
static DataTypePtr getDataTypeFromCapnProtoType(const capnp::Type & capnp_type)
{
switch (capnp_type.which())
{
case capnp::schema::Type::INT8:
return std::make_shared<DataTypeInt8>();
case capnp::schema::Type::INT16:
return std::make_shared<DataTypeInt16>();
case capnp::schema::Type::INT32:
return std::make_shared<DataTypeInt32>();
case capnp::schema::Type::INT64:
return std::make_shared<DataTypeInt64>();
case capnp::schema::Type::BOOL: [[fallthrough]];
case capnp::schema::Type::UINT8:
return std::make_shared<DataTypeUInt8>();
case capnp::schema::Type::UINT16:
return std::make_shared<DataTypeUInt16>();
case capnp::schema::Type::UINT32:
return std::make_shared<DataTypeUInt32>();
case capnp::schema::Type::UINT64:
return std::make_shared<DataTypeUInt64>();
case capnp::schema::Type::FLOAT32:
return std::make_shared<DataTypeFloat32>();
case capnp::schema::Type::FLOAT64:
return std::make_shared<DataTypeFloat64>();
case capnp::schema::Type::DATA: [[fallthrough]];
case capnp::schema::Type::TEXT:
return std::make_shared<DataTypeString>();
case capnp::schema::Type::ENUM:
return getEnumDataTypeFromEnumSchema(capnp_type.asEnum());
case capnp::schema::Type::LIST:
{
auto list_schema = capnp_type.asList();
auto nested_type = getDataTypeFromCapnProtoType(list_schema.getElementType());
return std::make_shared<DataTypeArray>(nested_type);
}
case capnp::schema::Type::STRUCT:
{
auto struct_schema = capnp_type.asStruct();
/// Check if it can be Nullable.
if (checkIfStructIsNamedUnion(struct_schema))
{
auto fields = struct_schema.getUnionFields();
if (fields.size() != 2 || (!fields[0].getType().isVoid() && !fields[1].getType().isVoid()))
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Unions are not supported");
auto value_type = fields[0].getType().isVoid() ? fields[1].getType() : fields[0].getType();
if (value_type.isStruct() || value_type.isList())
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Tuples and Lists cannot be inside Nullable");
auto nested_type = getDataTypeFromCapnProtoType(value_type);
return std::make_shared<DataTypeNullable>(nested_type);
}
if (checkIfStructContainsUnnamedUnion(struct_schema))
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Unnamed union is not supported");
/// Treat Struct as Tuple.
DataTypes nested_types;
Names nested_names;
for (auto field : struct_schema.getNonUnionFields())
{
nested_names.push_back(field.getProto().getName());
nested_types.push_back(getDataTypeFromCapnProtoType(field.getType()));
}
return std::make_shared<DataTypeTuple>(std::move(nested_types), std::move(nested_names));
}
default:
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Unsupported CapnProtoType: {}", getCapnProtoFullTypeName(capnp_type));
}
}
NamesAndTypesList capnProtoSchemaToCHSchema(const capnp::StructSchema & schema)
{
if (checkIfStructContainsUnnamedUnion(schema))
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Unnamed union is not supported");
NamesAndTypesList names_and_types;
for (auto field : schema.getNonUnionFields())
{
auto name = field.getProto().getName();
auto type = getDataTypeFromCapnProtoType(field.getType());
names_and_types.emplace_back(name, type);
}
return names_and_types;
}
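/// Usage sketch (assumed wiring; schema_info is a FormatSchemaInfo):
///     CapnProtoSchemaParser parser;
///     capnp::StructSchema schema = parser.getMessageSchema(schema_info);
///     NamesAndTypesList columns = capnProtoSchemaToCHSchema(schema);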
}
#endif

View File

@ -38,6 +38,7 @@ capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Re
void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Block & header, FormatSettings::EnumComparingMode mode);
NamesAndTypesList capnProtoSchemaToCHSchema(const capnp::StructSchema & schema);
}
#endif

Some files were not shown because too many files have changed in this diff.