Merge branch 'master' into master

mergify[bot] 2022-03-10 16:55:27 +00:00 committed by GitHub
commit 9f4ebc313b
262 changed files with 3287 additions and 1085 deletions

contrib/icu vendored

@ -1 +1 @@
Subproject commit faa2f9f9e1fe74c5ed00eba371d2830134cdbea1
Subproject commit a56dde820dc35665a66f2e9ee8ba58e75049b668


@ -212,7 +212,9 @@ set(ICUUC_SOURCES
"${ICU_SOURCE_DIR}/common/ubiditransform.cpp"
"${ICU_SOURCE_DIR}/common/pluralmap.cpp"
"${ICU_SOURCE_DIR}/common/static_unicode_sets.cpp"
"${ICU_SOURCE_DIR}/common/restrace.cpp")
"${ICU_SOURCE_DIR}/common/restrace.cpp"
"${ICU_SOURCE_DIR}/common/emojiprops.cpp"
"${ICU_SOURCE_DIR}/common/lstmbe.cpp")
set(ICUI18N_SOURCES
"${ICU_SOURCE_DIR}/i18n/ucln_in.cpp"
@ -398,7 +400,6 @@ set(ICUI18N_SOURCES
"${ICU_SOURCE_DIR}/i18n/sharedbreakiterator.cpp"
"${ICU_SOURCE_DIR}/i18n/scientificnumberformatter.cpp"
"${ICU_SOURCE_DIR}/i18n/dayperiodrules.cpp"
"${ICU_SOURCE_DIR}/i18n/nounit.cpp"
"${ICU_SOURCE_DIR}/i18n/number_affixutils.cpp"
"${ICU_SOURCE_DIR}/i18n/number_compact.cpp"
"${ICU_SOURCE_DIR}/i18n/number_decimalquantity.cpp"
@ -446,12 +447,21 @@ set(ICUI18N_SOURCES
"${ICU_SOURCE_DIR}/i18n/formattedvalue.cpp"
"${ICU_SOURCE_DIR}/i18n/formattedval_iterimpl.cpp"
"${ICU_SOURCE_DIR}/i18n/formattedval_sbimpl.cpp"
"${ICU_SOURCE_DIR}/i18n/formatted_string_builder.cpp")
"${ICU_SOURCE_DIR}/i18n/formatted_string_builder.cpp"
"${ICU_SOURCE_DIR}/i18n/measunit_extra.cpp"
"${ICU_SOURCE_DIR}/i18n/number_symbolswrapper.cpp"
"${ICU_SOURCE_DIR}/i18n/number_usageprefs.cpp"
"${ICU_SOURCE_DIR}/i18n/numrange_capi.cpp"
"${ICU_SOURCE_DIR}/i18n/pluralranges.cpp"
"${ICU_SOURCE_DIR}/i18n/units_complexconverter.cpp"
"${ICU_SOURCE_DIR}/i18n/units_converter.cpp"
"${ICU_SOURCE_DIR}/i18n/units_data.cpp"
"${ICU_SOURCE_DIR}/i18n/units_router.cpp")
file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" CONTENT " ")
enable_language(ASM)
set(ICUDATA_SOURCES
"${ICUDATA_SOURCE_DIR}/icudt66l_dat.S"
"${ICUDATA_SOURCE_DIR}/icudt70l_dat.S"
"${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" # Without this cmake can incorrectly detects library type (OBJECT) instead of SHARED/STATIC
)

contrib/icudata vendored

@ -1 +1 @@
Subproject commit f020820388e3faafb44cc643574a2d563dfde572
Subproject commit 72d9a4a7febc904e2b0a534ccb25ae40fac5f1e5


@ -4,7 +4,7 @@ FROM ubuntu:20.04
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
ARG repository="deb https://packages.clickhouse.com/deb stable main"
ARG version=22.1.1.*
# set non-empty deb_location_url url to create a docker image
@ -58,7 +58,7 @@ RUN groupadd -r clickhouse --gid=101 \
wget \
tzdata \
&& mkdir -p /etc/apt/sources.list.d \
&& apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \
&& apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \
&& echo $repository > /etc/apt/sources.list.d/clickhouse.list \
&& if [ -n "$deb_location_url" ]; then \
echo "installing from custom url with deb packages: $deb_location_url" \


@ -42,6 +42,9 @@ COPY prepare_hive_data.sh /
COPY demo_data.txt /
ENV PATH=/apache-hive-2.3.9-bin/bin:/hadoop-3.1.0/bin:/hadoop-3.1.0/sbin:$PATH
RUN service ssh start && sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml && hdfs namenode -format
RUN apt install -y python3 python3-pip
RUN pip3 install flask requests
COPY http_api_server.py /
COPY start.sh /


@ -0,0 +1,70 @@
import os
import subprocess
import datetime
from flask import Flask, flash, request, redirect, url_for
def run_command(command, wait=False):
print("{} - execute shell command:{}".format(datetime.datetime.now(), command))
lines = []
p = subprocess.Popen(command,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
shell=True)
if wait:
for l in iter(p.stdout.readline, b''):
lines.append(l)
p.poll()
return (lines, p.returncode)
else:
return (iter(p.stdout.readline, b''), 0)
UPLOAD_FOLDER = './'
ALLOWED_EXTENSIONS = {'txt', 'sh'}
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
@app.route('/')
def hello_world():
return 'Hello World'
def allowed_file(filename):
return '.' in filename and \
filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
@app.route('/upload', methods=['GET', 'POST'])
def upload_file():
if request.method == 'POST':
# check if the post request has the file part
if 'file' not in request.files:
flash('No file part')
return redirect(request.url)
file = request.files['file']
# If the user does not select a file, the browser submits an
# empty file without a filename.
if file.filename == '':
flash('No selected file')
return redirect(request.url)
if file and allowed_file(file.filename):
filename = file.filename
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
return redirect(url_for('upload_file', name=filename))
return '''
<!doctype html>
<title>Upload new File</title>
<h1>Upload new File</h1>
<form method=post enctype=multipart/form-data>
<input type=file name=file>
<input type=submit value=Upload>
</form>
'''
@app.route('/run', methods=['GET', 'POST'])
def parse_request():
data = request.data # raw POST body: the shell command to execute
run_command(data, wait=True)
return 'Ok'
if __name__ == '__main__':
app.run(port=5011)
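
For reference, here is a sketch of how this helper API can be exercised once the container is up (the script name is illustrative; the endpoints and port 5011 are as defined above):

``` bash
# Upload an allowed .txt/.sh file via the multipart form endpoint,
# then execute it: the raw POST body of /run is passed to run_command().
curl -F "file=@extra_setup.sh" http://localhost:5011/upload
curl -X POST --data-binary "bash ./extra_setup.sh" http://localhost:5011/run
```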


@ -2,5 +2,9 @@
hive -e "create database test"
hive -e "create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; "
hive -e "create table test.parquet_demo(id string, score int) PARTITIONED BY(day string, hour string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'"
hive -e "create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text "
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text"
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;"
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;"


@ -1,6 +1,5 @@
service ssh start
sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml
hadoop namenode -format
start-all.sh
service mysql start
mysql -u root -e "CREATE USER \"test\"@\"localhost\" IDENTIFIED BY \"test\""
@ -9,4 +8,4 @@ schematool -initSchema -dbType mysql
#nohup hiveserver2 &
nohup hive --service metastore &
bash /prepare_hive_data.sh
while true; do sleep 1000; done
python3 http_api_server.py


@ -1,11 +1,11 @@
sudo apt-get install apt-transport-https ca-certificates dirmngr
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4
sudo apt-get install -y apt-transport-https ca-certificates dirmngr
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \
echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \
/etc/apt/sources.list.d/clickhouse.list
sudo apt-get update
sudo apt-get install -y clickhouse-server clickhouse-client
sudo service clickhouse-server start
clickhouse-client # or "clickhouse-client --password" if you set up a password.
clickhouse-client # or "clickhouse-client --password" if you've set up a password.


@ -0,0 +1,11 @@
sudo apt-get install apt-transport-https ca-certificates dirmngr
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4
echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \
/etc/apt/sources.list.d/clickhouse.list
sudo apt-get update
sudo apt-get install -y clickhouse-server clickhouse-client
sudo service clickhouse-server start
clickhouse-client # or "clickhouse-client --password" if you set up a password.


@ -1,7 +1,6 @@
sudo yum install yum-utils
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo
sudo yum install clickhouse-server clickhouse-client
sudo yum install -y yum-utils
sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
sudo yum install -y clickhouse-server clickhouse-client
sudo /etc/init.d/clickhouse-server start
clickhouse-client # or "clickhouse-client --password" if you set up a password.


@ -0,0 +1,7 @@
sudo yum install yum-utils
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo
sudo yum install clickhouse-server clickhouse-client
sudo /etc/init.d/clickhouse-server start
clickhouse-client # or "clickhouse-client --password" if you set up a password.


@ -1,19 +1,20 @@
export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \
LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \
grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1)
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz
export LATEST_VERSION
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz"
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz"
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz"
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz"
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz"
sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh"
tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz
sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz"
sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh"
tar -xzvf clickhouse-server-$LATEST_VERSION.tgz
sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz"
sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh"
sudo /etc/init.d/clickhouse-server start
tar -xzvf clickhouse-client-$LATEST_VERSION.tgz
sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh
tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz"
sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh"


@ -0,0 +1,19 @@
export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \
grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1)
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz
sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
tar -xzvf clickhouse-server-$LATEST_VERSION.tgz
sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
sudo /etc/init.d/clickhouse-server start
tar -xzvf clickhouse-client-$LATEST_VERSION.tgz
sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh


@ -40,8 +40,8 @@ The list of third-party libraries:
| grpc | [Apache](https://github.com/ClickHouse-Extras/grpc/blob/60c986e15cae70aade721d26badabab1f822fdd6/LICENSE) |
| h3 | [Apache](https://github.com/ClickHouse-Extras/h3/blob/c7f46cfd71fb60e2fefc90e28abe81657deff735/LICENSE) |
| hyperscan | [Boost](https://github.com/ClickHouse-Extras/hyperscan/blob/e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa/LICENSE) |
| icu | [Public Domain](https://github.com/unicode-org/icu/blob/faa2f9f9e1fe74c5ed00eba371d2830134cdbea1/icu4c/LICENSE) |
| icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/f020820388e3faafb44cc643574a2d563dfde572/LICENSE) |
| icu | [Public Domain](https://github.com/unicode-org/icu/blob/a56dde820dc35665a66f2e9ee8ba58e75049b668/icu4c/LICENSE) |
| icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/72d9a4a7febc904e2b0a534ccb25ae40fac5f1e5/LICENSE) |
| jemalloc | [BSD 2-clause](https://github.com/ClickHouse-Extras/jemalloc/blob/e6891d9746143bf2cf617493d880ba5a0b9a3efd/COPYING) |
| krb5 | [MIT](https://github.com/ClickHouse-Extras/krb5/blob/5149dea4e2be0f67707383d2682b897c14631374/src/lib/gssapi/LICENSE) |
| libc-headers | [LGPL](https://github.com/ClickHouse-Extras/libc-headers/blob/a720b7105a610acbd7427eea475a5b6810c151eb/LICENSE) |


@ -22,4 +22,4 @@ Here is the illustration of the difference between traditional row-oriented syst
**Columnar**
![Columnar](https://clickhouse.com/docs/en/images/column-oriented.gif#)
A columnar database is a preferred choice for analytical applications because it allows to have many columns in a table just in case, but do not pay the cost for unused columns on read query execution time. Column-oriented databases are designed for big data processing because and data warehousing, they often natively scale using distributed clusters of low-cost hardware to increase throughput. ClickHouse does it with combination of [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables.
A columnar database is the preferred choice for analytical applications because it allows having many columns in a table just in case, without paying the cost for unused columns at read query execution time. Column-oriented databases are designed for big data processing and data warehousing; they often natively scale out using distributed clusters of low-cost hardware to increase throughput. ClickHouse does this with a combination of [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables.
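
As a rough sketch of that combination (the cluster name, ZooKeeper path, and schema below are placeholders, not part of the original page), a replicated local table can be fronted by a distributed table:

``` bash
# Replicated storage on each shard/replica of the cluster.
clickhouse-client --query "
    CREATE TABLE hits_local ON CLUSTER my_cluster (EventDate Date, UserID UInt64)
    ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hits_local', '{replica}')
    ORDER BY (EventDate, UserID)"
# Distributed table that fans queries out across the shards.
clickhouse-client --query "
    CREATE TABLE hits_all ON CLUSTER my_cluster AS hits_local
    ENGINE = Distributed(my_cluster, default, hits_local, rand())"
```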


@ -27,9 +27,17 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun
{% include 'install/deb.sh' %}
```
<details markdown="1">
<summary>Deprecated Method for installing deb-packages</summary>
``` bash
{% include 'install/deb_repo.sh' %}
```
</details>
You can replace `stable` with `lts` or `testing` to use different [release trains](../faq/operations/production.md) based on your needs.
You can also download and install packages manually from [here](https://repo.clickhouse.com/deb/stable/main/).
You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/stable).
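For example, a minimal sketch of switching to the `lts` train, assuming it mirrors the `stable` repository layout used above:

``` bash
echo "deb https://packages.clickhouse.com/deb lts main" | sudo tee \
    /etc/apt/sources.list.d/clickhouse.list
sudo apt-get update
sudo apt-get install -y clickhouse-server clickhouse-client
```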
#### Packages {#packages}
@ -49,11 +57,17 @@ It is recommended to use official pre-compiled `rpm` packages for CentOS, RedHat
First, you need to add the official repository:
``` bash
sudo yum install yum-utils
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64
{% include 'install/rpm.sh' %}
```
<details markdown="1">
<summary>Deprecated Method for installing rpm-packages</summary>
``` bash
{% include 'install/rpm_repo.sh' %}
```
</details>
If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). `prestable` is sometimes also available.
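For instance, a sketch of opting into a non-stable train using the repository sections defined in the `clickhouse.repo` file added later in this diff (`clickhouse-lts`, `clickhouse-prestable`, `clickhouse-testing`):

``` bash
sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
# Enable the testing train in addition to (or instead of) stable.
sudo yum-config-manager --enable clickhouse-testing
```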
Then run these commands to install packages:
@ -62,36 +76,27 @@ Then run these commands to install packages:
sudo yum install clickhouse-server clickhouse-client
```
You can also download and install packages manually from [here](https://repo.clickhouse.com/rpm/stable/x86_64).
You can also download and install packages manually from [here](https://packages.clickhouse.com/rpm/stable).
### From Tgz Archives {#from-tgz-archives}
It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible.
The required version can be downloaded with `curl` or `wget` from repository https://repo.clickhouse.com/tgz/.
The required version can be downloaded with `curl` or `wget` from the repository https://packages.clickhouse.com/tgz/.
After that, the downloaded archives should be unpacked and installed with the installation scripts. Example for the latest stable version:
``` bash
export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep stable | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz
sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
tar -xzvf clickhouse-server-$LATEST_VERSION.tgz
sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
sudo /etc/init.d/clickhouse-server start
tar -xzvf clickhouse-client-$LATEST_VERSION.tgz
sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh
{% include 'install/tgz.sh' %}
```
<details markdown="1">
<summary>Deprecated Method for installing tgz archives</summary>
``` bash
{% include 'install/tgz_repo.sh' %}
```
</details>
For production environments, it's recommended to use the latest `stable` version. You can find its number on the GitHub page https://github.com/ClickHouse/ClickHouse/tags with the postfix `-stable`.
### From Docker Image {#from-docker-image}


@ -256,7 +256,7 @@ Possible values:
Default value: 161061273600 (150 GB).
The merge scheduler periodically analyzes the sizes and number of parts in partitions, and if there is enough free resources in the pool, it starts background merges. Merges occur until the total size of the source parts is less than `max_bytes_to_merge_at_max_space_in_pool`.
The merge scheduler periodically analyzes the sizes and number of parts in partitions, and if there are enough free resources in the pool, it starts background merges. Merges occur until the total size of the source parts is larger than `max_bytes_to_merge_at_max_space_in_pool`.
Merges initiated by [OPTIMIZE FINAL](../../sql-reference/statements/optimize.md) ignore `max_bytes_to_merge_at_max_space_in_pool` and merge parts only taking into account available resources (free disk space) until one part remains in the partition.
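
To check the effective value on a running server, a query along these lines should work (`system.merge_tree_settings` exposes the current MergeTree settings):

``` bash
clickhouse-client --query "
    SELECT name, value
    FROM system.merge_tree_settings
    WHERE name = 'max_bytes_to_merge_at_max_space_in_pool'"
```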
@ -346,7 +346,7 @@ Default value: `0`.
**Usage**
The value of the `min_bytes_to_rebalance_partition_over_jbod` setting should be less than the value of the [max_bytes_to_merge_at_max_space_in_pool](../../operations/settings/merge-tree-settings.md#max-bytes-to-merge-at-max-space-in-pool) setting. Otherwise, ClickHouse throws an exception.
The value of the `min_bytes_to_rebalance_partition_over_jbod` setting should not be less than the value of the [max_bytes_to_merge_at_max_space_in_pool](../../operations/settings/merge-tree-settings.md#max-bytes-to-merge-at-max-space-in-pool) / 1024. Otherwise, ClickHouse throws an exception.
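
As a worked example of the corrected bound: with the default `max_bytes_to_merge_at_max_space_in_pool` of 161061273600 bytes (150 GB), `min_bytes_to_rebalance_partition_over_jbod` must be at least 161061273600 / 1024 = 157286400 bytes (150 MB). A sketch of computing the bound from a live server:

``` bash
clickhouse-client --query "
    SELECT value AS max_merge_bytes,
           intDiv(toUInt64(value), 1024) AS min_rebalance_bytes
    FROM system.merge_tree_settings
    WHERE name = 'max_bytes_to_merge_at_max_space_in_pool'"
```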
## detach_not_byte_identical_parts {#detach_not_byte_identical_parts}


@ -1,11 +1,11 @@
---
toc_priority: 59
toc_title: Yandex.Metrica Dictionaries
toc_title: Embedded Dictionaries
---
# Functions for Working with Yandex.Metrica Dictionaries {#functions-for-working-with-yandex-metrica-dictionaries}
# Functions for Working with Embedded Dictionaries
In order for the functions below to work, the server config must specify the paths and addresses for getting all the Yandex.Metrica dictionaries. The dictionaries are loaded at the first call of any of these functions. If the reference lists can't be loaded, an exception is thrown.
In order for the functions below to work, the server config must specify the paths and addresses for getting all the embedded dictionaries. The dictionaries are loaded at the first call of any of these functions. If the reference lists can't be loaded, an exception is thrown.
For information about creating reference lists, see the section “Dictionaries”.
@ -33,7 +33,7 @@ regionToCountry(RegionID, 'ua') uses the dictionary for the 'ua' key: /opt/g
### regionToCity(id\[, geobase\]) {#regiontocityid-geobase}
Accepts a UInt32 number: the region ID from the Yandex geobase. If this region is a city or part of a city, it returns the region ID for the appropriate city. Otherwise, returns 0.
Accepts a UInt32 number: the region ID from the geobase. If this region is a city or part of a city, it returns the region ID for the appropriate city. Otherwise, returns 0.
### regionToArea(id\[, geobase\]) {#regiontoareaid-geobase}
@ -117,7 +117,7 @@ regionToTopContinent(id[, geobase])
**Arguments**
- `id` — Region ID from the Yandex geobase. [UInt32](../../sql-reference/data-types/int-uint.md).
- `id` — Region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint.md).
- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../../sql-reference/data-types/string.md). Optional.
**Returned value**
@ -132,7 +132,7 @@ Type: `UInt32`.
Gets the population for a region.
The population can be recorded in files with the geobase. See the section “External dictionaries”.
If the population is not recorded for the region, it returns 0.
In the Yandex geobase, the population might be recorded for child regions, but not for parent regions.
In the geobase, the population might be recorded for child regions, but not for parent regions.
### regionIn(lhs, rhs\[, geobase\]) {#regioninlhs-rhs-geobase}
@ -141,12 +141,12 @@ The relationship is reflexive: any region also belongs to itself.
### regionHierarchy(id\[, geobase\]) {#regionhierarchyid-geobase}
Accepts a UInt32 number: the region ID from the Yandex geobase. Returns an array of region IDs consisting of the passed region and all parents along the chain.
Accepts a UInt32 number: the region ID from the geobase. Returns an array of region IDs consisting of the passed region and all parents along the chain.
Example: `regionHierarchy(toUInt32(213)) = [213,1,3,225,10001,10000]`.
### regionToName(id\[, lang\]) {#regiontonameid-lang}
Accepts a UInt32 number: the region ID from the Yandex geobase. A string with the name of the language can be passed as a second argument. Supported languages are: ru, en, ua, uk, by, kz, tr. If the second argument is omitted, the language ru is used. If the language is not supported, an exception is thrown. Returns a string: the name of the region in the corresponding language. If the region with the specified ID does not exist, an empty string is returned.
Accepts a UInt32 number: the region ID from the geobase. A string with the name of the language can be passed as a second argument. Supported languages are: ru, en, ua, uk, by, kz, tr. If the second argument is omitted, the language ru is used. If the language is not supported, an exception is thrown. Returns a string: the name of the region in the corresponding language. If the region with the specified ID does not exist, an empty string is returned.
`ua` and `uk` both mean Ukrainian.


@ -11,7 +11,7 @@ ClickHouse supports the following syntax variants:
- `LIMIT [offset_value, ]n BY expressions`
- `LIMIT n OFFSET offset_value BY expressions`
During query processing, ClickHouse selects data ordered by sorting key. The sorting key is set explicitly using an [ORDER BY](../../../sql-reference/statements/select/order-by.md) clause or implicitly as a property of the table engine. Then ClickHouse applies `LIMIT n BY expressions` and returns the first `n` rows for each distinct combination of `expressions`. If `OFFSET` is specified, then for each data block that belongs to a distinct combination of `expressions`, ClickHouse skips `offset_value` number of rows from the beginning of the block and returns a maximum of `n` rows as a result. If `offset_value` is bigger than the number of rows in the data block, ClickHouse returns zero rows from the block.
During query processing, ClickHouse selects data ordered by sorting key. The sorting key is set explicitly using an [ORDER BY](order-by.md#select-order-by) clause or implicitly as a property of the table engine (row order is only guaranteed when using [ORDER BY](order-by.md#select-order-by), otherwise the row blocks will not be ordered due to multi-threading). Then ClickHouse applies `LIMIT n BY expressions` and returns the first `n` rows for each distinct combination of `expressions`. If `OFFSET` is specified, then for each data block that belongs to a distinct combination of `expressions`, ClickHouse skips `offset_value` number of rows from the beginning of the block and returns a maximum of `n` rows as a result. If `offset_value` is bigger than the number of rows in the data block, ClickHouse returns zero rows from the block.
!!! note "Note"
`LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query.
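
A toy illustration of the per-group behavior (not from the original page), using the `numbers` table function:

``` bash
# Returns at most 2 rows for each distinct value of grp.
clickhouse-client --query "
    SELECT number % 3 AS grp, number
    FROM numbers(10)
    ORDER BY grp, number
    LIMIT 2 BY grp"
```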


@ -28,9 +28,17 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubuntu
{% include 'install/deb.sh' %}
```
<details markdown="1">
<summary>Deprecated Method for installing deb-packages</summary>
``` bash
{% include 'install/deb_repo.sh' %}
```
</details>
If you want to use the latest version, replace `stable` with `testing` (this is recommended for testing environments).
You can also download and install packages manually from [here](https://repo.clickhouse.com/deb/stable/main/).
You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/stable).
#### Packages {#packages}
@ -46,11 +54,17 @@ For CentOS, RedHat, and all other rpm-based Linux distributions
First, you need to add the official repository:
``` bash
sudo yum install yum-utils
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64
{% include 'install/rpm.sh' %}
```
<details markdown="1">
<summary>Deprecated Method for installing rpm-packages</summary>
``` bash
{% include 'install/rpm_repo.sh' %}
```
</details>
If you want to use the latest version, replace `stable` with `testing` (this is recommended for testing environments). `prestable` is sometimes also available.
Then run these commands to install the packages:
@ -59,35 +73,26 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64
sudo yum install clickhouse-server clickhouse-client
```
You can also download and install packages manually from [here](https://repo.clickhouse.com/rpm/stable/x86_64).
You can also download and install packages manually from [here](https://packages.clickhouse.com/rpm/stable).
### From Tgz Archives {#from-tgz-archives}
It is recommended to use official pre-compiled `tgz` archives for all Linux distributions where installation of `deb` or `rpm` packages is not possible.
The required version can be downloaded with `curl` or `wget` from the repository https://repo.clickhouse.com/tgz/. After that, the downloaded archives should be unpacked and installed with the installation scripts. Example for the latest version:
The required version can be downloaded with `curl` or `wget` from the repository https://packages.clickhouse.com/tgz/. After that, the downloaded archives should be unpacked and installed with the installation scripts. Example for the latest version:
``` bash
export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-client-$LATEST_VERSION.tgz
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz
sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
tar -xzvf clickhouse-server-$LATEST_VERSION.tgz
sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
sudo /etc/init.d/clickhouse-server start
tar -xzvf clickhouse-client-$LATEST_VERSION.tgz
sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh
{% include 'install/tgz.sh' %}
```
<details markdown="1">
<summary>Deprecated Method for installing tgz archives</summary>
``` bash
{% include 'install/tgz_repo.sh' %}
```
</details>
For production environments, it's recommended to use the latest `stable` version. You can find its number on the GitHub page https://github.com/ClickHouse/ClickHouse/tags with the postfix `-stable`.
### From Docker Image {#from-docker-image}


@ -6,6 +6,7 @@ changelog/2017.md whats-new/changelog/2017.md
changelog/2018.md whats-new/changelog/2018.md
changelog/2019.md whats-new/changelog/2019.md
changelog/index.md whats-new/changelog/index.md
commercial/cloud.md https://clickhouse.com/cloud/
data_types/array.md sql-reference/data-types/array.md
data_types/boolean.md sql-reference/data-types/boolean.md
data_types/date.md sql-reference/data-types/date.md


@ -27,11 +27,17 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su
{% include 'install/deb.sh' %}
```
You can also download and install these packages manually from here: https://repo.clickhouse.com/deb/stable/main/.
<details markdown="1">
<summary>Deprecated Method for installing deb-packages</summary>
``` bash
{% include 'install/deb_repo.sh' %}
```
</details>
To use different [ClickHouse releases](../faq/operations/production.md) based on your needs, you can replace `stable` with `lts` or `testing`.
You can also download and install packages manually from the [repository](https://repo.clickhouse.com/deb/stable/main/).
You can also download and install packages manually from the [repository](https://packages.clickhouse.com/deb/pool/stable).
#### Packages {#packages}
@ -51,11 +57,17 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su
First, you need to add the official repository:
``` bash
sudo yum install yum-utils
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64
{% include 'install/rpm.sh' %}
```
<details markdown="1">
<summary>Deprecated Method for installing rpm-packages</summary>
``` bash
{% include 'install/rpm_repo.sh' %}
```
</details>
To use the most recent versions, replace `stable` with `testing` (this is recommended for testing environments). `prestable` is sometimes also available.
Then run these commands to actually install the packages:
@ -64,36 +76,27 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64
sudo yum install clickhouse-server clickhouse-client
```
You can also download and install the packages manually from here: https://repo.clickhouse.com/rpm/stable/x86_64.
You can also download and install the packages manually from here: https://packages.clickhouse.com/rpm/stable.
### From Tgz Archives {#from-tgz-archives}
The ClickHouse team at Yandex recommends using pre-compiled binaries from `tgz` archives for all distributions where installation of `deb` and `rpm` packages is not possible.
The required version of the archives can be downloaded manually with `curl` or `wget` from the repository https://repo.clickhouse.com/tgz/.
The required version of the archives can be downloaded manually with `curl` or `wget` from the repository https://packages.clickhouse.com/tgz/.
After that, the archives should be unpacked and installed with the installation scripts. Example of installing the latest version:
``` bash
export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/clickhouse-client-$LATEST_VERSION.tgz
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz
sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
tar -xzvf clickhouse-server-$LATEST_VERSION.tgz
sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
sudo /etc/init.d/clickhouse-server start
tar -xzvf clickhouse-client-$LATEST_VERSION.tgz
sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh
{% include 'install/tgz.sh' %}
```
<details markdown="1">
<summary>Deprecated Method for installing tgz archives</summary>
``` bash
{% include 'install/tgz_repo.sh' %}
```
</details>
For production environments, it's recommended to use the latest `stable` version. Its number can also be found on GitHub at https://github.com/ClickHouse/ClickHouse/tags with the postfix `-stable`.
### From Docker Image {#from-docker-image}


@ -11,7 +11,7 @@ ClickHouse supports the following syntax variants:
- `LIMIT [offset_value, ]n BY expressions`
- `LIMIT n OFFSET offset_value BY expressions`
During query processing, ClickHouse selects data ordered by the sorting key. The sorting key is set explicitly in the [ORDER BY](order-by.md#select-order-by) clause or implicitly as a property of the table engine. Then ClickHouse applies `LIMIT n BY expressions` and returns the first `n` rows for each distinct combination of `expressions`. If `OFFSET` is specified, then for each data block belonging to a distinct combination of `expressions`, ClickHouse skips `offset_value` rows from the beginning of the block and returns at most `n` rows. If `offset_value` is greater than the number of rows in the data block, ClickHouse returns no rows from the block.
During query processing, ClickHouse selects data ordered by the sorting key. The sorting key is set explicitly in the [ORDER BY](order-by.md#select-order-by) clause or implicitly as a property of the table engine (row order is only guaranteed when using [ORDER BY](order-by.md#select-order-by); otherwise the row blocks will not be ordered due to multi-threading). Then ClickHouse applies `LIMIT n BY expressions` and returns the first `n` rows for each distinct combination of `expressions`. If `OFFSET` is specified, then for each data block belonging to a distinct combination of `expressions`, ClickHouse skips `offset_value` rows from the beginning of the block and returns at most `n` rows. If `offset_value` is greater than the number of rows in the data block, ClickHouse returns no rows from the block.
`LIMIT BY` is not related to the `LIMIT` clause. They can both be used in the same query.


@ -31,7 +31,12 @@ def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path)
from_path.replace('/index.md', '/index.html').replace('.md', '/index.html')
)
target_path = to_path.replace('/index.md', '/').replace('.md', '/')
if target_path[0:7] != 'http://' and target_path[0:8] != 'https://':
to_url = f'/{base_prefix}/{lang}/{target_path}'
else:
to_url = target_path
to_url = to_url.strip()
write_redirect_html(out_path, to_url)


@ -14,7 +14,6 @@ module.exports = {
entry: [
path.resolve(scssPath, 'bootstrap.scss'),
path.resolve(scssPath, 'greenhouse.scss'),
path.resolve(scssPath, 'main.scss'),
path.resolve(jsPath, 'main.js'),
],


@ -151,6 +151,11 @@ def build_website(args):
)
)
shutil.copytree(
os.path.join(args.website_dir, 'images'),
os.path.join(args.output_dir, 'docs', 'images')
)
# This file can be requested to check for available ClickHouse releases.
shutil.copy2(
os.path.join(args.src_dir, 'utils', 'list-versions', 'version_date.tsv'),
@ -231,28 +236,31 @@ def minify_file(path, css_digest, js_digest):
def minify_website(args):
# Output greenhouse css separately from main bundle to be included via the greenhouse iframe
command = f"cat '{args.website_dir}/css/greenhouse.css' > '{args.output_dir}/css/greenhouse.css'"
logging.info(command)
output = subprocess.check_output(command, shell=True)
logging.debug(output)
css_in = ' '.join(get_css_in(args))
css_out = f'{args.output_dir}/css/base.css'
if args.minify:
css_out = f'{args.output_dir}/docs/css/base.css'
os.makedirs(f'{args.output_dir}/docs/css')
if args.minify and False: # TODO: return closure
command = f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' " \
f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}"
else:
command = f'cat {css_in} > {css_out}'
logging.info(css_in)
logging.info(command)
output = subprocess.check_output(command, shell=True)
logging.debug(output)
else:
command = f"cat {css_in}"
output = subprocess.check_output(command, shell=True)
with open(css_out, 'wb+') as f:
f.write(output)
with open(css_out, 'rb') as f:
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
js_in = get_js_in(args)
js_out = f'{args.output_dir}/js/base.js'
js_in = ' '.join(get_js_in(args))
js_out = f'{args.output_dir}/docs/js/base.js'
os.makedirs(f'{args.output_dir}/docs/js')
if args.minify and False: # TODO: return closure
js_in = [js[1:-1] for js in js_in]
closure_args = [
@ -271,11 +279,11 @@ def minify_website(args):
f.write(js_content)
else:
js_in = ' '.join(js_in)
command = f'cat {js_in} > {js_out}'
logging.info(command)
command = f"cat {js_in}"
output = subprocess.check_output(command, shell=True)
logging.debug(output)
with open(js_out, 'wb+') as f:
f.write(output)
with open(js_out, 'rb') as f:
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
logging.info(js_digest)


@ -38,5 +38,46 @@ CREATE TABLE test
ENGINE = EmbeddedRocksDB
PRIMARY KEY key
```
## Metrics
There is also a `system.rocksdb` table that exposes RocksDB statistics:
```sql
SELECT
name,
value
FROM system.rocksdb
┌─name──────────────────────┬─value─┐
│ no.file.opens │ 1 │
│ number.block.decompressed │ 1 │
└───────────────────────────┴───────┘
```
## Configuration
You can also change any [rocksdb option](https://github.com/facebook/rocksdb/wiki/Option-String-and-Option-Map) using the config file:
```xml
<rocksdb>
<options>
<max_background_jobs>8</max_background_jobs>
</options>
<column_family_options>
<num_levels>2</num_levels>
</column_family_options>
<tables>
<table>
<name>TABLE</name>
<options>
<max_background_jobs>8</max_background_jobs>
</options>
<column_family_options>
<num_levels>2</num_levels>
</column_family_options>
</table>
</tables>
</rocksdb>
```
[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/embedded-rocksdb/) <!--hide-->


@ -27,9 +27,17 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not
{% include 'install/deb.sh' %}
```
<details markdown="1">
<summary>Deprecated Method for installing deb-packages</summary>
``` bash
{% include 'install/deb_repo.sh' %}
```
</details>
If you want to use the latest version, replace `stable` with `testing` (this is recommended only for testing environments).
You can also download and install packages manually from [here](https://repo.clickhouse.com/deb/stable/main/).
You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/stable).
Package list:
@ -45,11 +53,17 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not
First, you need to add the official repository:
``` bash
sudo yum install yum-utils
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64
{% include 'install/rpm.sh' %}
```
<details markdown="1">
<summary>Deprecated Method for installing rpm-packages</summary>
``` bash
{% include 'install/rpm_repo.sh' %}
```
</details>
If you want to use the latest version, replace `stable` with `testing` (this is recommended only for testing environments). `prestable` is sometimes also available.
Then run these commands to install:
@ -58,37 +72,28 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64
sudo yum install clickhouse-server clickhouse-client
```
You can also download and install packages manually from [here](https://repo.clickhouse.com/rpm/stable/x86_64).
You can also download and install packages manually from [here](https://packages.clickhouse.com/rpm/stable).
### From Tgz Archives {#from-tgz-archives}
If your operating system does not support installing `deb` or `rpm` packages, it is recommended to use the official pre-compiled `tgz` archives.
The required version can be downloaded with `curl` or `wget` from the repository `https://repo.clickhouse.com/tgz/`.
The required version can be downloaded with `curl` or `wget` from the repository `https://packages.clickhouse.com/tgz/`.
After downloading, unpack the archives and install with the installation scripts. Example of installing the latest stable version:
``` bash
export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep stable | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz
sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
tar -xzvf clickhouse-server-$LATEST_VERSION.tgz
sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
sudo /etc/init.d/clickhouse-server start
tar -xzvf clickhouse-client-$LATEST_VERSION.tgz
sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh
{% include 'install/tgz.sh' %}
```
<details markdown="1">
<summary>Deprecated Method for installing tgz archives</summary>
``` bash
{% include 'install/tgz_repo.sh' %}
```
</details>
For production environments, it is recommended to use the latest `stable` version. You can find it on the GitHub page https://github.com/ClickHouse/ClickHouse/tags marked with the suffix `-stable`.
### From Docker Image {#from-docker-image}


@ -1 +0,0 @@
../../../../en/sql-reference/statements/alter/row-policy.md


@ -0,0 +1,19 @@
---
toc_priority: 47
toc_title: Row Policy
---
# ALTER ROW POLICY {#alter-row-policy-statement}
Changes row policies.
Syntax:
``` sql
ALTER [ROW] POLICY [IF EXISTS] name1 [ON CLUSTER cluster_name1] ON [database1.]table1 [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] ON [database2.]table2 [RENAME TO new_name2] ...]
[AS {PERMISSIVE | RESTRICTIVE}]
[FOR SELECT]
[USING {condition | NONE}][,...]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```


@ -1 +0,0 @@
../../../../en/sql-reference/statements/alter/settings-profile.md


@ -0,0 +1,16 @@
---
toc_priority: 48
toc_title: Settings Profile
---
## ALTER SETTINGS PROFILE {#alter-settings-profile-statement}
Changes settings profiles.
Syntax:
``` sql
ALTER SETTINGS PROFILE [IF EXISTS] TO name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
```


@ -11,7 +11,7 @@ ClickHouse supports the following syntax variants:
- `LIMIT [offset_value, ]n BY expressions`
- `LIMIT n OFFSET offset_value BY expressions`
During query processing, ClickHouse selects data sorted by the sorting key. The sorting key is set explicitly using an [ORDER BY](../../../sql-reference/statements/select/order-by.md) clause or implicitly as a property of the table engine. Then ClickHouse applies `LIMIT n BY expressions` and returns the first `n` rows for each distinct combination of `expressions`. If `OFFSET` is specified, then for each data block that belongs to a distinct combination of `expressions`, ClickHouse skips `offset_value` rows from the beginning of the block and returns a maximum of `n` rows as a result. If `offset_value` is bigger than the number of rows in the data block, ClickHouse returns zero rows from the block.
During query processing, ClickHouse selects data sorted by the sorting key. The sorting key is set explicitly using an [ORDER BY](order-by.md#select-order-by) clause or implicitly as a property of the table engine (row order is only guaranteed when using [ORDER BY](order-by.md#select-order-by); otherwise the row blocks will not be ordered due to multi-threading). Then ClickHouse applies `LIMIT n BY expressions` and returns the first `n` rows for each distinct combination of `expressions`. If `OFFSET` is specified, then for each data block that belongs to a distinct combination of `expressions`, ClickHouse skips `offset_value` rows from the beginning of the block and returns a maximum of `n` rows as a result. If `offset_value` is bigger than the number of rows in the data block, ClickHouse returns zero rows from the block.
!!! note "Note"
`LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query.


@ -0,0 +1,31 @@
[clickhouse-stable]
name=ClickHouse - Stable Repository
baseurl=https://packages.clickhouse.com/rpm/stable/
gpgkey=https://packages.clickhouse.com/rpm/stable/repodata/repomd.xml.key
gpgcheck=0
repo_gpgcheck=1
enabled=0
[clickhouse-lts]
name=ClickHouse - LTS Repository
baseurl=https://packages.clickhouse.com/rpm/lts/
gpgkey=https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key
gpgcheck=0
repo_gpgcheck=1
enabled=0
[clickhouse-prestable]
name=ClickHouse - Pre-stable Repository
baseurl=https://packages.clickhouse.com/rpm/prestable/
gpgkey=https://packages.clickhouse.com/rpm/prestable/repodata/repomd.xml.key
gpgcheck=0
repo_gpgcheck=1
enabled=0
[clickhouse-testing]
name=ClickHouse - Testing Repository
baseurl=https://packages.clickhouse.com/rpm/testing/
gpgkey=https://packages.clickhouse.com/rpm/testing/repodata/repomd.xml.key
gpgcheck=0
repo_gpgcheck=1
enabled=1


@ -435,6 +435,8 @@ private:
Progress progress;
executor.setProgressCallback([&progress](const Progress & value) { progress.incrementPiecewiseAtomically(value); });
executor.sendQuery(ClientInfo::QueryKind::INITIAL_QUERY);
ProfileInfo info;
while (Block block = executor.read())
info.update(block);


@ -1126,8 +1126,13 @@ void Client::processOptions(const OptionsDescription & options_description,
{
const auto & name = setting.getName();
if (options.count(name))
{
if (allow_repeated_settings)
config().setString(name, options[name].as<Strings>().back());
else
config().setString(name, options[name].as<String>());
}
}
if (options.count("config-file") && options.count("config"))
throw Exception("Two or more configuration files referenced in arguments", ErrorCodes::BAD_ARGUMENTS);


@ -411,7 +411,8 @@ void LocalServer::setupUsers()
void LocalServer::connect()
{
connection_parameters = ConnectionParameters(config());
connection = LocalConnection::createConnection(connection_parameters, global_context, need_render_progress);
connection = LocalConnection::createConnection(
connection_parameters, global_context, need_render_progress, need_render_profile_events, server_display_name);
}


@ -87,13 +87,6 @@
color: var(--text-color);
}
/* Otherwise scrollbar may appear dynamically and it will alter viewport height,
then relative heights of elements will change suddenly, and it will break overall impression. */
/* html
{
overflow-x: scroll;
}*/
div
{
width: 100%;
@ -382,7 +375,7 @@
<script type="text/javascript">
/// Incremental request number. When response is received,
/// if it's request number does not equal to the current request number, response will be ignored.
/// if its request number does not equal to the current request number, response will be ignored.
/// This is to avoid race conditions.
let request_num = 0;
@ -759,20 +752,25 @@
svg.style.height = graph.graph().height;
}
function setColorTheme(theme)
{
function setColorTheme(theme) {
window.localStorage.setItem('theme', theme);
document.documentElement.setAttribute('data-theme', theme);
}
/// The choice of color theme is saved in browser.
let theme = window.localStorage.getItem('theme');
/**
* First we check if theme is set via the 'theme' GET parameter, if not, we check localStorage,
* otherwise we check OS preference
*/
let theme = current_url.searchParams.get('theme');
if (['dark', 'light'].indexOf(theme) === -1) {
theme = window.localStorage.getItem('theme');
}
if (theme) {
setColorTheme(theme);
document.documentElement.setAttribute('data-theme', theme);
} else {
/// Obtain system-level user preference
let media_query_list = window.matchMedia('prefers-color-scheme: dark')
const media_query_list = window.matchMedia('(prefers-color-scheme: dark)');
if (media_query_list.matches) {
/// Set without saving to localstorage
document.documentElement.setAttribute('data-theme', 'dark');
@ -788,13 +786,11 @@
});
}
document.getElementById('toggle-light').onclick = function()
{
document.getElementById('toggle-light').onclick = function() {
setColorTheme('light');
}
document.getElementById('toggle-dark').onclick = function()
{
document.getElementById('toggle-dark').onclick = function() {
setColorTheme('dark');
}
</script>


@ -91,7 +91,7 @@ String serializeAccessEntity(const IAccessEntity & entity)
return buf.str();
}
AccessEntityPtr deserializeAccessEntity(const String & definition, const String & path)
AccessEntityPtr deserializeAccessEntityImpl(const String & definition)
{
ASTs queries;
ParserAttachAccessEntity parser;
@ -118,43 +118,42 @@ AccessEntityPtr deserializeAccessEntity(const String & definition, const String
if (auto * create_user_query = query->as<ASTCreateUserQuery>())
{
if (res)
throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
throw Exception("Two access entities attached in the same file", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = user = std::make_unique<User>();
InterpreterCreateUserQuery::updateUserFromQuery(*user, *create_user_query);
}
else if (auto * create_role_query = query->as<ASTCreateRoleQuery>())
{
if (res)
throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
throw Exception("Two access entities attached in the same file", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = role = std::make_unique<Role>();
InterpreterCreateRoleQuery::updateRoleFromQuery(*role, *create_role_query);
}
else if (auto * create_policy_query = query->as<ASTCreateRowPolicyQuery>())
{
if (res)
throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
throw Exception("Two access entities attached in the same file", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = policy = std::make_unique<RowPolicy>();
InterpreterCreateRowPolicyQuery::updateRowPolicyFromQuery(*policy, *create_policy_query);
}
else if (auto * create_quota_query = query->as<ASTCreateQuotaQuery>())
{
if (res)
throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
throw Exception("Two access entities attached in the same file", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = quota = std::make_unique<Quota>();
InterpreterCreateQuotaQuery::updateQuotaFromQuery(*quota, *create_quota_query);
}
else if (auto * create_profile_query = query->as<ASTCreateSettingsProfileQuery>())
{
if (res)
throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
throw Exception("Two access entities attached in the same file", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = profile = std::make_unique<SettingsProfile>();
InterpreterCreateSettingsProfileQuery::updateSettingsProfileFromQuery(*profile, *create_profile_query);
}
else if (auto * grant_query = query->as<ASTGrantQuery>())
{
if (!user && !role)
throw Exception(
"A user or role should be attached before grant in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
throw Exception("A user or role should be attached before grant", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
if (user)
InterpreterGrantQuery::updateUserFromQuery(*user, *grant_query);
else
@ -165,9 +164,27 @@ AccessEntityPtr deserializeAccessEntity(const String & definition, const String
}
if (!res)
throw Exception("No access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
throw Exception("No access entities attached", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
return res;
}
AccessEntityPtr deserializeAccessEntity(const String & definition, const String & file_path)
{
if (file_path.empty())
return deserializeAccessEntityImpl(definition);
try
{
return deserializeAccessEntityImpl(definition);
}
catch (Exception & e)
{
e.addMessage("Could not parse " + file_path);
e.rethrow();
__builtin_unreachable();
}
}
}


@ -10,6 +10,6 @@ using AccessEntityPtr = std::shared_ptr<const IAccessEntity>;
String serializeAccessEntity(const IAccessEntity & entity);
AccessEntityPtr deserializeAccessEntity(const String & definition, const String & path);
AccessEntityPtr deserializeAccessEntity(const String & definition, const String & file_path = "");
}


@ -48,7 +48,7 @@ namespace
}
catch (...)
{
tryLogCurrentException(&log, "Could not parse " + file_path);
tryLogCurrentException(&log);
return nullptr;
}
}


@ -144,7 +144,6 @@ list (APPEND dbms_sources
AggregateFunctions/AggregateFunctionState.cpp
AggregateFunctions/AggregateFunctionCount.cpp
AggregateFunctions/parseAggregateFunctionParameters.cpp)
list (APPEND dbms_headers
AggregateFunctions/IAggregateFunction.h
AggregateFunctions/IAggregateFunctionCombinator.h
@ -155,10 +154,25 @@ list (APPEND dbms_headers
AggregateFunctions/FactoryHelpers.h
AggregateFunctions/parseAggregateFunctionParameters.h)
list (APPEND dbms_sources TableFunctions/ITableFunction.cpp TableFunctions/TableFunctionFactory.cpp)
list (APPEND dbms_headers TableFunctions/ITableFunction.h TableFunctions/TableFunctionFactory.h)
list (APPEND dbms_sources Dictionaries/DictionaryFactory.cpp Dictionaries/DictionarySourceFactory.cpp Dictionaries/DictionaryStructure.cpp Dictionaries/getDictionaryConfigurationFromAST.cpp)
list (APPEND dbms_headers Dictionaries/DictionaryFactory.h Dictionaries/DictionarySourceFactory.h Dictionaries/DictionaryStructure.h Dictionaries/getDictionaryConfigurationFromAST.h)
list (APPEND dbms_sources
TableFunctions/ITableFunction.cpp
TableFunctions/TableFunctionView.cpp
TableFunctions/TableFunctionFactory.cpp)
list (APPEND dbms_headers
TableFunctions/ITableFunction.h
TableFunctions/TableFunctionView.h
TableFunctions/TableFunctionFactory.h)
list (APPEND dbms_sources
Dictionaries/DictionaryFactory.cpp
Dictionaries/DictionarySourceFactory.cpp
Dictionaries/DictionaryStructure.cpp
Dictionaries/getDictionaryConfigurationFromAST.cpp)
list (APPEND dbms_headers
Dictionaries/DictionaryFactory.h
Dictionaries/DictionarySourceFactory.h
Dictionaries/DictionaryStructure.h
Dictionaries/getDictionaryConfigurationFromAST.h)
if (NOT ENABLE_SSL)
list (REMOVE_ITEM clickhouse_common_io_sources Common/OpenSSLHelpers.cpp)
@ -253,18 +267,16 @@ if (TARGET ch_contrib::nuraft)
add_object_library(clickhouse_coordination Coordination)
endif()
set (DBMS_COMMON_LIBRARIES)
if (USE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES)
add_library (dbms STATIC ${dbms_headers} ${dbms_sources})
target_link_libraries (dbms PRIVATE ch_contrib::libdivide ${DBMS_COMMON_LIBRARIES})
target_link_libraries (dbms PRIVATE ch_contrib::libdivide)
if (TARGET ch_contrib::jemalloc)
target_link_libraries (dbms PRIVATE ch_contrib::jemalloc)
endif()
set (all_modules dbms)
else()
add_library (dbms SHARED ${dbms_headers} ${dbms_sources})
target_link_libraries (dbms PUBLIC ${all_modules} ${DBMS_COMMON_LIBRARIES})
target_link_libraries (dbms PUBLIC ${all_modules})
target_link_libraries (clickhouse_interpreters PRIVATE ch_contrib::libdivide)
if (TARGET ch_contrib::jemalloc)
target_link_libraries (clickhouse_interpreters PRIVATE ch_contrib::jemalloc)
@ -557,6 +569,10 @@ if (ENABLE_TESTS)
clickhouse_common_zookeeper
string_utils)
if (TARGET ch_contrib::yaml_cpp)
target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::yaml_cpp)
endif()
add_check(unit_tests_dbms)
endif ()


@ -867,7 +867,7 @@ void ClientBase::onProfileEvents(Block & block)
if (rows == 0)
return;
if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS)
if (getName() == "local" || server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS)
{
const auto & array_thread_id = typeid_cast<const ColumnUInt64 &>(*block.getByName("thread_id").column).getData();
const auto & names = typeid_cast<const ColumnString &>(*block.getByName("name").column);
@ -1872,6 +1872,8 @@ void ClientBase::readArguments(
prev_port_arg = port_arg;
}
}
else if (arg == "--allow_repeated_settings"sv)
allow_repeated_settings = true;
else
common_arguments.emplace_back(arg);
}
@ -1884,6 +1886,9 @@ void ClientBase::readArguments(
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
{
if (allow_repeated_settings)
cmd_settings.addProgramOptionsAsMultitokens(options_description.main_description.value());
else
cmd_settings.addProgramOptions(options_description.main_description.value());
/// Parse main commandline options.
auto parser = po::command_line_parser(arguments).options(options_description.main_description.value()).allow_unregistered();

View File

@ -219,6 +219,7 @@ protected:
ProgressIndication progress_indication;
bool need_render_progress = true;
bool need_render_profile_events = true;
bool written_first_block = false;
size_t processed_rows = 0; /// How many rows have been read or written.
@ -260,6 +261,8 @@ protected:
std::vector<HostAndPort> hosts_and_ports{};
bool allow_repeated_settings = false;
bool cancelled = false;
};

View File

@ -6,6 +6,8 @@
#include <Processors/Executors/PushingAsyncPipelineExecutor.h>
#include <Storages/IStorage.h>
#include <Core/Protocol.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
namespace DB
@ -18,10 +20,12 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}
LocalConnection::LocalConnection(ContextPtr context_, bool send_progress_)
LocalConnection::LocalConnection(ContextPtr context_, bool send_progress_, bool send_profile_events_, const String & server_display_name_)
: WithContext(context_)
, session(getContext(), ClientInfo::Interface::LOCAL)
, send_progress(send_progress_)
, send_profile_events(send_profile_events_)
, server_display_name(server_display_name_)
{
/// Authenticate and create a context to execute queries.
session.authenticate("default", "", Poco::Net::SocketAddress{});
@ -58,6 +62,11 @@ void LocalConnection::updateProgress(const Progress & value)
state->progress.incrementPiecewiseAtomically(value);
}
void LocalConnection::getProfileEvents(Block & block)
{
ProfileEvents::getProfileEvents(server_display_name, state->profile_queue, block, last_sent_snapshots);
}
void LocalConnection::sendQuery(
const ConnectionTimeouts &,
const String & query,
@ -77,18 +86,23 @@ void LocalConnection::sendQuery(
if (!current_database.empty())
query_context->setCurrentDatabase(current_database);
CurrentThread::QueryScope query_scope_holder(query_context);
state.reset();
state.emplace();
state->query_id = query_id;
state->query = query;
state->query_scope_holder = std::make_unique<CurrentThread::QueryScope>(query_context);
state->stage = QueryProcessingStage::Enum(stage);
state->profile_queue = std::make_shared<InternalProfileEventsQueue>(std::numeric_limits<int>::max());
CurrentThread::attachInternalProfileEventsQueue(state->profile_queue);
if (send_progress)
state->after_send_progress.restart();
if (send_profile_events)
state->after_send_profile_events.restart();
next_packet_type.reset();
try
@ -231,6 +245,16 @@ bool LocalConnection::poll(size_t)
return true;
}
if (send_profile_events && (state->after_send_profile_events.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay))
{
Block block;
state->after_send_profile_events.restart();
next_packet_type = Protocol::Server::ProfileEvents;
getProfileEvents(block);
state->block.emplace(std::move(block));
return true;
}
try
{
pollImpl();
@ -459,9 +483,14 @@ void LocalConnection::sendMergeTreeReadTaskResponse(const PartitionReadResponse
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented");
}
ServerConnectionPtr LocalConnection::createConnection(const ConnectionParameters &, ContextPtr current_context, bool send_progress)
ServerConnectionPtr LocalConnection::createConnection(
const ConnectionParameters &,
ContextPtr current_context,
bool send_progress,
bool send_profile_events,
const String & server_display_name)
{
return std::make_unique<LocalConnection>(current_context, send_progress);
return std::make_unique<LocalConnection>(current_context, send_progress, send_profile_events, server_display_name);
}
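A minimal standalone sketch of the throttling pattern poll() uses above, assuming a steady clock; the Throttle type below is hypothetical, while the patch itself uses Stopwatch::restart() and elapsedMicroseconds():

#include <chrono>
#include <cstdint>

/// Emit a packet type at most once per `delay_us` microseconds.
struct Throttle
{
    std::chrono::steady_clock::time_point last = std::chrono::steady_clock::now();

    bool ready(uint64_t delay_us)
    {
        auto now = std::chrono::steady_clock::now();
        auto elapsed_us = std::chrono::duration_cast<std::chrono::microseconds>(now - last).count();
        if (elapsed_us < static_cast<int64_t>(delay_us))
            return false;
        last = now; /// mirrors state->after_send_profile_events.restart()
        return true;
    }
};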

View File

@ -5,6 +5,7 @@
#include <QueryPipeline/BlockIO.h>
#include <IO/TimeoutSetter.h>
#include <Interpreters/Session.h>
#include <Interpreters/ProfileEventsExt.h>
#include <Storages/ColumnsDescription.h>
@ -29,6 +30,7 @@ struct LocalQueryState
std::unique_ptr<PullingAsyncPipelineExecutor> executor;
std::unique_ptr<PushingPipelineExecutor> pushing_executor;
std::unique_ptr<PushingAsyncPipelineExecutor> pushing_async_executor;
InternalProfileEventsQueuePtr profile_queue;
std::optional<Exception> exception;
@ -50,19 +52,28 @@ struct LocalQueryState
Progress progress;
/// Time after the last check to stop the request and send the progress.
Stopwatch after_send_progress;
Stopwatch after_send_profile_events;
std::unique_ptr<CurrentThread::QueryScope> query_scope_holder;
};
class LocalConnection : public IServerConnection, WithContext
{
public:
explicit LocalConnection(ContextPtr context_, bool send_progress_ = false);
explicit LocalConnection(
ContextPtr context_, bool send_progress_ = false, bool send_profile_events_ = false, const String & server_display_name_ = "");
~LocalConnection() override;
IServerConnection::Type getConnectionType() const override { return IServerConnection::Type::LOCAL; }
static ServerConnectionPtr createConnection(const ConnectionParameters & connection_parameters, ContextPtr current_context, bool send_progress = false);
static ServerConnectionPtr createConnection(
const ConnectionParameters & connection_parameters,
ContextPtr current_context,
bool send_progress = false,
bool send_profile_events = false,
const String & server_display_name = "");
void setDefaultDatabase(const String & database) override;
@ -129,12 +140,16 @@ private:
void updateProgress(const Progress & value);
void getProfileEvents(Block & block);
bool pollImpl();
ContextMutablePtr query_context;
Session session;
bool send_progress;
bool send_profile_events;
String server_display_name;
String description = "clickhouse-local";
std::optional<LocalQueryState> state;
@ -144,5 +159,7 @@ private:
std::optional<UInt64> next_packet_type;
String current_database;
ProfileEvents::ThreadIdToCountersSnapshot last_sent_snapshots;
};
}

View File

@ -133,7 +133,12 @@ void MultiplexedConnections::sendQuery(
modified_settings.group_by_two_level_threshold_bytes = 0;
}
if (settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas)
bool parallel_reading_from_replicas = settings.max_parallel_replicas > 1
&& settings.allow_experimental_parallel_reading_from_replicas
/// Avoid trying to coordinate with clickhouse-benchmark,
/// since it uses the same code.
&& client_info.query_kind != ClientInfo::QueryKind::INITIAL_QUERY;
if (parallel_reading_from_replicas)
{
client_info.collaborate_with_initiator = true;
client_info.count_participating_replicas = replica_info.all_replicas_count;

View File

@ -103,6 +103,7 @@ void Suggest::load(ContextPtr context, const ConnectionParameters & connection_p
{
loading_thread = std::thread([context=Context::createCopy(context), connection_parameters, suggestion_limit, this]
{
ThreadStatus thread_status;
for (size_t retry = 0; retry < 10; ++retry)
{
try

View File

@ -36,8 +36,8 @@ public:
static Ptr create(const ColumnPtr & column) { return ColumnMap::create(column->assumeMutable()); }
static Ptr create(ColumnPtr && arg) { return create(arg); }
template <typename Arg, typename = typename std::enable_if<std::is_rvalue_reference<Arg &&>::value>::type>
static MutablePtr create(Arg && arg) { return Base::create(std::forward<Arg>(arg)); }
template <typename ... Args, typename = typename std::enable_if<IsMutableColumns<Args ...>::value>::type>
static MutablePtr create(Args &&... args) { return Base::create(std::forward<Args>(args)...); }
std::string getName() const override;
const char * getFamilyName() const override { return "Map"; }

View File

@ -664,6 +664,10 @@ void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config,
new_path.erase(0, main_config_path.size());
std::replace(new_path.begin(), new_path.end(), '/', '_');
/// If the config file is in YAML format, the preprocessed config would otherwise inherit the .yaml extension
/// while containing XML, so tools like clickhouse extract-from-config would not work.
new_path = fs::path(new_path).replace_extension(".xml").string();
if (preprocessed_dir.empty())
{
if (!loaded_config.configuration->has("path"))

View File

@ -3,13 +3,9 @@
#if USE_YAML_CPP
#include "YAMLParser.h"
#include <string>
#include <cstring>
#include <vector>
#include <Poco/DOM/Document.h>
#include <Poco/DOM/DOMParser.h>
#include <Poco/DOM/DOMWriter.h>
#include <Poco/DOM/NodeList.h>
#include <Poco/DOM/Element.h>
#include <Poco/DOM/AutoPtr.h>
@ -19,8 +15,6 @@
#include <yaml-cpp/yaml.h>
#include <base/logger_useful.h>
using namespace Poco::XML;
namespace DB
@ -74,30 +68,35 @@ void processNode(const YAML::Node & node, Poco::XML::Element & parent_xml_elemen
case YAML::NodeType::Sequence:
{
for (const auto & child_node : node)
if (parent_xml_element.hasChildNodes())
{
/// We want to process sequences like this:
/// For sequences, how we process them depends on the element type.
/// Sequences of key-value pairs, such as:
/// seq:
/// - val1
/// - k1: val1
/// - k2: val2
/// - val3
/// - k4: val4
/// - val5
/// into xml like this:
/// <seq>val1</seq>
/// <seq>
/// <k1>val1</k1>
/// <k2>val2</k2>
/// </seq>
/// <seq>val3</seq>
/// <seq>
/// <k4>val4</k4>
/// </seq>
/// <seq>val5</seq>
/// So we create a new parent node with the same tag for each child node.
///
/// But if the sequence is just a list of values, the root node needs to be repeated, such as:
/// seq:
/// - val1
/// - val2
/// into xml like this:
/// <seq>val1</seq>
/// <seq>val2</seq>
///
/// Therefore, check what type the child is before further processing.
/// Mixing types (a list of values and maps) would lead to strange results and should not happen.
if (parent_xml_element.hasChildNodes() && !child_node.IsMap())
{
/// Create a new parent node with the same tag for each child node
processNode(child_node, *createCloneNode(parent_xml_element));
}
else
{
/// A map, so don't recreate the parent node; add the child to it directly
processNode(child_node, parent_xml_element);
}
break;

View File

@ -9,6 +9,7 @@
#include <boost/noncopyable.hpp>
#include <base/strong_typedef.h>
#include <base/getPageSize.h>
#include <Common/Allocator.h>
#include <Common/Exception.h>
@ -196,7 +197,7 @@ protected:
/// The operation is slow and performed only for debug builds.
void protectImpl(int prot)
{
static constexpr size_t PROTECT_PAGE_SIZE = 4096;
static size_t PROTECT_PAGE_SIZE = ::getPageSize();
char * left_rounded_up = reinterpret_cast<char *>((reinterpret_cast<intptr_t>(c_start) - pad_left + PROTECT_PAGE_SIZE - 1) / PROTECT_PAGE_SIZE * PROTECT_PAGE_SIZE);
char * right_rounded_down = reinterpret_cast<char *>((reinterpret_cast<intptr_t>(c_end_of_storage) + pad_right) / PROTECT_PAGE_SIZE * PROTECT_PAGE_SIZE);
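The alignment above is ordinary integer rounding to page boundaries; a standalone sketch with worked values (assuming a 4096-byte page):

#include <cstdint>

/// Round an address up or down to a page boundary, same arithmetic as above.
/// With page_size = 4096: roundUpToPage(4097) == 8192, roundDownToPage(8191) == 4096.
uintptr_t roundUpToPage(uintptr_t addr, uintptr_t page_size)
{
    return (addr + page_size - 1) / page_size * page_size;
}

uintptr_t roundDownToPage(uintptr_t addr, uintptr_t page_size)
{
    return addr / page_size * page_size;
}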

View File

@ -48,13 +48,13 @@ namespace
if (overrun_count)
{
/// But pass them through with some frequency to avoid dropping all traces.
if (write_trace_iteration % (overrun_count + 1) == 0)
if (overrun_count > 0 && write_trace_iteration % (overrun_count + 1) == 0)
{
ProfileEvents::increment(ProfileEvents::QueryProfilerSignalOverruns, overrun_count);
}
else
{
ProfileEvents::increment(ProfileEvents::QueryProfilerSignalOverruns, overrun_count + 1);
ProfileEvents::increment(ProfileEvents::QueryProfilerSignalOverruns, std::max(0, overrun_count) + 1);
return;
}
}
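As a worked example of the sampling above: with overrun_count = 3, every fourth iteration writes its trace and adds 3 to QueryProfilerSignalOverruns, while the other iterations drop the trace and add 4 (the three overruns plus the dropped trace itself).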

View File

@ -0,0 +1,75 @@
#pragma once
#include <filesystem>
#include <Common/filesystemHelpers.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <Common/Config/ConfigProcessor.h>
#include <Poco/AutoPtr.h>
#include "Poco/DOM/Document.h"
#include "Poco/DOM/NodeList.h"
#include "Poco/DOM/NamedNodeMap.h"
const std::string tmp_path = "/tmp/";
inline std::unique_ptr<Poco::File> getFileWithContents(const char *fileName, const char *fileContents)
{
using namespace DB;
namespace fs = std::filesystem;
using File = Poco::File;
fs::create_directories(fs::path(tmp_path));
auto config_file = std::make_unique<File>(tmp_path + fileName);
{
WriteBufferFromFile out(config_file->path());
writeString(fileContents, out);
}
return config_file;
}
inline std::string xmlNodeAsString(Poco::XML::Node *pNode)
{
const auto& node_name = pNode->nodeName();
Poco::XML::XMLString result = "<" + node_name ;
auto *attributes = pNode->attributes();
for (auto i = 0; i<attributes->length();i++)
{
auto *item = attributes->item(i);
auto name = item->nodeName();
auto text = item->innerText();
result += (" " + name + "=\"" + text + "\"");
}
result += ">";
if (pNode->hasChildNodes() && pNode->firstChild()->nodeType() != Poco::XML::Node::TEXT_NODE)
{
result += "\n";
}
attributes->release();
auto *list = pNode->childNodes();
for (auto i = 0; i<list->length();i++)
{
auto *item = list->item(i);
auto type = item->nodeType();
if (type == Poco::XML::Node::ELEMENT_NODE)
{
result += xmlNodeAsString(item);
}
else if (type == Poco::XML::Node::TEXT_NODE)
{
result += item->innerText();
}
}
list->release();
result += ("</"+ node_name + ">\n");
return Poco::XML::fromXMLString(result);
}

View File

@ -0,0 +1,78 @@
#include <Common/config.h>
#if USE_YAML_CPP
#include "gtest_helper_functions.h"
#include <base/scope_guard.h>
#include <Common/Config/YAMLParser.h>
#include <Common/Config/ConfigHelper.h>
#include <Poco/AutoPtr.h>
#include "Poco/DOM/Document.h"
#include <gtest/gtest.h>
using namespace DB;
TEST(Common, YamlParserInvalidFile)
{
ASSERT_THROW(YAMLParser::parse("some-non-existing-file.yaml"), Exception);
}
TEST(Common, YamlParserProcessKeysList)
{
auto yaml_file = getFileWithContents("keys-list.yaml", R"YAML(
operator:
access_management: "1"
networks:
- ip: "10.1.6.168"
- ip: "::1"
- ip: "127.0.0.1"
)YAML");
SCOPE_EXIT({ yaml_file->remove(); });
Poco::AutoPtr<Poco::XML::Document> xml = YAMLParser::parse(yaml_file->path());
auto *p_node = xml->getNodeByPath("/clickhouse");
EXPECT_EQ(xmlNodeAsString(p_node), R"CONFIG(<clickhouse>
<operator>
<access_management>1</access_management>
<networks>
<ip>10.1.6.168</ip>
<ip>::1</ip>
<ip>127.0.0.1</ip>
</networks>
</operator>
</clickhouse>
)CONFIG");
}
TEST(Common, YamlParserProcessValuesList)
{
auto yaml_file = getFileWithContents("values-list.yaml", R"YAML(
rules:
- apiGroups: [""]
resources:
- nodes
- nodes/proxy
- services
- endpoints
- pods
)YAML");
SCOPE_EXIT({ yaml_file->remove(); });
Poco::AutoPtr<Poco::XML::Document> xml = YAMLParser::parse(yaml_file->path());
auto *p_node = xml->getNodeByPath("/clickhouse");
EXPECT_EQ(xmlNodeAsString(p_node), R"CONFIG(<clickhouse>
<rules>
<apiGroups></apiGroups>
<resources>nodes</resources>
<resources>nodes/proxy</resources>
<resources>services</resources>
<resources>endpoints</resources>
<resources>pods</resources>
</rules>
</clickhouse>
)CONFIG");
}
#endif

View File

@ -202,7 +202,7 @@ void print(IFourLetterCommand::StringBuffer & buf, const String & key, uint64_t
String MonitorCommand::run()
{
KeeperConnectionStats stats = keeper_dispatcher.getKeeperConnectionStats();
auto & stats = keeper_dispatcher.getKeeperConnectionStats();
Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo();
if (!keeper_info.has_leader)
@ -288,7 +288,7 @@ String ServerStatCommand::run()
writeText('\n', buf);
};
KeeperConnectionStats stats = keeper_dispatcher.getKeeperConnectionStats();
auto & stats = keeper_dispatcher.getKeeperConnectionStats();
Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo();
write("ClickHouse Keeper version", String(VERSION_DESCRIBE) + "-" + VERSION_GITHASH);
@ -314,7 +314,7 @@ String StatCommand::run()
auto write = [&buf] (const String & key, const String & value) { buf << key << ": " << value << '\n'; };
KeeperConnectionStats stats = keeper_dispatcher.getKeeperConnectionStats();
auto & stats = keeper_dispatcher.getKeeperConnectionStats();
Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo();
write("ClickHouse Keeper version", String(VERSION_DESCRIBE) + "-" + VERSION_GITHASH);

View File

@ -1,3 +1,4 @@
#include <atomic>
#include <Coordination/KeeperConnectionStats.h>
namespace DB
@ -5,61 +6,58 @@ namespace DB
uint64_t KeeperConnectionStats::getMinLatency() const
{
return min_latency;
return min_latency.load(std::memory_order_relaxed);
}
uint64_t KeeperConnectionStats::getMaxLatency() const
{
return max_latency;
return max_latency.load(std::memory_order_relaxed);
}
uint64_t KeeperConnectionStats::getAvgLatency() const
{
if (count != 0)
return total_latency / count;
auto cnt = count.load(std::memory_order_relaxed);
if (cnt)
return total_latency.load(std::memory_order_relaxed) / cnt;
return 0;
}
uint64_t KeeperConnectionStats::getLastLatency() const
{
return last_latency;
return last_latency.load(std::memory_order_relaxed);
}
uint64_t KeeperConnectionStats::getPacketsReceived() const
{
return packets_received;
return packets_received.load(std::memory_order_relaxed);
}
uint64_t KeeperConnectionStats::getPacketsSent() const
{
return packets_sent;
return packets_sent.load(std::memory_order_relaxed);
}
void KeeperConnectionStats::incrementPacketsReceived()
{
packets_received++;
packets_received.fetch_add(1, std::memory_order_relaxed);
}
void KeeperConnectionStats::incrementPacketsSent()
{
packets_sent++;
packets_sent.fetch_add(1, std::memory_order_relaxed);
}
void KeeperConnectionStats::updateLatency(uint64_t latency_ms)
{
last_latency = latency_ms;
total_latency += (latency_ms);
count++;
last_latency.store(latency_ms, std::memory_order_relaxed);
total_latency.fetch_add(latency_ms, std::memory_order_relaxed);
count.fetch_add(1, std::memory_order_relaxed);
if (latency_ms < min_latency)
{
min_latency = latency_ms;
}
uint64_t prev_val = min_latency.load(std::memory_order_relaxed);
while (prev_val > latency_ms && !min_latency.compare_exchange_weak(prev_val, latency_ms, std::memory_order_relaxed)) {}
if (latency_ms > max_latency)
{
max_latency = latency_ms;
}
prev_val = max_latency.load(std::memory_order_relaxed);
while (prev_val < latency_ms && !max_latency.compare_exchange_weak(prev_val, latency_ms, std::memory_order_relaxed)) {}
}
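The compare-exchange loops above are the standard lock-free running-minimum/maximum idiom: retry until either the candidate is no longer smaller (or larger) or the store succeeds. A minimal sketch of the minimum case (the helper name is hypothetical):

#include <atomic>
#include <cstdint>

void updateMin(std::atomic<uint64_t> & min_value, uint64_t candidate)
{
    uint64_t prev = min_value.load(std::memory_order_relaxed);
    while (prev > candidate
        && !min_value.compare_exchange_weak(prev, candidate, std::memory_order_relaxed))
    {
        /// On failure, compare_exchange_weak reloads `prev`, so the loop re-checks the condition.
    }
}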
void KeeperConnectionStats::reset()
@ -70,17 +68,17 @@ void KeeperConnectionStats::reset()
void KeeperConnectionStats::resetLatency()
{
total_latency = 0;
count = 0;
max_latency = 0;
min_latency = 0;
last_latency = 0;
total_latency.store(0, std::memory_order_relaxed);
count.store(0, std::memory_order_relaxed);
max_latency.store(0, std::memory_order_relaxed);
min_latency.store(0, std::memory_order_relaxed);
last_latency.store(0, std::memory_order_relaxed);
}
void KeeperConnectionStats::resetRequestCounters()
{
packets_received = 0;
packets_sent = 0;
packets_received.store(0, std::memory_order_relaxed);
packets_sent.store(0, std::memory_order_relaxed);
}
}

View File

@ -1,5 +1,6 @@
#pragma once
#include <atomic>
#include <base/types.h>
#include <memory>
#include <cstdint>
@ -11,7 +12,10 @@ namespace DB
class KeeperConnectionStats
{
public:
KeeperConnectionStats() = default;
KeeperConnectionStats()
{
reset();
}
uint64_t getMinLatency() const;
uint64_t getMaxLatency() const;
@ -33,20 +37,20 @@ private:
void resetRequestCounters();
/// All responses, with watch responses included
uint64_t packets_sent = 0;
std::atomic_uint64_t packets_sent;
/// All user requests
uint64_t packets_received = 0;
std::atomic_uint64_t packets_received;
/// For consistency with ZooKeeper this is measured in milliseconds;
/// otherwise microseconds might be better
uint64_t total_latency = 0;
uint64_t max_latency = 0;
uint64_t min_latency = 0;
std::atomic_uint64_t total_latency;
std::atomic_uint64_t max_latency;
std::atomic_uint64_t min_latency;
/// last operation latency
uint64_t last_latency = 0;
std::atomic_uint64_t last_latency;
uint64_t count = 0;
std::atomic_uint64_t count;
};
}

View File

@ -594,7 +594,6 @@ void KeeperDispatcher::updateConfiguration(const Poco::Util::AbstractConfigurati
void KeeperDispatcher::updateKeeperStatLatency(uint64_t process_time_ms)
{
std::lock_guard lock(keeper_stats_mutex);
keeper_stats.updateLatency(process_time_ms);
}

View File

@ -68,7 +68,6 @@ private:
/// RAFT wrapper.
std::unique_ptr<KeeperServer> server;
mutable std::mutex keeper_stats_mutex;
KeeperConnectionStats keeper_stats;
KeeperConfigurationAndSettingsPtr configuration_and_settings;
@ -159,9 +158,8 @@ public:
uint64_t getSnapDirSize() const;
/// Request statistics such as qps, latency etc.
KeeperConnectionStats getKeeperConnectionStats() const
KeeperConnectionStats & getKeeperConnectionStats()
{
std::lock_guard lock(keeper_stats_mutex);
return keeper_stats;
}
@ -179,19 +177,16 @@ public:
void incrementPacketsSent()
{
std::lock_guard lock(keeper_stats_mutex);
keeper_stats.incrementPacketsSent();
}
void incrementPacketsReceived()
{
std::lock_guard lock(keeper_stats_mutex);
keeper_stats.incrementPacketsReceived();
}
void resetConnectionStats()
{
std::lock_guard lock(keeper_stats_mutex);
keeper_stats.reset();
}
};

View File

@ -39,7 +39,12 @@ namespace
request_for_session.request->xid = xid;
request_for_session.request->readImpl(buffer);
if (!buffer.eof())
readIntBinary(request_for_session.time, buffer);
else /// backward compatibility
request_for_session.time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
return request_for_session;
}
}
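The hunk above reads the trailing time field only when the buffer still has bytes, defaulting to now() for entries written by older versions. A sketch of the same pattern as a free function (readTimeOrNow is a hypothetical helper; ReadBuffer and readIntBinary are the ClickHouse IO primitives already used in the hunk):

#include <chrono>
#include <cstdint>
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>

uint64_t readTimeOrNow(DB::ReadBuffer & buffer)
{
    uint64_t time_ms = 0;
    if (!buffer.eof())
        DB::readIntBinary(time_ms, buffer); /// new format: the time is serialized
    else /// old format: no time field, fall back to the current wall clock
        time_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::system_clock::now().time_since_epoch()).count();
    return time_ms;
}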

View File

@ -3,6 +3,8 @@
#include <Coordination/Defines.h>
#include <Common/Exception.h>
#include <filesystem>
#include <Common/isLocalAddress.h>
#include <Common/DNSResolver.h>
namespace DB
{
@ -12,6 +14,70 @@ namespace ErrorCodes
extern const int RAFT_ERROR;
}
namespace
{
bool isLoopback(const std::string & hostname)
{
try
{
return DNSResolver::instance().resolveHost(hostname).isLoopback();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
return false;
}
bool isLocalhost(const std::string & hostname)
{
try
{
return isLocalAddress(DNSResolver::instance().resolveHost(hostname));
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
return false;
}
std::unordered_map<UInt64, std::string> getClientPorts(const Poco::Util::AbstractConfiguration & config)
{
static const char * config_port_names[] = {
"keeper_server.tcp_port",
"keeper_server.tcp_port_secure",
"interserver_http_port",
"interserver_https_port",
"tcp_port",
"tcp_with_proxy_port",
"tcp_port_secure",
"mysql_port",
"postgresql_port",
"grpc_port",
"prometheus.port",
};
std::unordered_map<UInt64, std::string> ports;
for (const auto & config_port_name : config_port_names)
{
if (config.has(config_port_name))
ports[config.getUInt64(config_port_name)] = config_port_name;
}
return ports;
}
}
/// This function is quite long because it contains a lot of sanity checks on the config:
/// 1. No duplicate endpoints
/// 2. No "localhost", "127.0.0.1" or other local addresses mixed with normal addresses
/// 3. The Raft internal port is not equal to any client port
/// 4. No duplicate IDs
/// 5. Our ID is present in the hostnames list
KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersConfiguration(const Poco::Util::AbstractConfiguration & config, bool allow_without_us) const
{
KeeperConfigurationWrapper result;
@ -19,12 +85,17 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_prefix + ".raft_configuration", keys);
auto client_ports = getClientPorts(config);
/// Sometimes (especially in cloud environments) users can provide an incorrect
/// configuration with duplicated Raft IDs or endpoints. We check for them
/// at the config parsing stage and never commit such a config to the quorum.
std::unordered_map<std::string, int> check_duplicated_hostnames;
size_t total_servers = 0;
std::string loopback_hostname;
std::string non_local_hostname;
size_t local_address_counter = 0;
for (const auto & server_key : keys)
{
if (!startsWith(server_key, "server"))
@ -38,13 +109,33 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC
int32_t priority = config.getInt(full_prefix + ".priority", 1);
bool start_as_follower = config.getBool(full_prefix + ".start_as_follower", false);
if (client_ports.count(port) != 0)
{
throw Exception(ErrorCodes::RAFT_ERROR, "Raft configuration contains hostname '{}' with port '{}' which is equal to '{}' in server configuration",
hostname, port, client_ports[port]);
}
if (isLoopback(hostname))
{
loopback_hostname = hostname;
local_address_counter++;
}
else if (isLocalhost(hostname))
{
local_address_counter++;
}
else
{
non_local_hostname = hostname;
}
if (start_as_follower)
result.servers_start_as_followers.insert(new_server_id);
auto endpoint = hostname + ":" + std::to_string(port);
if (check_duplicated_hostnames.count(endpoint))
{
throw Exception(ErrorCodes::RAFT_ERROR, "Raft config contain duplicate endpoints: "
throw Exception(ErrorCodes::RAFT_ERROR, "Raft config contains duplicate endpoints: "
"endpoint {} has been already added with id {}, but going to add it one more time with id {}",
endpoint, check_duplicated_hostnames[endpoint], new_server_id);
}
@ -54,7 +145,7 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC
for (const auto & [id_endpoint, id] : check_duplicated_hostnames)
{
if (new_server_id == id)
throw Exception(ErrorCodes::RAFT_ERROR, "Raft config contain duplicate ids: id {} has been already added with endpoint {}, "
throw Exception(ErrorCodes::RAFT_ERROR, "Raft config contains duplicate ids: id {} has been already added with endpoint {}, "
"but going to add it one more time with endpoint {}", id, id_endpoint, endpoint);
}
check_duplicated_hostnames.emplace(endpoint, new_server_id);
@ -77,6 +168,24 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC
if (result.servers_start_as_followers.size() == total_servers)
throw Exception(ErrorCodes::RAFT_ERROR, "At least one of servers should be able to start as leader (without <start_as_follower>)");
if (!loopback_hostname.empty() && !non_local_hostname.empty())
{
throw Exception(
ErrorCodes::RAFT_ERROR,
"Mixing loopback and non-local hostnames ('{}' and '{}') in raft_configuration is not allowed. "
"Different hosts can resolve it to themselves so it's not allowed.",
loopback_hostname, non_local_hostname);
}
if (!non_local_hostname.empty() && local_address_counter > 1)
{
throw Exception(
ErrorCodes::RAFT_ERROR,
"Local address specified more than once ({} times) and non-local hostnames also exists ('{}') in raft_configuration. "
"Such configuration is not allowed because single host can vote multiple times.",
local_address_counter, non_local_hostname);
}
return result;
}

View File

@ -8,7 +8,6 @@
#define DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME 54372
#define DBMS_MIN_REVISION_WITH_VERSION_PATCH 54401
#define DBMS_MIN_REVISION_WITH_SERVER_LOGS 54406
#define DBMS_MIN_REVISION_WITH_CLIENT_SUPPORT_EMBEDDED_DATA 54415
/// Minimum revision with exactly the same set of aggregation methods and rules to select them.
/// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules
/// (keys will be placed in different buckets and result will not be fully aggregated).

View File

@ -89,6 +89,14 @@ void Settings::addProgramOptions(boost::program_options::options_description & o
}
}
void Settings::addProgramOptionsAsMultitokens(boost::program_options::options_description & options)
{
for (const auto & field : all())
{
addProgramOptionAsMultitoken(options, field);
}
}
void Settings::addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field)
{
const std::string_view name = field.getName();
@ -97,6 +105,14 @@ void Settings::addProgramOption(boost::program_options::options_description & op
name.data(), boost::program_options::value<std::string>()->composing()->notifier(on_program_option), field.getDescription())));
}
void Settings::addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field)
{
const std::string_view name = field.getName();
auto on_program_option = boost::function1<void, const Strings &>([this, name](const Strings & values) { set(name, values.back()); });
options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
name.data(), boost::program_options::value<Strings>()->multitoken()->composing()->notifier(on_program_option), field.getDescription())));
}
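A standalone sketch of why multitoken matters here (the option name max_threads is illustrative; only boost::program_options calls that appear in the hunk are used): without multitoken, a repeated option makes Boost throw multiple_occurrences, while with a vector value plus multitoken the occurrences accumulate and the notifier can pick values.back(), so the last occurrence wins.

#include <boost/program_options.hpp>
#include <iostream>
#include <string>
#include <vector>

namespace po = boost::program_options;

int main(int argc, char ** argv)
{
    po::options_description desc("options");
    desc.add_options()
        ("max_threads", /// illustrative setting name
         po::value<std::vector<std::string>>()->multitoken()->composing()
             ->notifier([](const std::vector<std::string> & values)
                        { std::cout << "effective value: " << values.back() << '\n'; }),
         "may be repeated; the last value is used");

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);
    po::notify(vm); /// notifiers fire here, hence the reminder in Settings.h
    return 0;
}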
void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path)
{
if (config.getBool("skip_check_for_incorrect_settings", false))

View File

@ -722,6 +722,11 @@ struct Settings : public BaseSettings<SettingsTraits>, public IHints<2, Settings
/// (Don't forget to call notify() on the `variables_map` after parsing it!)
void addProgramOptions(boost::program_options::options_description & options);
/// Adds program options to set the settings from a command line.
/// Allows setting the same option multiple times; the last value will be used.
/// (Don't forget to call notify() on the `variables_map` after parsing it!)
void addProgramOptionsAsMultitokens(boost::program_options::options_description & options);
/// Check that there are no user-level settings at the top level of the config.
/// This is a common source of mistakes (users don't know where to put user-level settings).
static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path);
@ -729,6 +734,8 @@ struct Settings : public BaseSettings<SettingsTraits>, public IHints<2, Settings
std::vector<String> getAllRegisteredNames() const override;
void addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field);
void addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field);
};
/*

View File

@ -34,6 +34,7 @@ namespace ErrorCodes
extern const int QUERY_NOT_ALLOWED;
extern const int UNKNOWN_TABLE;
extern const int BAD_ARGUMENTS;
extern const int NOT_IMPLEMENTED;
}
DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL(
@ -309,8 +310,12 @@ void DatabaseMaterializedPostgreSQL::attachTable(ContextPtr context_, const Stri
}
}
StoragePtr DatabaseMaterializedPostgreSQL::detachTable(ContextPtr, const String &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DETACH TABLE not allowed, use DETACH PERMANENTLY");
}
StoragePtr DatabaseMaterializedPostgreSQL::detachTable(ContextPtr context_, const String & table_name)
void DatabaseMaterializedPostgreSQL::detachTablePermanently(ContextPtr, const String & table_name)
{
/// If there is query context then we need to detach materialized storage.
/// If there is no query context then we need to detach internal storage from atomic database.
@ -360,11 +365,6 @@ StoragePtr DatabaseMaterializedPostgreSQL::detachTable(ContextPtr context_, cons
}
materialized_tables.erase(table_name);
return nullptr;
}
else
{
return DatabaseAtomic::detachTable(context_, table_name);
}
}

View File

@ -51,6 +51,8 @@ public:
void attachTable(ContextPtr context, const String & table_name, const StoragePtr & table, const String & relative_table_path) override;
void detachTablePermanently(ContextPtr context, const String & table_name) override;
StoragePtr detachTable(ContextPtr context, const String & table_name) override;
void dropTable(ContextPtr local_context, const String & name, bool no_delay) override;

View File

@ -30,7 +30,7 @@ namespace ErrorCodes
static const std::unordered_set<std::string_view> dictionary_allowed_keys = {
"host", "port", "user", "password", "db", "database", "table",
"update_field", "update_tag", "invalidate_query", "query", "where", "name", "secure"};
"update_field", "update_lag", "invalidate_query", "query", "where", "name", "secure"};
namespace
{

View File

@ -34,7 +34,7 @@ static const std::unordered_set<std::string_view> dictionary_allowed_keys = {
"host", "port", "user", "password",
"db", "database", "table", "schema",
"update_field", "invalidate_query", "priority",
"update_tag", "dont_check_update_time",
"update_lag", "dont_check_update_time",
"query", "where", "name" /* name_collection */, "socket",
"share_connection", "fail_on_connection_loss", "close_connection",
"ssl_ca", "ssl_cert", "ssl_key",

View File

@ -30,7 +30,7 @@ static const UInt64 max_block_size = 8192;
static const std::unordered_set<std::string_view> dictionary_allowed_keys = {
"host", "port", "user", "password", "db", "database", "table", "schema",
"update_field", "update_tag", "invalidate_query", "query", "where", "name", "priority"};
"update_field", "update_lag", "invalidate_query", "query", "where", "name", "priority"};
namespace
{

View File

@ -144,6 +144,12 @@ DiskCacheWrapper::readFile(
}
}
/// Do not use RemoteFSReadMethod::threadpool for index and mark files.
/// Here it does not make sense since the files are small.
/// Note: enabling `threadpool` reads requires calling setReadUntilEnd().
auto current_read_settings = settings;
current_read_settings.remote_fs_method = RemoteFSReadMethod::read;
if (metadata->status == DOWNLOADING)
{
FileDownloadStatus result_status = DOWNLOADED;
@ -158,7 +164,7 @@ DiskCacheWrapper::readFile(
auto tmp_path = path + ".tmp";
{
auto src_buffer = DiskDecorator::readFile(path, settings, read_hint, file_size);
auto src_buffer = DiskDecorator::readFile(path, current_read_settings, read_hint, file_size);
auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite);
copyData(*src_buffer, *dst_buffer);
}
@ -184,7 +190,7 @@ DiskCacheWrapper::readFile(
if (metadata->status == DOWNLOADED)
return cache_disk->readFile(path, settings, read_hint, file_size);
return DiskDecorator::readFile(path, settings, read_hint, file_size);
return DiskDecorator::readFile(path, current_read_settings, read_hint, file_size);
}
std::unique_ptr<WriteBufferFromFileBase>

View File

@ -6,6 +6,7 @@
#include <Interpreters/Context.h>
#include <Common/filesystemHelpers.h>
#include <Common/quoteString.h>
#include <Common/renameat2.h>
#include <IO/createReadBufferFromFileBase.h>
#include <fstream>
@ -325,7 +326,7 @@ DiskDirectoryIteratorPtr DiskLocal::iterateDirectory(const String & path)
void DiskLocal::moveFile(const String & from_path, const String & to_path)
{
fs::rename(fs::path(disk_path) / from_path, fs::path(disk_path) / to_path);
renameNoReplace(fs::path(disk_path) / from_path, fs::path(disk_path) / to_path);
}
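renameNoReplace (from Common/renameat2.h, included above) is built on the Linux renameat2(2) syscall with RENAME_NOREPLACE, so the move fails with EEXIST instead of silently overwriting an existing destination the way fs::rename would. A minimal sketch of the underlying call (assuming a Linux kernel >= 3.15; the wrapper name below is hypothetical):

#include <cerrno>
#include <cstring>
#include <fcntl.h>     // AT_FDCWD
#include <linux/fs.h>  // RENAME_NOREPLACE
#include <stdexcept>
#include <string>
#include <sys/syscall.h>
#include <unistd.h>

void renameNoReplaceSketch(const std::string & from, const std::string & to)
{
    if (0 != syscall(SYS_renameat2, AT_FDCWD, from.c_str(), AT_FDCWD, to.c_str(), RENAME_NOREPLACE))
        throw std::runtime_error("rename failed: " + std::string(strerror(errno)));
}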
void DiskLocal::replaceFile(const String & from_path, const String & to_path)

View File

@ -286,7 +286,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
settings->s3_upload_part_size_multiply_parts_count_threshold,
settings->s3_max_single_part_upload_size,
std::move(object_metadata),
buf_size /*, std::move(schedule) */);
buf_size, std::move(schedule));
auto create_metadata_callback = [this, path, blob_name, mode] (size_t count)
{

View File

@ -1,21 +1,2 @@
if (TARGET ch_contrib::avrocpp)
set(USE_AVRO 1)
endif()
if (TARGET ch_contrib::parquet)
set(USE_PARQUET 1)
set(USE_ARROW 1)
set(USE_ORC 1)
endif()
if (TARGET ch_contrib::snappy)
set(USE_SNAPPY 1)
endif()
if (TARGET ch_contrib::protobuf)
set(USE_PROTOBUF 1)
endif()
if (TARGET ch_contrib::msgpack)
set(USE_MSGPACK 1)
endif()
if (TARGET ch_contrib::capnp)
set(USE_CAPNP 1)
endif()
include(configure_config.cmake)
configure_file(config_formats.h.in ${ConfigIncludePath}/config_formats.h)

View File

@ -0,0 +1,20 @@
if (TARGET ch_contrib::avrocpp)
set(USE_AVRO 1)
endif()
if (TARGET ch_contrib::parquet)
set(USE_PARQUET 1)
set(USE_ARROW 1)
set(USE_ORC 1)
endif()
if (TARGET ch_contrib::snappy)
set(USE_SNAPPY 1)
endif()
if (TARGET ch_contrib::protobuf)
set(USE_PROTOBUF 1)
endif()
if (TARGET ch_contrib::msgpack)
set(USE_MSGPACK 1)
endif()
if (TARGET ch_contrib::capnp)
set(USE_CAPNP 1)
endif()

View File

@ -90,6 +90,22 @@ public:
return getDictionary(dict_name_col->getValue<String>());
}
static const DictionaryAttribute & getDictionaryHierarchicalAttribute(const std::shared_ptr<const IDictionary> & dictionary)
{
const auto & dictionary_structure = dictionary->getStructure();
auto hierarchical_attribute_index_optional = dictionary_structure.hierarchical_attribute_index;
if (!dictionary->hasHierarchy() || !hierarchical_attribute_index_optional.has_value())
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Dictionary {} does not support hierarchy",
dictionary->getFullName());
size_t hierarchical_attribute_index = *hierarchical_attribute_index_optional;
const auto & hierarchical_attribute = dictionary_structure.attributes[hierarchical_attribute_index];
return hierarchical_attribute;
}
bool isDictGetFunctionInjective(const Block & sample_columns)
{
/// Assume non-injective by default
@ -939,39 +955,38 @@ private:
bool useDefaultImplementationForConstants() const final { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0}; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isString(arguments[0]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type of first argument of function {}. Expected String. Actual type {}",
getName(),
arguments[0]->getName());
if (!WhichDataType(arguments[1]).isUInt64())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type of second argument of function {}. Expected UInt64. Actual type {}",
getName(),
arguments[1]->getName());
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
}
bool isDeterministic() const override { return false; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
String dictionary_name;
if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get()))
dictionary_name = name_col->getValue<String>();
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of first argument of function {}, expected a const string.",
arguments[0].type->getName(),
getName());
auto dictionary = helper.getDictionary(arguments[0].column);
const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary);
return std::make_shared<DataTypeArray>(hierarchical_attribute.type);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
if (input_rows_count == 0)
return result_type->createColumn();
auto dictionary = helper.getDictionary(arguments[0].column);
const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary);
if (!dictionary->hasHierarchy())
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Dictionary {} does not support hierarchy",
dictionary->getFullName());
auto key_column = ColumnWithTypeAndName{arguments[1].column, arguments[1].type, arguments[1].name};
auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type);
ColumnPtr result = dictionary->getHierarchy(key_column_casted, hierarchical_attribute.type);
ColumnPtr result = dictionary->getHierarchy(arguments[1].column, std::make_shared<DataTypeUInt64>());
return result;
}
@ -1009,18 +1024,6 @@ private:
getName(),
arguments[0]->getName());
if (!WhichDataType(arguments[1]).isUInt64())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type of second argument of function {}. Expected UInt64. Actual type {}",
getName(),
arguments[1]->getName());
if (!WhichDataType(arguments[2]).isUInt64())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type of third argument of function {}. Expected UInt64. Actual type {}",
getName(),
arguments[2]->getName());
return std::make_shared<DataTypeUInt8>();
}
@ -1031,16 +1034,18 @@ private:
if (input_rows_count == 0)
return result_type->createColumn();
auto dict = helper.getDictionary(arguments[0].column);
auto dictionary = helper.getDictionary(arguments[0].column);
const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary);
if (!dict->hasHierarchy())
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Dictionary {} does not support hierarchy",
dict->getFullName());
auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[2].name};
auto in_key_column = ColumnWithTypeAndName{arguments[2].column->convertToFullColumnIfConst(), arguments[2].type, arguments[2].name};
ColumnPtr res = dict->isInHierarchy(arguments[1].column, arguments[2].column, std::make_shared<DataTypeUInt64>());
auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type);
auto in_key_column_casted = castColumnAccurate(in_key_column, hierarchical_attribute.type);
return res;
ColumnPtr result = dictionary->isInHierarchy(key_column_casted, in_key_column_casted, hierarchical_attribute.type);
return result;
}
mutable FunctionDictHelper helper;
@ -1069,21 +1074,18 @@ private:
bool isDeterministic() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (!isString(arguments[0]))
if (!isString(arguments[0].type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type of first argument of function {}. Expected String. Actual type {}",
getName(),
arguments[0]->getName());
arguments[0].type->getName());
if (!WhichDataType(arguments[1]).isUInt64())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type of second argument of function {}. Expected UInt64. Actual type {}",
getName(),
arguments[1]->getName());
auto dictionary = helper.getDictionary(arguments[0].column);
const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary);
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
return std::make_shared<DataTypeArray>(hierarchical_attribute.type);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
@ -1092,13 +1094,12 @@ private:
return result_type->createColumn();
auto dictionary = helper.getDictionary(arguments[0].column);
const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary);
if (!dictionary->hasHierarchy())
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Dictionary {} does not support hierarchy",
dictionary->getFullName());
auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[1].name};
auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type);
ColumnPtr result = dictionary->getDescendants(arguments[1].column, std::make_shared<DataTypeUInt64>(), 1);
ColumnPtr result = dictionary->getDescendants(key_column_casted, hierarchical_attribute.type, 1);
return result;
}
@ -1126,12 +1127,11 @@ private:
bool isVariadic() const override { return true; }
bool useDefaultImplementationForConstants() const final { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0}; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 2}; }
bool isDeterministic() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
size_t arguments_size = arguments.size();
if (arguments_size < 2 || arguments_size > 3)
@ -1142,27 +1142,24 @@ private:
arguments_size);
}
if (!isString(arguments[0]))
if (!isString(arguments[0].type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type of first argument of function {}. Expected const String. Actual type {}",
getName(),
arguments[0]->getName());
arguments[0].type->getName());
if (!WhichDataType(arguments[1]).isUInt64())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type of second argument of function {}. Expected UInt64. Actual type {}",
getName(),
arguments[1]->getName());
if (arguments.size() == 3 && !isUnsignedInteger(arguments[2]))
if (arguments.size() == 3 && !isInteger(arguments[2].type))
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type of third argument of function {}. Expected const unsigned integer. Actual type {}",
getName(),
arguments[2]->getName());
arguments[2].type->getName());
}
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
auto dictionary = helper.getDictionary(arguments[0].column);
const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary);
return std::make_shared<DataTypeArray>(hierarchical_attribute.type);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
@ -1171,6 +1168,7 @@ private:
return result_type->createColumn();
auto dictionary = helper.getDictionary(arguments[0].column);
const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary);
size_t level = 0;
@ -1181,17 +1179,21 @@ private:
"Illegal type of third argument of function {}. Expected const unsigned integer.",
getName());
level = static_cast<size_t>(arguments[2].column->get64(0));
auto value = static_cast<Int64>(arguments[2].column->getInt(0));
if (value < 0)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type of third argument of function {}. Expected const unsigned integer.",
getName());
level = static_cast<size_t>(value);
}
if (!dictionary->hasHierarchy())
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Dictionary {} does not support hierarchy",
dictionary->getFullName());
auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[1].name};
auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type);
ColumnPtr res = dictionary->getDescendants(arguments[1].column, std::make_shared<DataTypeUInt64>(), level);
ColumnPtr result = dictionary->getDescendants(key_column_casted, hierarchical_attribute.type, level);
return res;
return result;
}
mutable FunctionDictHelper helper;

View File

@ -267,7 +267,7 @@ public:
*/
virtual Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const
{
throw Exception("Function " + getName() + " has no information about its monotonicity.", ErrorCodes::NOT_IMPLEMENTED);
throw Exception("Function " + getName() + " has no information about its monotonicity", ErrorCodes::NOT_IMPLEMENTED);
}
};
@ -452,7 +452,7 @@ public:
using Monotonicity = IFunctionBase::Monotonicity;
virtual Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const
{
throw Exception("Function " + getName() + " has no information about its monotonicity.", ErrorCodes::NOT_IMPLEMENTED);
throw Exception("Function " + getName() + " has no information about its monotonicity", ErrorCodes::NOT_IMPLEMENTED);
}
/// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored).

View File

@ -1,18 +1,29 @@
#pragma once
#include <type_traits>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnFunction.h>
#include <Columns/ColumnMap.h>
#include <Columns/IColumn.h>
#include <Common/Exception.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeFunction.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnFunction.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Functions/IFunction.h>
#include <DataTypes/DataTypeMap.h>
#include <Functions/FunctionHelpers.h>
#include <IO/WriteHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context_fwd.h>
#include <IO/WriteHelpers.h>
namespace DB
{
@ -21,11 +32,38 @@ namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int LOGICAL_ERROR;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
template <typename T>
ColumnPtr getOffsetsPtr(const T & column)
{
if constexpr (std::is_same_v<T, ColumnArray>)
{
return column.getOffsetsPtr();
}
else // ColumnMap
{
return column.getNestedColumn().getOffsetsPtr();
}
}
template <typename T>
const IColumn::Offsets & getOffsets(const T & column)
{
if constexpr (std::is_same_v<T, ColumnArray>)
{
return column.getOffsets();
}
else // ColumnMap
{
return column.getNestedColumn().getOffsets();
}
}
/** Higher-order functions for arrays.
* These functions optionally apply a map (transform) to an array (or multiple arrays of identical size) via a lambda function,
* and return some result based on that transformation.
@ -60,29 +98,42 @@ public:
void getLambdaArgumentTypes(DataTypes & arguments) const override
{
if (arguments.empty())
throw Exception("Function " + getName() + " needs at least one argument; passed "
+ toString(arguments.size()) + ".",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} needs at least one argument, passed {}", getName(), arguments.size());
if (arguments.size() == 1)
throw Exception("Function " + getName() + " needs at least one array argument.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} needs at least one argument with data", getName());
DataTypes nested_types(arguments.size() - 1);
for (size_t i = 0; i < nested_types.size(); ++i)
if (arguments.size() > 2 && Impl::needOneArray())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} needs one argument with data", getName());
size_t nested_types_count = std::is_same_v<typename Impl::data_type, DataTypeMap> ? (arguments.size() - 1) * 2 : (arguments.size() - 1);
DataTypes nested_types(nested_types_count);
for (size_t i = 0; i < arguments.size() - 1; ++i)
{
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(&*arguments[i + 1]);
const auto * array_type = checkAndGetDataType<typename Impl::data_type>(&*arguments[i + 1]);
if (!array_type)
throw Exception("Argument " + toString(i + 2) + " of function " + getName() + " must be array. Found "
+ arguments[i + 1]->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if constexpr (std::is_same_v<typename Impl::data_type, DataTypeMap>)
{
nested_types[2 * i] = recursiveRemoveLowCardinality(array_type->getKeyType());
nested_types[2 * i + 1] = recursiveRemoveLowCardinality(array_type->getValueType());
}
else if constexpr (std::is_same_v<typename Impl::data_type, DataTypeArray>)
{
nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType());
}
}
const DataTypeFunction * function_type = checkAndGetDataType<DataTypeFunction>(arguments[0].get());
if (!function_type || function_type->getArgumentTypes().size() != nested_types.size())
throw Exception("First argument for this overload of " + getName() + " must be a function with "
+ toString(nested_types.size()) + " arguments. Found "
+ arguments[0]->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"First argument for this overload of {} must be a function with {} arguments, found {} instead",
getName(), nested_types.size(), arguments[0]->getName());
arguments[0] = std::make_shared<DataTypeFunction>(nested_types);
}
@ -91,37 +142,39 @@ public:
{
size_t min_args = Impl::needExpression() ? 2 : 1;
if (arguments.size() < min_args)
throw Exception("Function " + getName() + " needs at least "
+ toString(min_args) + " argument; passed "
+ toString(arguments.size()) + ".",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} needs at least {} argument, passed {}",
getName(), min_args, arguments.size());
if (arguments.size() == 1)
if ((arguments.size() == 1) && std::is_same_v<typename Impl::data_type, DataTypeArray>)
{
const auto * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].type.get());
const auto * data_type = checkAndGetDataType<typename Impl::data_type>(arguments[0].type.get());
if (!array_type)
if (!data_type)
throw Exception("The only argument for function " + getName() + " must be array. Found "
+ arguments[0].type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+ arguments[0].type->getName() + " instead", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
DataTypePtr nested_type = array_type->getNestedType();
DataTypePtr nested_type = data_type->getNestedType();
if (Impl::needBoolean() && !WhichDataType(nested_type).isUInt8())
throw Exception("The only argument for function " + getName() + " must be array of UInt8. Found "
+ arguments[0].type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+ arguments[0].type->getName() + " instead", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if constexpr (std::is_same_v<typename Impl::data_type, DataTypeArray>)
return Impl::getReturnType(nested_type, nested_type);
else
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable code reached");
}
else
{
if (arguments.size() > 2 && Impl::needOneArray())
throw Exception("Function " + getName() + " needs one array argument.",
throw Exception("Function " + getName() + " needs one argument with data",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
const auto * data_type_function = checkAndGetDataType<DataTypeFunction>(arguments[0].type.get());
if (!data_type_function)
throw Exception("First argument for function " + getName() + " must be a function.",
throw Exception("First argument for function " + getName() + " must be a function",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
/// The types of the remaining arguments are already checked in getLambdaArgumentTypes.
@ -131,9 +184,28 @@ public:
throw Exception("Expression for function " + getName() + " must return UInt8, found "
+ return_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto * first_array_type = checkAndGetDataType<DataTypeArray>(arguments[1].type.get());
static_assert(
std::is_same_v<typename Impl::data_type, DataTypeMap> ||
std::is_same_v<typename Impl::data_type, DataTypeArray>,
"unsupported type");
if (arguments.size() < 2)
{
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "{}", arguments.size());
}
const auto * first_array_type = checkAndGetDataType<typename Impl::data_type>(arguments[1].type.get());
if (!first_array_type)
throw DB::Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unsupported type {}", arguments[1].type->getName());
if constexpr (std::is_same_v<typename Impl::data_type, DataTypeArray>)
return Impl::getReturnType(return_type, first_array_type->getNestedType());
if constexpr (std::is_same_v<typename Impl::data_type, DataTypeMap>)
return Impl::getReturnType(return_type, first_array_type->getKeyValueTypes());
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable code reached");
}
}
@ -142,19 +214,26 @@ public:
if (arguments.size() == 1)
{
ColumnPtr column_array_ptr = arguments[0].column;
const auto * column_array = checkAndGetColumn<ColumnArray>(column_array_ptr.get());
const auto * column_array = checkAndGetColumn<typename Impl::column_type>(column_array_ptr.get());
if (!column_array)
{
const ColumnConst * column_const_array = checkAndGetColumnConst<ColumnArray>(column_array_ptr.get());
const ColumnConst * column_const_array = checkAndGetColumnConst<typename Impl::column_type>(column_array_ptr.get());
if (!column_const_array)
throw Exception("Expected array column, found " + column_array_ptr->getName(), ErrorCodes::ILLEGAL_COLUMN);
column_array_ptr = column_const_array->convertToFullColumn();
column_array = assert_cast<const ColumnArray *>(column_array_ptr.get());
column_array = assert_cast<const typename Impl::column_type *>(column_array_ptr.get());
}
if constexpr (std::is_same_v<typename Impl::column_type, ColumnMap>)
{
return Impl::execute(*column_array, column_array->getNestedColumn().getDataPtr());
}
else
{
return Impl::execute(*column_array, column_array->getDataPtr());
}
}
else
{
const auto & column_with_type_and_name = arguments[0];
@ -172,7 +251,7 @@ public:
ColumnPtr offsets_column;
ColumnPtr column_first_array_ptr;
const ColumnArray * column_first_array = nullptr;
const typename Impl::column_type * column_first_array = nullptr;
ColumnsWithTypeAndName arrays;
arrays.reserve(arguments.size() - 1);
@ -182,18 +261,18 @@ public:
const auto & array_with_type_and_name = arguments[i];
ColumnPtr column_array_ptr = array_with_type_and_name.column;
const auto * column_array = checkAndGetColumn<ColumnArray>(column_array_ptr.get());
const auto * column_array = checkAndGetColumn<typename Impl::column_type>(column_array_ptr.get());
const DataTypePtr & array_type_ptr = array_with_type_and_name.type;
const auto * array_type = checkAndGetDataType<DataTypeArray>(array_type_ptr.get());
const auto * array_type = checkAndGetDataType<typename Impl::data_type>(array_type_ptr.get());
if (!column_array)
{
const ColumnConst * column_const_array = checkAndGetColumnConst<ColumnArray>(column_array_ptr.get());
const ColumnConst * column_const_array = checkAndGetColumnConst<typename Impl::column_type>(column_array_ptr.get());
if (!column_const_array)
throw Exception("Expected array column, found " + column_array_ptr->getName(), ErrorCodes::ILLEGAL_COLUMN);
column_array_ptr = recursiveRemoveLowCardinality(column_const_array->convertToFullColumn());
column_array = checkAndGetColumn<ColumnArray>(column_array_ptr.get());
column_array = checkAndGetColumn<typename Impl::column_type>(column_array_ptr.get());
}
if (!array_type)
@ -201,13 +280,13 @@ public:
if (!offsets_column)
{
offsets_column = column_array->getOffsetsPtr();
offsets_column = getOffsetsPtr(*column_array);
}
else
{
/// The first condition is optimization: do not compare data if the pointers are equal.
if (column_array->getOffsetsPtr() != offsets_column
&& column_array->getOffsets() != typeid_cast<const ColumnArray::ColumnOffsets &>(*offsets_column).getData())
if (getOffsetsPtr(*column_array) != offsets_column
&& getOffsets(*column_array) != typeid_cast<const ColumnArray::ColumnOffsets &>(*offsets_column).getData())
throw Exception("Arrays passed to " + getName() + " must have equal size", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
}
@ -217,13 +296,23 @@ public:
column_first_array = column_array;
}
if constexpr (std::is_same_v<DataTypeMap, typename Impl::data_type>)
{
arrays.emplace_back(ColumnWithTypeAndName(
column_array->getNestedData().getColumnPtr(0), recursiveRemoveLowCardinality(array_type->getKeyType()), array_with_type_and_name.name+".key"));
arrays.emplace_back(ColumnWithTypeAndName(
column_array->getNestedData().getColumnPtr(1), recursiveRemoveLowCardinality(array_type->getValueType()), array_with_type_and_name.name+".value"));
}
else
{
arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(),
recursiveRemoveLowCardinality(array_type->getNestedType()),
array_with_type_and_name.name));
}
}
/// Put all the necessary columns multiplied by the sizes of arrays into the columns.
auto replicated_column_function_ptr = IColumn::mutate(column_function->replicate(column_first_array->getOffsets()));
auto replicated_column_function_ptr = IColumn::mutate(column_function->replicate(getOffsets(*column_first_array)));
auto * replicated_column_function = typeid_cast<ColumnFunction *>(replicated_column_function_ptr.get());
replicated_column_function->appendArguments(arrays);
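
A note on the pattern above: FunctionArrayMapped now dispatches at compile time on the Impl::data_type and Impl::column_type aliases that each Impl exports, so a single template serves both Array and Map arguments. Below is a minimal, self-contained sketch of this trait-based dispatch; the type names are hypothetical stand-ins, not ClickHouse types:

#include <iostream>
#include <type_traits>

// Stand-ins for DataTypeArray / DataTypeMap (hypothetical).
struct FakeArrayType {};
struct FakeMapType {};

// Each Impl advertises which container it operates on, mirroring
// "using data_type = DataTypeArray;" in the diff above.
struct ArrayLikeImpl { using data_type = FakeArrayType; };
struct MapLikeImpl { using data_type = FakeMapType; };

template <typename Impl>
const char * describe()
{
    static_assert(
        std::is_same_v<typename Impl::data_type, FakeArrayType>
        || std::is_same_v<typename Impl::data_type, FakeMapType>,
        "unsupported type");
    if constexpr (std::is_same_v<typename Impl::data_type, FakeArrayType>)
        return "operates on arrays";
    else
        return "operates on maps";
}

int main()
{
    std::cout << describe<ArrayLikeImpl>() << '\n';   // operates on arrays
    std::cout << describe<MapLikeImpl>() << '\n';     // operates on maps
}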


@ -1,12 +1,18 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnDecimal.h>
#include "FunctionArrayMapped.h"
#include <Functions/FunctionFactory.h>
#include <base/defines.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include "FunctionArrayMapped.h"
namespace DB
{
@ -83,6 +89,9 @@ using ArrayAggregateResult = typename ArrayAggregateResultImpl<ArrayElement, ope
template<AggregateOperation aggregate_operation>
struct ArrayAggregateImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
static bool needBoolean() { return false; }
static bool needExpression() { return false; }
static bool needOneArray() { return false; }
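
Each Impl in the files that follow repeats these same three traits. As a hedged sketch of how a FunctionArrayMapped-style wrapper could consult them before wiring up the lambda (validateArguments and SampleImpl are illustrative names, not ClickHouse code):

#include <cstddef>
#include <iostream>
#include <stdexcept>

struct SampleImpl
{
    static bool needBoolean()    { return false; }  // lambda (or sole array) must be boolean
    static bool needExpression() { return true;  }  // the f(collection) overload is unavailable
    static bool needOneArray()   { return true;  }  // exactly one collection argument allowed
};

template <typename Impl>
void validateArguments(bool has_lambda, std::size_t num_collections)
{
    if (Impl::needExpression() && !has_lambda)
        throw std::invalid_argument("this function requires a lambda argument");
    if (Impl::needOneArray() && num_collections != 1)
        throw std::invalid_argument("this function accepts exactly one collection");
}

int main()
{
    validateArguments<SampleImpl>(/*has_lambda=*/true, /*num_collections=*/1);
    std::cout << "arguments accepted\n";
}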


@ -1,8 +1,8 @@
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include "FunctionArrayMapped.h"
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include "FunctionArrayMapped.h"
namespace DB
{
@ -16,6 +16,9 @@ namespace ErrorCodes
*/
struct ArrayAllImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
static bool needBoolean() { return true; }
static bool needExpression() { return false; }
static bool needOneArray() { return false; }


@ -1,10 +1,13 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnsNumber.h>
#include <Common/HashTable/HashTable.h>
#include <Functions/array/FunctionArrayMapped.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/array/FunctionArrayMapped.h>
namespace DB
@ -16,6 +19,9 @@ namespace ErrorCodes
struct ArrayCompactImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
static bool needBoolean() { return false; }
static bool needExpression() { return false; }
static bool needOneArray() { return false; }


@ -1,8 +1,9 @@
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include "FunctionArrayMapped.h"
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include "FunctionArrayMapped.h"
namespace DB
{
@ -16,6 +17,9 @@ namespace ErrorCodes
*/
struct ArrayCountImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
static bool needBoolean() { return true; }
static bool needExpression() { return false; }
static bool needOneArray() { return false; }


@ -1,10 +1,11 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnDecimal.h>
#include "FunctionArrayMapped.h"
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include "FunctionArrayMapped.h"
namespace DB
{
@ -17,6 +18,9 @@ namespace ErrorCodes
struct ArrayCumSumImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
static bool needBoolean() { return false; }
static bool needExpression() { return false; }
static bool needOneArray() { return false; }


@ -1,10 +1,10 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnDecimal.h>
#include "FunctionArrayMapped.h"
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include "FunctionArrayMapped.h"
namespace DB
{
@ -19,6 +19,9 @@ namespace ErrorCodes
*/
struct ArrayCumSumNonNegativeImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
static bool needBoolean() { return false; }
static bool needExpression() { return false; }
static bool needOneArray() { return false; }


@ -1,10 +1,11 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnDecimal.h>
#include "FunctionArrayMapped.h"
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include "FunctionArrayMapped.h"
namespace DB
{
@ -20,6 +21,9 @@ namespace ErrorCodes
*/
struct ArrayDifferenceImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
static bool needBoolean() { return false; }
static bool needExpression() { return false; }
static bool needOneArray() { return false; }


@ -1,8 +1,9 @@
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include "FunctionArrayMapped.h"
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include "FunctionArrayMapped.h"
namespace DB
{
@ -16,6 +17,9 @@ namespace ErrorCodes
*/
struct ArrayExistsImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
static bool needBoolean() { return true; }
static bool needExpression() { return false; }
static bool needOneArray() { return false; }


@ -1,8 +1,9 @@
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include "FunctionArrayMapped.h"
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include "FunctionArrayMapped.h"
namespace DB
{
@ -19,6 +20,9 @@ namespace ErrorCodes
template <bool reverse>
struct ArrayFillImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
static bool needBoolean() { return true; }
static bool needExpression() { return true; }
static bool needOneArray() { return false; }


@ -1,8 +1,9 @@
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include "FunctionArrayMapped.h"
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include "FunctionArrayMapped.h"
namespace DB
{
@ -15,6 +16,9 @@ namespace ErrorCodes
*/
struct ArrayFilterImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
static bool needBoolean() { return true; }
static bool needExpression() { return true; }
static bool needOneArray() { return false; }


@ -1,8 +1,9 @@
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include "FunctionArrayMapped.h"
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include "FunctionArrayMapped.h"
namespace DB
{
@ -20,6 +21,9 @@ enum class ArrayFirstLastStrategy
template <ArrayFirstLastStrategy strategy>
struct ArrayFirstLastImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
static bool needBoolean() { return false; }
static bool needExpression() { return true; }
static bool needOneArray() { return false; }


@ -1,8 +1,9 @@
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include "FunctionArrayMapped.h"
#include <Functions/FunctionFactory.h>
#include "FunctionArrayMapped.h"
namespace DB
{
@ -20,6 +21,9 @@ enum class ArrayFirstLastIndexStrategy
template <ArrayFirstLastIndexStrategy strategy>
struct ArrayFirstLastIndexImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
static bool needBoolean() { return false; }
static bool needExpression() { return true; }
static bool needOneArray() { return false; }


@ -1,6 +1,7 @@
#include "FunctionArrayMapped.h"
#include <Functions/FunctionFactory.h>
#include "FunctionArrayMapped.h"
namespace DB
{
@ -9,6 +10,9 @@ namespace DB
*/
struct ArrayMapImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
/// true if the expression (for an overload of f(expression, arrays)) or an array (for f(array)) should be boolean.
static bool needBoolean() { return false; }
/// true if the f(array) overload is unavailable.


@ -1,8 +1,8 @@
#include "FunctionArrayMapped.h"
#include <base/sort.h>
#include <Functions/FunctionFactory.h>
namespace DB
{
@ -11,6 +11,9 @@ namespace DB
template <bool positive>
struct ArraySortImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
static bool needBoolean() { return false; }
static bool needExpression() { return false; }
static bool needOneArray() { return false; }


@ -1,8 +1,9 @@
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include "FunctionArrayMapped.h"
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include "FunctionArrayMapped.h"
namespace DB
{
@ -14,6 +15,9 @@ namespace ErrorCodes
template <bool reverse>
struct ArraySplitImpl
{
using column_type = ColumnArray;
using data_type = DataTypeArray;
static bool needBoolean() { return true; }
static bool needExpression() { return true; }
static bool needOneArray() { return false; }


@ -518,6 +518,115 @@ public:
}
};
class FunctionMapUpdate : public IFunction
{
public:
static constexpr auto name = "mapUpdate";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionMapUpdate>(); }
String getName() const override
{
return name;
}
size_t getNumberOfArguments() const override { return 2; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() != 2)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(arguments.size()) + ", should be 2",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
const DataTypeMap * left = checkAndGetDataType<DataTypeMap>(arguments[0].type.get());
const DataTypeMap * right = checkAndGetDataType<DataTypeMap>(arguments[1].type.get());
if (!left || !right)
throw Exception{"The two arguments for function " + getName() + " must be both Map type",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
if (!left->getKeyType()->equals(*right->getKeyType()) || !left->getValueType()->equals(*right->getValueType()))
throw Exception{"The Key And Value type of Map for function " + getName() + " must be the same",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
return std::make_shared<DataTypeMap>(left->getKeyType(), left->getValueType());
}
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
const ColumnMap * col_map_left = typeid_cast<const ColumnMap *>(arguments[0].column.get());
const auto * col_const_map_left = checkAndGetColumnConst<ColumnMap>(arguments[0].column.get());
if (col_const_map_left)
col_map_left = typeid_cast<const ColumnMap *>(&col_const_map_left->getDataColumn());
if (!col_map_left)
return nullptr;
const ColumnMap * col_map_right = typeid_cast<const ColumnMap *>(arguments[1].column.get());
const auto * col_const_map_right = checkAndGetColumnConst<ColumnMap>(arguments[1].column.get());
if (col_const_map_right)
col_map_right = typeid_cast<const ColumnMap *>(&col_const_map_right->getDataColumn());
if (!col_map_right)
return nullptr;
const auto & nested_column_left = col_map_left->getNestedColumn();
const auto & keys_data_left = col_map_left->getNestedData().getColumn(0);
const auto & values_data_left = col_map_left->getNestedData().getColumn(1);
const auto & offsets_left = nested_column_left.getOffsets();
const auto & nested_column_right = col_map_right->getNestedColumn();
const auto & keys_data_right = col_map_right->getNestedData().getColumn(0);
const auto & values_data_right = col_map_right->getNestedData().getColumn(1);
const auto & offsets_right = nested_column_right.getOffsets();
const auto & result_type_map = static_cast<const DataTypeMap &>(*result_type);
const DataTypePtr & key_type = result_type_map.getKeyType();
const DataTypePtr & value_type = result_type_map.getValueType();
MutableColumnPtr keys_data = key_type->createColumn();
MutableColumnPtr values_data = value_type->createColumn();
MutableColumnPtr offsets = DataTypeNumber<IColumn::Offset>().createColumn();
IColumn::Offset current_offset = 0;
for (size_t idx = 0; idx < input_rows_count; ++idx)
{
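/// Reading offsets_left[idx - 1] (and offsets_right[idx - 1]) at idx == 0 is safe:
/// ClickHouse offset columns are padded, so the element before the first offset reads as 0.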
for (size_t i = offsets_left[idx - 1]; i < offsets_left[idx]; ++i)
{
bool matched = false;
auto key = keys_data_left.getDataAt(i);
for (size_t j = offsets_right[idx - 1]; j < offsets_right[idx]; ++j)
{
if (keys_data_right.getDataAt(j) == key)
{
matched = true;
break;
}
}
if (!matched)
{
keys_data->insertFrom(keys_data_left, i);
values_data->insertFrom(values_data_left, i);
++current_offset;
}
}
for (size_t j = offsets_right[idx - 1]; j < offsets_right[idx]; ++j)
{
keys_data->insertFrom(keys_data_right, j);
values_data->insertFrom(values_data_right, j);
++current_offset;
}
offsets->insert(current_offset);
}
auto nested_column = ColumnArray::create(
ColumnTuple::create(Columns{std::move(keys_data), std::move(values_data)}),
std::move(offsets));
return ColumnMap::create(nested_column);
}
};
}
void registerFunctionsMap(FunctionFactory & factory)
@ -528,6 +637,7 @@ void registerFunctionsMap(FunctionFactory & factory)
factory.registerFunction<FunctionMapValues>();
factory.registerFunction<FunctionMapContainsKeyLike>();
factory.registerFunction<FunctionExtractKeyLike>();
factory.registerFunction<FunctionMapUpdate>();
}
}
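
For intuition, mapUpdate as implemented above keeps every left-hand entry whose key does not occur on the right, then appends all right-hand entries, so right-hand values win on key collisions. A plain-STL model of the same semantics (mapUpdateModel is an illustrative name, not ClickHouse code):

#include <iostream>
#include <map>
#include <string>

std::map<std::string, int> mapUpdateModel(
    const std::map<std::string, int> & left,
    const std::map<std::string, int> & right)
{
    std::map<std::string, int> result = right;   // right side wins on collisions
    result.insert(left.begin(), left.end());     // insert() keeps already-present keys
    return result;
}

int main()
{
    auto merged = mapUpdateModel({{"a", 1}, {"b", 2}}, {{"b", 20}, {"c", 3}});
    for (const auto & [k, v] : merged)
        std::cout << k << " -> " << v << '\n';   // a -> 1, b -> 20, c -> 3
}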

src/Functions/mapFilter.cpp

@ -0,0 +1,144 @@
#include <Columns/ColumnMap.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/array/FunctionArrayMapped.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/** Higher-order functions for map.
* These functions apply a lambda function to the key-value pairs of a map
* and return some result based on that transformation.
*/
/** mapFilter((k, v) -> predicate, map) - leave in the map only the kv elements for which the expression is true.
*/
struct MapFilterImpl
{
using data_type = DataTypeMap;
using column_type = ColumnMap;
static constexpr auto name = "mapFilter";
static bool needBoolean() { return true; }
static bool needExpression() { return true; }
static bool needOneArray() { return true; }
static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypes & elems)
{
return std::make_shared<DataTypeMap>(elems);
}
/// If there are several arrays, the first one is passed here.
static ColumnPtr execute(const ColumnMap & map_column, ColumnPtr mapped)
{
const ColumnUInt8 * column_filter = typeid_cast<const ColumnUInt8 *>(&*mapped);
if (!column_filter)
{
const auto * column_filter_const = checkAndGetColumnConst<ColumnUInt8>(&*mapped);
if (!column_filter_const)
throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN);
if (column_filter_const->getValue<UInt8>())
return map_column.clone();
else
{
const auto * column_array = typeid_cast<const ColumnArray *>(map_column.getNestedColumnPtr().get());
const auto * column_tuple = typeid_cast<const ColumnTuple *>(column_array->getDataPtr().get());
ColumnPtr keys = column_tuple->getColumnPtr(0)->cloneEmpty();
ColumnPtr values = column_tuple->getColumnPtr(1)->cloneEmpty();
return ColumnMap::create(keys, values, ColumnArray::ColumnOffsets::create(map_column.size(), 0));
}
}
const IColumn::Filter & filter = column_filter->getData();
ColumnPtr filtered = map_column.getNestedColumn().getData().filter(filter, -1);
const IColumn::Offsets & in_offsets = map_column.getNestedColumn().getOffsets();
auto column_offsets = ColumnArray::ColumnOffsets::create(in_offsets.size());
IColumn::Offsets & out_offsets = column_offsets->getData();
size_t in_pos = 0;
size_t out_pos = 0;
for (size_t i = 0; i < in_offsets.size(); ++i)
{
for (; in_pos < in_offsets[i]; ++in_pos)
{
if (filter[in_pos])
++out_pos;
}
out_offsets[i] = out_pos;
}
return ColumnMap::create(ColumnArray::create(filtered, std::move(column_offsets)));
}
};
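
The offset recomputation in MapFilterImpl::execute is the classic flattened-rows pattern: elements are stored contiguously with cumulative end offsets per row, and after filtering the flat data, each row's new end offset is simply the running count of kept elements. A standalone model with STL types in place of ClickHouse columns:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    std::vector<int>     flat    = {1, 2, 3, 4, 5, 6};
    std::vector<uint8_t> filter  = {1, 0, 1, 1, 0, 0};
    std::vector<size_t>  offsets = {2, 4, 6};          // rows: [1,2] [3,4] [5,6]

    // Kept elements per row: [1], [3,4], [] -> new offsets 1, 3, 3.
    std::vector<size_t> out_offsets(offsets.size());
    size_t in_pos = 0, out_pos = 0;
    for (size_t i = 0; i < offsets.size(); ++i)
    {
        for (; in_pos < offsets[i]; ++in_pos)
            if (filter[in_pos])
                ++out_pos;
        out_offsets[i] = out_pos;
    }
    for (size_t o : out_offsets)
        std::cout << o << ' ';                         // 1 3 3
    std::cout << '\n';
}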
/** mapApply((k,v) -> expression, map) - apply the expression to the map.
*/
struct MapApplyImpl
{
using data_type = DataTypeMap;
using column_type = ColumnMap;
static constexpr auto name = "mapApply";
/// true if the expression (for an overload of f(expression, maps)) or a map (for f(map)) should be boolean.
static bool needBoolean() { return false; }
static bool needExpression() { return true; }
static bool needOneArray() { return true; }
static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypes & /*elems*/)
{
const auto * tuple_types = typeid_cast<const DataTypeTuple *>(expression_return.get());
if (!tuple_types)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Expected return type is tuple, got {}", expression_return->getName());
if (tuple_types->getElements().size() != 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Expected 2 columns as map's key and value, but found {}", tuple_types->getElements().size());
return std::make_shared<DataTypeMap>(tuple_types->getElements());
}
static ColumnPtr execute(const ColumnMap & map, ColumnPtr mapped)
{
const auto * column_tuple = checkAndGetColumn<ColumnTuple>(mapped.get());
if (!column_tuple)
{
const ColumnConst * column_const_tuple = checkAndGetColumnConst<ColumnTuple>(mapped.get());
if (!column_const_tuple)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Expected tuple column, found {}", mapped->getName());
auto cols = convertConstTupleToConstantElements(*column_const_tuple);
return ColumnMap::create(cols[0]->convertToFullColumnIfConst(), cols[1]->convertToFullColumnIfConst(), map.getNestedColumn().getOffsetsPtr());
}
return ColumnMap::create(column_tuple->getColumnPtr(0), column_tuple->getColumnPtr(1),
map.getNestedColumn().getOffsetsPtr());
}
};
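
mapApply feeds each (key, value) pair through the lambda and rebuilds the map from the resulting tuple columns; the row offsets are reused unchanged because the element count per row never changes. An approximate STL model of the per-element semantics (mapApplyModel is an illustrative name; note that std::map would deduplicate colliding keys, which the column implementation above does not):

#include <iostream>
#include <map>
#include <string>
#include <utility>

template <typename F>
std::map<std::string, int> mapApplyModel(const std::map<std::string, int> & in, F f)
{
    std::map<std::string, int> out;
    for (const auto & [k, v] : in)
        out.insert(f(k, v));   // f returns the new (key, value) pair
    return out;
}

int main()
{
    auto out = mapApplyModel({{"a", 1}, {"b", 2}},
        [](const std::string & k, int v) { return std::make_pair(k + "!", v * 10); });
    for (const auto & [k, v] : out)
        std::cout << k << " -> " << v << '\n';   // a! -> 10, b! -> 20
}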
void registerFunctionMapApply(FunctionFactory & factory)
{
factory.registerFunction<FunctionArrayMapped<MapFilterImpl, MapFilterImpl>>();
factory.registerFunction<FunctionArrayMapped<MapApplyImpl, MapApplyImpl>>();
}
}


@ -18,6 +18,7 @@ void registerFunctionsArraySort(FunctionFactory & factory);
void registerFunctionArrayCumSum(FunctionFactory & factory);
void registerFunctionArrayCumSumNonNegative(FunctionFactory & factory);
void registerFunctionArrayDifference(FunctionFactory & factory);
void registerFunctionMapApply(FunctionFactory & factory);
void registerFunctionsHigherOrder(FunctionFactory & factory)
{
@ -36,6 +37,7 @@ void registerFunctionsHigherOrder(FunctionFactory & factory)
registerFunctionArrayCumSum(factory);
registerFunctionArrayCumSumNonNegative(factory);
registerFunctionArrayDifference(factory);
registerFunctionMapApply(factory);
}
}


@ -11,7 +11,6 @@
#include "HadoopSnappyReadBuffer.h"
namespace DB
{
namespace ErrorCodes
@ -32,11 +31,11 @@ inline bool HadoopSnappyDecoder::checkAvailIn(size_t avail_in, int min)
inline void HadoopSnappyDecoder::copyToBuffer(size_t * avail_in, const char ** next_in)
{
assert(*avail_in <= sizeof(buffer));
assert(*avail_in + buffer_length <= sizeof(buffer));
memcpy(buffer, *next_in, *avail_in);
memcpy(buffer + buffer_length, *next_in, *avail_in);
buffer_length = *avail_in;
buffer_length += *avail_in;
*next_in += *avail_in;
*avail_in = 0;
}
@ -78,14 +77,21 @@ inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readLength(size_t * avai
inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readBlockLength(size_t * avail_in, const char ** next_in)
{
if (block_length < 0)
{
return readLength(avail_in, next_in, &block_length);
}
return Status::OK;
}
inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readCompressedLength(size_t * avail_in, const char ** next_in)
{
if (compressed_length < 0)
return readLength(avail_in, next_in, &compressed_length);
{
auto status = readLength(avail_in, next_in, &compressed_length);
if (unlikely(compressed_length > 0 && static_cast<size_t>(compressed_length) > sizeof(buffer)))
throw Exception(ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, "Too large snappy compressed block. buffer size: {}, compressed block size: {}", sizeof(buffer), compressed_length);
return status;
}
return Status::OK;
}
@ -111,7 +117,6 @@ HadoopSnappyDecoder::readCompressedData(size_t * avail_in, const char ** next_in
{
compressed = const_cast<char *>(*next_in);
}
size_t uncompressed_length = *avail_out;
auto status = snappy_uncompress(compressed, compressed_length, *next_out, &uncompressed_length);
if (status != SNAPPY_OK)
@ -154,7 +159,9 @@ HadoopSnappyDecoder::Status HadoopSnappyDecoder::readBlock(size_t * avail_in, co
return status;
}
if (total_uncompressed_length != block_length)
{
return Status::INVALID_INPUT;
}
return Status::OK;
}
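
The two fixes above address a length field that arrives split across reads: copyToBuffer must append the new bytes after those already stashed (the old code overwrote from offset 0 and lost them), and readCompressedLength now rejects block lengths that could never fit the staging buffer. A reduced model of the append fix:

#include <cassert>
#include <cstring>
#include <iostream>
#include <string>

struct Stash
{
    char buffer[8] = {};
    size_t buffer_length = 0;

    // Append incoming bytes after what is already buffered, as in the fix.
    void copyToBuffer(size_t * avail_in, const char ** next_in)
    {
        assert(*avail_in + buffer_length <= sizeof(buffer));
        std::memcpy(buffer + buffer_length, *next_in, *avail_in);
        buffer_length += *avail_in;
        *next_in += *avail_in;
        *avail_in = 0;
    }
};

int main()
{
    Stash s;
    const char * part1 = "ab"; size_t n1 = 2;
    const char * part2 = "cd"; size_t n2 = 2;
    s.copyToBuffer(&n1, &part1);
    s.copyToBuffer(&n2, &part2);
    std::cout << std::string(s.buffer, s.buffer_length) << '\n';   // abcd
}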


@ -115,9 +115,16 @@ void WriteBufferFromS3::allocateBuffer()
}
WriteBufferFromS3::~WriteBufferFromS3()
{
try
{
finalize();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
void WriteBufferFromS3::preFinalize()
{

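The destructor change follows the standard rule that destructors must not let exceptions escape: finalize() can throw (it may flush data to the network), so the failure is caught and logged instead of terminating the program. A minimal sketch of the pattern, with a plain stderr print standing in for tryLogCurrentException:

#include <iostream>
#include <stdexcept>

struct Finalizing
{
    void finalize() { throw std::runtime_error("flush failed"); }  // may throw

    ~Finalizing()
    {
        try
        {
            finalize();
        }
        catch (...)
        {
            // Destructors are noexcept by default; swallow and log.
            std::cerr << "exception suppressed in destructor\n";
        }
    }
};

int main()
{
    Finalizing f;   // destructor logs the failure instead of aborting
}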

@ -38,6 +38,11 @@ int main()
return 1;
}
}
if (uncompress(256) != output)
{
std::cout << "test hadoop snappy read buffer failed, buf_size:" << 256 << std::endl;
return 1;
}
std::cout << "test hadoop snappy read buffer success" << std::endl;
return 0;
}
