Merge branch 'master' into kssenii-patch-3

mergify[bot] 2022-02-14 08:08:59 +00:00 committed by GitHub
commit 55ee701d07
139 changed files with 2686 additions and 1195 deletions


@ -12,6 +12,7 @@ BraceWrapping:
AfterUnion: true
BeforeCatch: true
BeforeElse: true
BeforeLambdaBody: true
IndentBraces: false
BreakConstructorInitializersBeforeComma: false
Cpp11BracedListStyle: true

73 .github/workflows/nightly.yml vendored Normal file

@ -0,0 +1,73 @@
name: NightlyBuilds
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
"on":
schedule:
- cron: '0 0 * * *'
jobs:
DockerHubPushAarch64:
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_images_check.py --suffix aarch64 --all
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images_aarch64
path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
DockerHubPushAmd64:
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_images_check.py --suffix amd64 --all
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images_amd64
path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
DockerHubPush:
needs: [DockerHubPushAmd64, DockerHubPushAarch64]
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Download changed aarch64 images
uses: actions/download-artifact@v2
with:
name: changed_images_aarch64
path: ${{ runner.temp }}
- name: Download changed amd64 images
uses: actions/download-artifact@v2
with:
name: changed_images_amd64
path: ${{ runner.temp }}
- name: Images check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/changed_images.json


@ -1,27 +0,0 @@
# This is the configuration file with settings for Potato.
# Potato is an internal Yandex technology that allows us to sync internal [Yandex.Tracker](https://yandex.com/tracker/) and GitHub.
# For all PRs where documentation is needed, just add a 'pr-feature' label and we will include it into documentation sprints.
# The project name.
name: clickhouse
# Object handlers define which handlers we use.
handlers:
# The handler for creating a Yandex.Tracker issue.
- name: issue-create
params:
triggers:
# The trigger for creating the Yandex.Tracker issue. When the specified event occurs, it transfers PR data to Yandex.Tracker.
github:pullRequest:labeled:
data:
# The Yandex.Tracker queue to create the issue in. Each issue in Tracker belongs to one of the project queues.
queue: CLICKHOUSEDOCS
# The issue title.
summary: '[Potato] Pull Request #{{pullRequest.number}}'
# The issue description.
description: >
{{pullRequest.description}}
Ссылка на Pull Request: {{pullRequest.webUrl}}
# The condition for creating the Yandex.Tracker issue.
condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length


@ -127,11 +127,6 @@ endif()
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
add_definitions(-DOS_MACOSX)
if(CMAKE_SYSTEM_PROCESSOR MATCHES arm)
add_definitions(-DIOS_CROSS_COMPILE -DROCKSDB_LITE)
# no debug info for IOS, that will make our library big
add_definitions(-DNDEBUG)
endif()
elseif(CMAKE_SYSTEM_NAME MATCHES "Linux")
add_definitions(-DOS_LINUX)
elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS")


@ -1,15 +0,0 @@
version: "2"
services:
builder:
image: clickhouse/clickhouse-builder
build: docker/builder
client:
image: clickhouse/clickhouse-client
build: docker/client
command: ['--host', 'server']
server:
image: clickhouse/clickhouse-server
build: docker/server
ports:
- 8123:8123


@ -32,6 +32,7 @@
"dependent": []
},
"docker/test/pvs": {
"only_amd64": true,
"name": "clickhouse/pvs-test",
"dependent": []
},
@ -72,6 +73,7 @@
"dependent": []
},
"docker/test/integration/runner": {
"only_amd64": true,
"name": "clickhouse/integration-tests-runner",
"dependent": []
},
@ -124,6 +126,7 @@
"dependent": []
},
"docker/test/integration/kerberos_kdc": {
"only_amd64": true,
"name": "clickhouse/kerberos-kdc",
"dependent": []
},
@ -137,6 +140,7 @@
]
},
"docker/test/integration/kerberized_hadoop": {
"only_amd64": true,
"name": "clickhouse/kerberized-hadoop",
"dependent": []
},


@ -185,15 +185,14 @@ handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
gcore
backtrace full
info locals
thread apply all backtrace full
info registers
disassemble /s
up
info locals
disassemble /s
up
info locals
disassemble /s
p \"done\"
detach
@ -314,6 +313,11 @@ quit
|| echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \
| tail -1 > description.txt
fi
if test -f core.*; then
pigz core.*
mv core.*.gz core.gz
fi
}
case "$stage" in
@ -345,6 +349,10 @@ case "$stage" in
time fuzz
;&
"report")
CORE_LINK=''
if [ -f core.gz ]; then
CORE_LINK='<a href="core.gz">core.gz</a>'
fi
cat > report.html <<EOF ||:
<!DOCTYPE html>
<html lang="en">
@ -386,6 +394,7 @@ th { cursor: pointer; }
<a href="fuzzer.log">fuzzer.log</a>
<a href="server.log">server.log</a>
<a href="main.log">main.log</a>
${CORE_LINK}
</p>
<table>
<tr><th>Test name</th><th>Test status</th><th>Description</th></tr>


@ -15,9 +15,10 @@ RUN curl -o krb5-libs-1.10.3-65.el6.x86_64.rpm ftp://ftp.pbone.net/mirror/vault.
rm -fr *.rpm
RUN cd /tmp && \
curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \
tar xzf commons-daemon-1.0.15-src.tar.gz && \
cd commons-daemon-1.0.15-src/src/native/unix && \
./configure && \
make && \
cp ./jsvc /usr/local/hadoop/sbin
curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \
tar xzf commons-daemon-1.0.15-src.tar.gz && \
cd commons-daemon-1.0.15-src/src/native/unix && \
./configure && \
make && \
cp ./jsvc /usr/local/hadoop-2.7.0/sbin && \
[ -e /usr/local/hadoop ] || ln -s ./hadoop-2.7.0 /usr/local/hadoop


@ -58,9 +58,7 @@ RUN apt-get update \
RUN dockerd --version; docker --version
ARG TARGETARCH
# FIXME: psycopg2-binary is not available for aarch64, we skip it for now
RUN test x$TARGETARCH = xarm64 || ( python3 -m pip install \
RUN python3 -m pip install \
PyMySQL \
aerospike==4.0.0 \
avro==1.10.2 \
@ -90,7 +88,7 @@ RUN test x$TARGETARCH = xarm64 || ( python3 -m pip install \
urllib3 \
requests-kerberos \
pyhdfs \
azure-storage-blob )
azure-storage-blob
COPY modprobe.sh /usr/local/bin/modprobe
COPY dockerd-entrypoint.sh /usr/local/bin/


@ -4,7 +4,7 @@ services:
kerberizedhdfs1:
cap_add:
- DAC_READ_SEARCH
image: clickhouse/kerberized-hadoop
image: clickhouse/kerberized-hadoop:${DOCKER_KERBERIZED_HADOOP_TAG:-latest}
hostname: kerberizedhdfs1
restart: always
volumes:


@ -45,6 +45,7 @@ export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest}
export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest}
export DOCKER_POSTGRESQL_JAVA_CLIENT_TAG=${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:=latest}
export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest}
export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest}
cd /ClickHouse/tests/integration
exec "$@"


@ -1,5 +1,5 @@
# docker build -t clickhouse/performance-comparison .
FROM ubuntu:18.04
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"


@ -4,11 +4,7 @@
ARG FROM_TAG=latest
FROM clickhouse/binary-builder:$FROM_TAG
# PVS studio doesn't support aarch64/arm64, so there is a check for it everywhere
# We'll produce an empty image for arm64
ARG TARGETARCH
RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \
RUN apt-get update --yes \
&& apt-get install \
bash \
wget \
@ -21,7 +17,7 @@ RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \
libprotoc-dev \
libgrpc++-dev \
libc-ares-dev \
--yes --no-install-recommends )
--yes --no-install-recommends
#RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add -
#RUN sudo wget -nv -O /etc/apt/sources.list.d/viva64.list http://files.viva64.com/etc/viva64.list
@ -33,7 +29,7 @@ RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \
ENV PKG_VERSION="pvs-studio-latest"
RUN test x$TARGETARCH = xarm64 || ( set -x \
RUN set -x \
&& export PUBKEY_HASHSUM="ad369a2e9d8b8c30f5a9f2eb131121739b79c78e03fef0f016ea51871a5f78cd4e6257b270dca0ac3be3d1f19d885516" \
&& wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \
&& echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \
@ -41,7 +37,7 @@ RUN test x$TARGETARCH = xarm64 || ( set -x \
&& wget -nv "https://files.viva64.com/${PKG_VERSION}.deb" \
&& { debsig-verify ${PKG_VERSION}.deb \
|| echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \
&& dpkg -i "${PKG_VERSION}.deb" )
&& dpkg -i "${PKG_VERSION}.deb"
ENV CCACHE_DIR=/test_output/ccache


@ -148,14 +148,12 @@ info signals
continue
gcore
backtrace full
info locals
thread apply all backtrace full
info registers
disassemble /s
up
info locals
disassemble /s
up
info locals
disassemble /s
p \"done\"
detach
@ -269,5 +267,5 @@ clickhouse-local --structure "test String, res String" -q "SELECT 'failure', tes
# Default filename is 'core.PROCESS_ID'
for core in core.*; do
pigz $core
mv $core.gz /output/
mv $core.gz /test_output/
done


@ -43,24 +43,27 @@ RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0
ENV DOCKER_CHANNEL stable
ENV DOCKER_VERSION 20.10.6
RUN set -eux; \
\
# this "case" statement is generated via "update.sh"
\
if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/x86_64/docker-${DOCKER_VERSION}.tgz"; then \
echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${x86_64}'"; \
exit 1; \
fi; \
\
tar --extract \
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
# Install docker
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
esac \
&& set -eux \
&& if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/${rarch}/docker-${DOCKER_VERSION}.tgz"; then \
echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${rarch}'" \
&& exit 1; \
fi \
&& tar --extract \
--file docker.tgz \
--strip-components 1 \
--directory /usr/local/bin/ \
; \
rm docker.tgz; \
\
dockerd --version; \
docker --version
&& rm docker.tgz \
&& dockerd --version \
&& docker --version
COPY modprobe.sh /usr/local/bin/modprobe
COPY dockerd-entrypoint.sh /usr/local/bin/


@ -886,3 +886,12 @@ S3 disk can be configured as `main` or `cold` storage:
```
With the `cold` option, data can be moved to S3 if the free space on the local disk becomes smaller than `move_factor * disk_size`, or by a TTL move rule.
## Virtual Columns {#virtual-columns}
- `_part` — Name of a part.
- `_part_index` — Sequential index of the part in the query result.
- `_partition_id` — Name of a partition.
- `_part_uuid` — Unique part identifier (if the MergeTree setting `assign_part_uuids` is enabled).
- `_partition_value` — Values (a tuple) of the `partition by` expression.
- `_sample_factor` — Sample factor (from the query).
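As an illustration, virtual columns can be selected like ordinary columns. A minimal sketch, assuming a hypothetical MergeTree table named `visits`:

```sql
SELECT
    _part,
    _partition_id,
    count() AS rows
FROM visits
GROUP BY _part, _partition_id
ORDER BY _part;
```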


@ -209,6 +209,8 @@ When querying a `Distributed` table, `SELECT` queries are sent to all shards and
When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
To learn more about how distributed `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation.
## Virtual Columns {#virtual-columns}
- `_shard_num` — Contains the `shard_num` value from the table `system.clusters`. Type: [UInt32](../../../sql-reference/data-types/int-uint.md).
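For illustration only, a minimal sketch (assuming a hypothetical `Distributed` table named `dist_table`) that shows how many rows each shard contributes:

```sql
SELECT
    _shard_num,
    count() AS rows
FROM dist_table
GROUP BY _shard_num
ORDER BY _shard_num;
```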


@ -7,18 +7,29 @@ toc_title: URL
Queries data to/from a remote HTTP/HTTPS server. This engine is similar to the [File](../../../engines/table-engines/special/file.md) engine.
Syntax: `URL(URL, Format)`
Syntax: `URL(URL [,Format] [,CompressionMethod])`
- The `URL` parameter must conform to the structure of a Uniform Resource Locator. The specified URL must point to a server that uses HTTP or HTTPS. This does not require any additional headers for getting a response from the server.
- The `Format` must be one that ClickHouse can use in `SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see [Formats](../../../interfaces/formats.md#formats).
- `CompressionMethod` indicates whether the HTTP body should be compressed. If compression is enabled, the HTTP packets sent by the URL engine contain a 'Content-Encoding' header to indicate which compression method is used.
To enable compression, please first make sure that the remote HTTP endpoint indicated by the `URL` parameter supports the corresponding compression algorithm.
The supported `CompressionMethod` must be one of the following:
- gzip or gz
- deflate
- brotli or br
- lzma or xz
- zstd or zst
- lz4
- bz2
- snappy
- none
## Usage {#using-the-engine-in-the-clickhouse-server}
The `format` must be one that ClickHouse can use in
`SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see
[Formats](../../../interfaces/formats.md#formats).
The `URL` must conform to the structure of a Uniform Resource Locator. The specified URL must point to a server
that uses HTTP or HTTPS. This does not require any
additional headers for getting a response from the server.
`INSERT` and `SELECT` queries are transformed to `POST` and `GET` requests,
respectively. For processing `POST` requests, the remote server must support
[Chunked transfer encoding](https://en.wikipedia.org/wiki/Chunked_transfer_encoding).
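As a minimal sketch (the endpoint and table name below are hypothetical), a table backed by a gzip-compressed CSV endpoint could look like this:

```sql
CREATE TABLE url_table (id UInt32, value String)
ENGINE = URL('https://example.com/data.csv.gz', CSV, 'gzip');

SELECT * FROM url_table LIMIT 10;
```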


@ -2304,7 +2304,7 @@ Possible values:
- 1 — Enabled.
- 0 — Disabled.
Default value: `0`.
Default value: `1`.
## output_format_parallel_formatting {#output-format-parallel-formatting}
@ -2315,7 +2315,7 @@ Possible values:
- 1 — Enabled.
- 0 — Disabled.
Default value: `0`.
Default value: `1`.
## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing}


@ -216,6 +216,17 @@ This is more optimal than using the normal IN. However, keep the following point
It also makes sense to specify a local table in the `GLOBAL IN` clause, in case this local table is only available on the requestor server and you want to use data from it on remote servers.
### Distributed Subqueries and max_rows_in_set
You can use [`max_rows_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) and [`max_bytes_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) to control how much data is transferred during distributed queries.
This is especially important if the `global in` query returns a large amount of data. Consider the following SQL:
```sql
select * from table1 where col1 global in (select col1 from table2 where <some_predicate>)
```
If `some_predicate` is not selective enough, it will return a large amount of data and cause performance issues. In such cases, it is wise to limit the data transferred over the network. Also, note that [`set_overflow_mode`](../../operations/settings/query-complexity.md#set_overflow_mode) is set to `throw` (by default), meaning that an exception is raised when these thresholds are met.
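One way to apply these limits is per query, in a `SETTINGS` clause. A minimal sketch (table and column names are hypothetical):

```sql
SELECT *
FROM table1
WHERE col1 GLOBAL IN (SELECT col1 FROM table2 WHERE col2 > 100)
SETTINGS max_rows_in_set = 10000000, set_overflow_mode = 'throw';
```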
### Distributed Subqueries and max_parallel_replicas {#max_parallel_replica-subqueries}
When max_parallel_replicas is greater than 1, distributed queries are further transformed. For example, the following:


@ -197,12 +197,13 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
## MATERIALIZE COLUMN {#materialize-column}
Materializes the column in the parts where the column is missing. This is useful for creating a new column with a complicated `DEFAULT` or `MATERIALIZED` expression. Calculating the column directly in a `SELECT` query can increase request execution time, so it is reasonable to use `MATERIALIZE COLUMN` for such columns. To perform the same manipulation for an existing column, use the `FINAL` modifier.
Materializes or updates a column with an expression for a default value (`DEFAULT` or `MATERIALIZED`).
It is used when it is necessary to add or update a column with a complicated expression, because evaluating such an expression directly on `SELECT` execution turns out to be expensive.
Syntax:
```sql
ALTER TABLE table MATERIALIZE COLUMN col [FINAL];
ALTER TABLE table MATERIALIZE COLUMN col;
```
**Example**
@ -211,20 +212,34 @@ ALTER TABLE table MATERIALIZE COLUMN col [FINAL];
DROP TABLE IF EXISTS tmp;
SET mutations_sync = 2;
CREATE TABLE tmp (x Int64) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY tuple();
INSERT INTO tmp SELECT * FROM system.numbers LIMIT 10;
INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5;
ALTER TABLE tmp ADD COLUMN s String MATERIALIZED toString(x);
ALTER TABLE tmp MATERIALIZE COLUMN s;
SELECT groupArray(x), groupArray(s) FROM (select x,s from tmp order by x);
┌─groupArray(x)─┬─groupArray(s)─────────┐
│ [0,1,2,3,4] │ ['0','1','2','3','4'] │
└───────────────┴───────────────────────┘
ALTER TABLE tmp MODIFY COLUMN s String MATERIALIZED toString(round(100/x));
INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5,5;
SELECT groupArray(x), groupArray(s) FROM tmp;
```
**Result:**
┌─groupArray(x)─────────┬─groupArray(s)──────────────────────────────────┐
│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','20','17','14','12','11'] │
└───────────────────────┴────────────────────────────────────────────────┘
```sql
┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────┐
│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','5','6','7','8','9'] │
└───────────────────────┴───────────────────────────────────────────┘
ALTER TABLE tmp MATERIALIZE COLUMN s;
SELECT groupArray(x), groupArray(s) FROM tmp;
┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────────────────┐
│ [0,1,2,3,4,5,6,7,8,9] │ ['inf','100','50','33','25','20','17','14','12','11'] │
└───────────────────────┴───────────────────────────────────────────────────────┘
```
**See Also**


@ -5,6 +5,6 @@ toc_title: Roadmap
# Roadmap {#roadmap}
The roadmap for the year 2021 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/17623).
The roadmap for the year 2022 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/32513).
{## [Original article](https://clickhouse.com/docs/en/roadmap/) ##}


@ -872,3 +872,13 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
```
If a disk is configured as `cold`, data is moved to S3 when TTL rules fire or when the free space on the local disk falls below the threshold defined as `move_factor * disk_size`.
## Virtual Columns {#virtual-columns}
- `_part` — Name of a part.
- `_part_index` — Sequential index of the part in the query result.
- `_partition_id` — Name of a partition.
- `_part_uuid` — Unique part identifier (if the MergeTree setting `assign_part_uuids` is enabled).
- `_partition_value` — Values (a tuple) of the `partition by` expression.
- `_sample_factor` — Sample factor (from the query).


@ -2119,7 +2119,7 @@ ClickHouse генерирует исключение:
- 1 — Parallel parsing is enabled.
- 0 — Parallel parsing is disabled.
Default value: `0`.
Default value: `1`.
## output_format_parallel_formatting {#output-format-parallel-formatting}
@ -2130,7 +2130,7 @@ ClickHouse генерирует исключение:
- 1 — Parallel formatting is enabled.
- 0 — Parallel formatting is disabled.
Default value: `0`.
Default value: `1`.
## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing}


@ -197,12 +197,13 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
## MATERIALIZE COLUMN {#materialize-column}
Materializes the table column in the parts where values are missing. It is used when you need to create a new column with a complex materialized expression or a `DEFAULT` expression, because computing such a column directly during a `SELECT` query turns out to be noticeably expensive. To perform the same operation for an existing column, use the `FINAL` modifier.
Materializes or updates a table column with an expression for a default value (`DEFAULT` or `MATERIALIZED`).
It is used when you need to add or update a column with a complex expression, because evaluating such an expression directly during `SELECT` query execution turns out to be noticeably expensive.
Syntax:
```sql
ALTER TABLE table MATERIALIZE COLUMN col [FINAL];
ALTER TABLE table MATERIALIZE COLUMN col;
```
**Example**
@ -211,21 +212,39 @@ ALTER TABLE table MATERIALIZE COLUMN col [FINAL];
DROP TABLE IF EXISTS tmp;
SET mutations_sync = 2;
CREATE TABLE tmp (x Int64) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY tuple();
INSERT INTO tmp SELECT * FROM system.numbers LIMIT 10;
INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5;
ALTER TABLE tmp ADD COLUMN s String MATERIALIZED toString(x);
ALTER TABLE tmp MATERIALIZE COLUMN s;
SELECT groupArray(x), groupArray(s) FROM (select x,s from tmp order by x);
┌─groupArray(x)─┬─groupArray(s)─────────┐
│ [0,1,2,3,4] │ ['0','1','2','3','4'] │
└───────────────┴───────────────────────┘
ALTER TABLE tmp MODIFY COLUMN s String MATERIALIZED toString(round(100/x));
INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5,5;
SELECT groupArray(x), groupArray(s) FROM tmp;
┌─groupArray(x)─────────┬─groupArray(s)──────────────────────────────────┐
│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','20','17','14','12','11'] │
└───────────────────────┴────────────────────────────────────────────────┘
ALTER TABLE tmp MATERIALIZE COLUMN s;
SELECT groupArray(x), groupArray(s) FROM tmp;
┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────────────────┐
│ [0,1,2,3,4,5,6,7,8,9] │ ['inf','100','50','33','25','20','17','14','12','11'] │
└───────────────────────┴───────────────────────────────────────────────────────┘
```
**Result:**
**See Also**
```sql
┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────┐
│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','5','6','7','8','9'] │
└───────────────────────┴───────────────────────────────────────────┘
```
- [MATERIALIZED](../../statements/create/table.md#materialized).
## Limitations of the ALTER Query {#ogranicheniia-zaprosa-alter}


@ -14,7 +14,7 @@ toc_title: Introduction
- [MySQL](../../engines/database-engines/mysql.md)
- [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md)
- [MaterializeMySQL](../../engines/database-engines/materialized-mysql.md)
- [Lazy](../../engines/database-engines/lazy.md)


@ -1 +0,0 @@
../../../en/engines/database-engines/materialized-mysql.md


@ -0,0 +1,274 @@
---
toc_priority: 29
toc_title: MaterializedMySQL
---
# [experimental] MaterializedMySQL {#materialized-mysql}
!!! warning "Warning"
This is an experimental feature that should not be used in production.
Creates a ClickHouse database containing all the tables of a MySQL database and all the data in those tables.
The ClickHouse server works as a MySQL replica. It reads the binlog and performs DDL and DML queries.
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
[TABLE OVERRIDE table1 (...), TABLE OVERRIDE table2 (...)]
```
**Engine Parameters**
- `host:port` — MySQL server endpoint.
- `database` — MySQL database name.
- `user` — MySQL user.
- `password` — User password.
**Engine Settings**
- `max_rows_in_buffer` — Maximum number of rows that can be cached in memory (for a single table and for cached data that cannot be queried). When this number is exceeded, the data is materialized. Default value: `65 505`.
- `max_bytes_in_buffer` — Maximum number of bytes that can be cached in memory (for a single table and for cached data that cannot be queried). When this number is exceeded, the data is materialized. Default value: `1 048 576`.
- `max_rows_in_buffers` — Maximum number of rows that can be cached in memory (for the database and for cached data that cannot be queried). When this number is exceeded, the data is materialized. Default value: `65 505`.
- `max_bytes_in_buffers` — Maximum number of bytes that can be cached in memory (for the database and for cached data that cannot be queried). When this number is exceeded, the data is materialized. Default value: `1 048 576`.
- `max_flush_data_time` — Maximum number of milliseconds that data can be cached in memory (for the database and for cached data that cannot be queried). When this interval is exceeded, the data is materialized. Default value: `1000`.
- `max_wait_time_when_mysql_unavailable` — Retry interval (in milliseconds) when MySQL is unavailable. Negative values disable retries. Default value: `1000`.
- `allows_query_when_mysql_lost` — Allows querying a materialized table when the MySQL connection is lost. Default value: `0` (`false`).
```sql
CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***')
SETTINGS
allows_query_when_mysql_lost=true,
max_wait_time_when_mysql_unavailable=10000;
```
**Settings on the MySQL Server Side**
For `MaterializedMySQL` to work correctly, there are a few mandatory `MySQL`-side configuration settings:
- `default_authentication_plugin = mysql_native_password`, because `MaterializedMySQL` can only authorize with this method.
- `gtid_mode = on`, because GTID-based logging is mandatory for providing correct `MaterializedMySQL` replication.
!!! attention "Attention"
While turning on `gtid_mode` you should also specify `enforce_gtid_consistency = on`.
## Virtual Columns {#virtual-columns}
When the `MaterializeMySQL` database engine is used, [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables are used together with virtual `_sign` and `_version` columns.
- `_version` — Transaction version. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
- `_sign` — Deletion mark. Type: [Int8](../../sql-reference/data-types/int-uint.md). Possible values:
- `1` — Row is not deleted,
- `-1` — Row is deleted.
## Data Types Support {#data_types-support}
| MySQL | ClickHouse |
|-------------------------|--------------------------------------------------------------|
| TINY | [Int8](../../sql-reference/data-types/int-uint.md) |
| SHORT | [Int16](../../sql-reference/data-types/int-uint.md) |
| INT24 | [Int32](../../sql-reference/data-types/int-uint.md) |
| LONG | [UInt32](../../sql-reference/data-types/int-uint.md) |
| LONGLONG | [UInt64](../../sql-reference/data-types/int-uint.md) |
| FLOAT | [Float32](../../sql-reference/data-types/float.md) |
| DOUBLE | [Float64](../../sql-reference/data-types/float.md) |
| DECIMAL, NEWDECIMAL | [Decimal](../../sql-reference/data-types/decimal.md) |
| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) |
| DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |
| DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) |
| YEAR | [UInt16](../../sql-reference/data-types/int-uint.md) |
| TIME | [Int64](../../sql-reference/data-types/int-uint.md) |
| ENUM | [Enum](../../sql-reference/data-types/enum.md) |
| STRING | [String](../../sql-reference/data-types/string.md) |
| VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |
| GEOMETRY | [String](../../sql-reference/data-types/string.md) |
| BINARY | [FixedString](../../sql-reference/data-types/fixedstring.md) |
| BIT | [UInt64](../../sql-reference/data-types/int-uint.md) |
| SET | [UInt64](../../sql-reference/data-types/int-uint.md) |
[Nullable](../../sql-reference/data-types/nullable.md) is supported.
The MySQL Time type is converted to microseconds in ClickHouse.
Other types are not supported. If a MySQL table contains a column of such a type, ClickHouse throws an "Unhandled data type" exception and stops replication.
## Specifics and Recommendations {#specifics-and-recommendations}
### Compatibility Restrictions {#compatibility-restrictions}
Apart from the data type limitations, there are a few restrictions compared to `MySQL` databases that should be resolved before replication is possible:
- Each table in `MySQL` should contain a `PRIMARY KEY`.
- Replication will not work for tables with rows containing `ENUM` field values outside the range (specified in the `ENUM` signature).
### DDL Queries {#ddl-queries}
MySQL DDL statements are converted into the corresponding ClickHouse DDL statements ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse some DDL statement, the query is skipped.
### Data Replication {#data-replication}
`MaterializedMySQL` does not support direct `INSERT`, `DELETE` and `UPDATE` queries. However, they are supported in terms of data replication:
- A MySQL `INSERT` query is converted into an `INSERT` with `_sign=1`.
- A MySQL `DELETE` query is converted into an `INSERT` with `_sign=-1`.
- If the primary key has been changed, a MySQL `UPDATE` query is converted into an `INSERT` with `_sign=1` plus an `INSERT` with `_sign=-1`; if the primary key has not been changed, it is converted into an `INSERT` with `_sign=1`.
### Selecting from MaterializedMySQL Tables {#select}
`SELECT` queries from `MaterializedMySQL` tables have some specifics:
- If `_version` is not specified in the `SELECT` query, the [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier is used, so only rows with `MAX(_version)` are returned for each primary key value.
- If `_sign` is not specified in the `SELECT` query, `WHERE _sign=1` is used by default, so deleted rows are not
included in the result set.
- The result includes column comments if they exist in the MySQL database tables.
### Index Conversion {#index-conversion}
MySQL `PRIMARY KEY` and `INDEX` clauses are converted into an `ORDER BY` tuple in ClickHouse tables.
ClickHouse has only one physical order, determined by the `ORDER BY` clause. To create a new physical order, use [materialized views](../../sql-reference/statements/create/view.md#materialized).
**Notes**
- Rows with `_sign=-1` are not deleted physically from the tables.
- Cascade `UPDATE/DELETE` queries are not supported by the `MaterializedMySQL` engine, as they are not visible in the MySQL binlog.
- Replication can be easily broken.
- Manual operations on databases and tables are forbidden.
- `MaterializedMySQL` is affected by the [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. Data is merged into the corresponding table in the `MaterializedMySQL` database when a table in the MySQL server changes.
### Table Overrides {#table-overrides}
Table overrides can be used to customize the ClickHouse DDL queries, allowing you to make schema optimizations for your application. This is especially useful for controlling partitioning, which is important for the overall performance of MaterializedMySQL.
These are the schema conversion manipulations you can do with table overrides for MaterializedMySQL:
* Modify column type. Must be compatible with the original type, or replication will fail. For example, you can modify a `UInt32` column to `UInt64`, but you cannot modify a `String` column to `Array(String)`.
* Modify [column TTL](../table-engines/mergetree-family/mergetree/#mergetree-column-ttl).
* Modify [column compression codec](../../sql-reference/statements/create/table/#codecs).
* Add [ALIAS columns](../../sql-reference/statements/create/table/#alias).
* Add [skipping indexes](../table-engines/mergetree-family/mergetree/#table_engine-mergetree-data_skipping-indexes)
* Add [projections](../table-engines/mergetree-family/mergetree/#projections).
Note that projection optimizations are disabled when using `SELECT ... FINAL` (which MaterializedMySQL does by default), so their usefulness is limited here; `INDEX ... TYPE hypothesis`, [described in the v21.12 blog post](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/), may be more useful in this case.
* Modify [PARTITION BY](../table-engines/mergetree-family/custom-partitioning-key/)
* Modify [ORDER BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
* Modify [PRIMARY KEY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
* Add [SAMPLE BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
* Add [table TTL](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
```sql
CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)
[SETTINGS ...]
[TABLE OVERRIDE table_name (
[COLUMNS (
[col_name [datatype] [ALIAS expr] [CODEC(...)] [TTL expr], ...]
[INDEX index_name expr TYPE indextype[(...)] GRANULARITY val, ...]
[PROJECTION projection_name (SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]), ...]
)]
[ORDER BY expr]
[PRIMARY KEY expr]
[PARTITION BY expr]
[SAMPLE BY expr]
[TTL expr]
), ...]
```
Example:
```sql
CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)
TABLE OVERRIDE table1 (
COLUMNS (
userid UUID,
category LowCardinality(String),
timestamp DateTime CODEC(Delta, Default)
)
PARTITION BY toYear(timestamp)
),
TABLE OVERRIDE table2 (
COLUMNS (
client_ip String TTL created + INTERVAL 72 HOUR
)
SAMPLE BY ip_hash
)
```
The `COLUMNS` list is sparse; existing columns are modified as specified and extra ALIAS columns are added. It is not possible to add ordinary or MATERIALIZED columns. Modified columns with a different type must be assignable from the original type. There is currently no validation of this or similar issues when the `CREATE DATABASE` query executes, so extra care must be taken.
You may specify overrides for tables that do not exist yet.
!!! warning "Warning"
It is easy to break replication with table overrides if not used with care. For example:
* If an ALIAS column is added with a table override, and a column with the same name is later added to the source MySQL table, the converted ALTER TABLE query in ClickHouse will fail and replication will stop.
* It is currently possible to add overrides that reference nullable columns where non-nullable ones are required, for example in `ORDER BY` or `PARTITION BY`. This will cause CREATE TABLE queries that fail, also causing replication to stop.
## Examples of Use {#examples-of-use}
Queries in MySQL:
``` sql
mysql> CREATE DATABASE db;
mysql> CREATE TABLE db.test (a INT PRIMARY KEY, b INT);
mysql> INSERT INTO db.test VALUES (1, 11), (2, 22);
mysql> DELETE FROM db.test WHERE a=1;
mysql> ALTER TABLE db.test ADD COLUMN c VARCHAR(16);
mysql> UPDATE db.test SET c='Wow!', b=222;
mysql> SELECT * FROM test;
```
```text
┌─a─┬───b─┬─c────┐
│ 2 │ 222 │ Wow! │
└───┴─────┴──────┘
```
Database in ClickHouse, exchanging data with the MySQL server:
The database and the table created:
``` sql
CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***');
SHOW TABLES FROM mysql;
```
``` text
┌─name─┐
│ test │
└──────┘
```
After inserting data:
``` sql
SELECT * FROM mysql.test;
```
``` text
┌─a─┬──b─┐
│ 1 │ 11 │
│ 2 │ 22 │
└───┴────┘
```
After deleting data, adding the column and updating:
``` sql
SELECT * FROM mysql.test;
```
``` text
┌─a─┬───b─┬─c────┐
│ 2 │ 222 │ Wow! │
└───┴─────┴──────┘
```
[Original article](https://clickhouse.com/docs/en/engines/database-engines/materialized-mysql/) <!--hide-->


@ -24,6 +24,7 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cac
- `database` — Remote database name.
- `user` — PostgreSQL user name.
- `password` — PostgreSQL user password.
- `schema` — PostgreSQL schema.
- `use_table_cache` — Defines whether the database table structure is cached. Optional. Default value: `0`.
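For illustration, a minimal sketch of creating such a database with an explicit schema (host, credentials and schema name here are hypothetical):

```sql
CREATE DATABASE pg_db
ENGINE = PostgreSQL('postgres1:5432', 'postgres_db', 'postgres_user', 'password', 'test_schema', 1);
```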
## Data Types Support {#data_types-support}


@ -31,6 +31,7 @@ CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_na
When a new replica of the database is created, this replica creates the tables by itself. If the replica has been unavailable for a long time and has lagged behind the replication log, it checks its local metadata against the current metadata in ZooKeeper, moves extra tables containing data to a separate non-replicated database (so as not to accidentally delete anything superfluous), creates the missing tables, and updates table names if they have been renamed. Data is replicated at the `ReplicatedMergeTree` level, i.e. if a table is not replicated, its data will not be replicated (the database is responsible only for metadata).
[`ALTER TABLE ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine only adds/fetches/removes the partition or part on the current replica. However, if the table itself uses a Replicated table engine, the data will be replicated after `ATTACH` is used.
## Usage Example {#usage-example}
Create a cluster of three hosts:


@ -1,67 +1,62 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
---
# system.query_thread_log {#system_tables-query_thread_log}
Contains information about threads that execute queries, for example, thread name, thread start time, duration of query processing.
To start logging:
To enable logging:
1. Configure parameters in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section.
2. Set [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) to 1.
1. Configure parameters in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section.
2. Set [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) to 1.
The flushing period of data is set in the `flush_interval_milliseconds` parameter of the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
The period for flushing data from the buffer to the table is set by the `flush_interval_milliseconds` parameter in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing from the buffer to the table, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
ClickHouse does not delete data from the table automatically. See [Introduction](../../operations/system-tables/index.md#system-tables-introduction) for more details.
ClickHouse does not delete data from the table automatically. For more details, see [Introduction](../../operations/system-tables/index.md#system-tables-introduction).
Columns:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes.
- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread.
- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Name of the thread.
- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID.
- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — Thread ID.
- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of initial thread.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values:
- 1 — Query was initiated by the client.
- 0 — Query was initiated by another query for distributed query execution.
- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query.
- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query.
- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query.
- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution).
- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query.
- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the query thread finished execution.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the query thread finished execution.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of the query.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes.
- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, 0.
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, 0.
- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in the context of this thread.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in the context of this thread.
- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Thread name.
- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID.
- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — Thread ID.
- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of the initial thread.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values:
- 1 — Query was initiated by the user.
- 0 — Query was initiated by another query for distributed query execution.
- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the query.
- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query.
- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address from which the query was made.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port from which the query was made.
- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who first initiated the query (for distributed query execution).
- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address of the parent query that launched this query.
- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Client port of the parent query.
- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface from which the query was initiated. Possible values:
- 1 — TCP.
- 2 — HTTP.
- `os_user` ([String](../../sql-reference/data-types/string.md)) — OS user name of the user who runs [clickhouse-client](../../interfaces/cli.md).
- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where [clickhouse-client](../../interfaces/cli.md) or another TCP client is run.
- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name.
- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version.
- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values:
- 0 — The query was launched from the TCP interface.
- `os_user` ([String](../../sql-reference/data-types/string.md)) — OS user name of the user who runs [clickhouse-client](../../interfaces/cli.md).
- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where [clickhouse-client](../../interfaces/cli.md) or another TCP client is run.
- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name.
- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch version of [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values:
- 0 — The query was launched from the TCP interface.
- 1 — The `GET` method was used.
- 2 — The `POST` method was used.
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `ProfileEvents` ([Array(String, UInt64)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. Their description can be found in the table [system.events](#system_tables-events).
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `ProfileEvents` ([Array(String, UInt64)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. See the [system.events](#system_tables-events) table.
**Example**
@ -113,4 +108,5 @@ ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'Compr
**See Also**
- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table, which contains common information about query execution.
- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table, which contains common information about query execution.
- [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) — This table contains information about each view executed during a query.


@ -5,6 +5,6 @@ toc_title: Roadmap
# Roadmap {#roadmap}
The roadmap for the year 2021 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/17623).
The roadmap for the year 2022 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/32513).
{## [Original article](https://clickhouse.com/docs/en/roadmap/) ##}


@ -217,13 +217,12 @@
<!-- The following file is used only if ssl_require_client_auth=1 -->
<ssl_ca_cert_file>/path/to/ssl_ca_cert_file</ssl_ca_cert_file>
<!-- Default compression algorithm (applied if client doesn't specify another algorithm, see result_compression in QueryInfo).
<!-- Default transport compression type (can be overridden by client, see the transport_compression_type field in QueryInfo).
Supported algorithms: none, deflate, gzip, stream_gzip -->
<compression>deflate</compression>
<transport_compression_type>none</transport_compression_type>
<!-- Default compression level (applied if client doesn't specify another level, see result_compression in QueryInfo).
Supported levels: none, low, medium, high -->
<compression_level>medium</compression_level>
<!-- Default transport compression level. Supported levels: 0..3 -->
<transport_compression_level>0</transport_compression_level>
<!-- Send/receive message size limits in bytes. -1 means unlimited -->
<max_send_message_size>-1</max_send_message_size>


@ -86,7 +86,7 @@ enum class AccessType
M(CREATE_DICTIONARY, "", DICTIONARY, CREATE) /* allows to execute {CREATE|ATTACH} DICTIONARY */\
M(CREATE_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables;
implicitly enabled by the grant CREATE_TABLE on any table */ \
M(CREATE_FUNCTION, "", DATABASE, CREATE) /* allows to execute CREATE FUNCTION */ \
M(CREATE_FUNCTION, "", GLOBAL, CREATE) /* allows to execute CREATE FUNCTION */ \
M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \
\
M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */\
@ -94,7 +94,7 @@ enum class AccessType
M(DROP_VIEW, "", VIEW, DROP) /* allows to execute {DROP|DETACH} TABLE for views;
implicitly enabled by the grant DROP_TABLE */\
M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\
M(DROP_FUNCTION, "", DATABASE, DROP) /* allows to execute DROP FUNCTION */\
M(DROP_FUNCTION, "", GLOBAL, DROP) /* allows to execute DROP FUNCTION */\
M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\
\
M(TRUNCATE, "TRUNCATE TABLE", TABLE, ALL) \
@ -113,9 +113,9 @@ enum class AccessType
M(ALTER_ROLE, "", GLOBAL, ACCESS_MANAGEMENT) \
M(DROP_ROLE, "", GLOBAL, ACCESS_MANAGEMENT) \
M(ROLE_ADMIN, "", GLOBAL, ACCESS_MANAGEMENT) /* allows to grant and revoke the roles which are not granted to the current user with admin option */\
M(CREATE_ROW_POLICY, "CREATE POLICY", GLOBAL, ACCESS_MANAGEMENT) \
M(ALTER_ROW_POLICY, "ALTER POLICY", GLOBAL, ACCESS_MANAGEMENT) \
M(DROP_ROW_POLICY, "DROP POLICY", GLOBAL, ACCESS_MANAGEMENT) \
M(CREATE_ROW_POLICY, "CREATE POLICY", TABLE, ACCESS_MANAGEMENT) \
M(ALTER_ROW_POLICY, "ALTER POLICY", TABLE, ACCESS_MANAGEMENT) \
M(DROP_ROW_POLICY, "DROP POLICY", TABLE, ACCESS_MANAGEMENT) \
M(CREATE_QUOTA, "", GLOBAL, ACCESS_MANAGEMENT) \
M(ALTER_QUOTA, "", GLOBAL, ACCESS_MANAGEMENT) \
M(DROP_QUOTA, "", GLOBAL, ACCESS_MANAGEMENT) \
@ -124,7 +124,7 @@ enum class AccessType
M(DROP_SETTINGS_PROFILE, "DROP PROFILE", GLOBAL, ACCESS_MANAGEMENT) \
M(SHOW_USERS, "SHOW CREATE USER", GLOBAL, SHOW_ACCESS) \
M(SHOW_ROLES, "SHOW CREATE ROLE", GLOBAL, SHOW_ACCESS) \
M(SHOW_ROW_POLICIES, "SHOW POLICIES, SHOW CREATE ROW POLICY, SHOW CREATE POLICY", GLOBAL, SHOW_ACCESS) \
M(SHOW_ROW_POLICIES, "SHOW POLICIES, SHOW CREATE ROW POLICY, SHOW CREATE POLICY", TABLE, SHOW_ACCESS) \
M(SHOW_QUOTAS, "SHOW CREATE QUOTA", GLOBAL, SHOW_ACCESS) \
M(SHOW_SETTINGS_PROFILES, "SHOW PROFILES, SHOW CREATE SETTINGS PROFILE, SHOW CREATE PROFILE", GLOBAL, SHOW_ACCESS) \
M(SHOW_ACCESS, "", GROUP, ACCESS_MANAGEMENT) \


@ -425,6 +425,7 @@ bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args
| AccessType::TRUNCATE;
const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY;
const AccessFlags function_ddl = AccessType::CREATE_FUNCTION | AccessType::DROP_FUNCTION;
const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl;
const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE;
const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS;
@ -432,7 +433,7 @@ bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args
const AccessFlags not_readonly_flags = write_table_access | table_and_dictionary_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY;
const AccessFlags not_readonly_1_flags = AccessType::CREATE_TEMPORARY_TABLE;
const AccessFlags ddl_flags = table_ddl | dictionary_ddl;
const AccessFlags ddl_flags = table_ddl | dictionary_ddl | function_ddl;
const AccessFlags introspection_flags = AccessType::INTROSPECTION;
};
static const PrecalculatedFlags precalc;


@ -45,7 +45,15 @@ TEST(AccessRights, Union)
lhs.grant(AccessType::INSERT);
rhs.grant(AccessType::ALL, "db1");
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, CREATE FUNCTION, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
ASSERT_EQ(lhs.toString(),
"GRANT INSERT ON *.*, "
"GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, "
"CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, "
"TRUNCATE, OPTIMIZE, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, "
"SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
"SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
"SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "
"SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
}


@ -239,6 +239,7 @@ private:
UInt64 genRandom(size_t lim)
{
assert(lim > 0);
/// With a large number of values, we will generate random numbers several times slower.
if (lim <= static_cast<UInt64>(rng.max()))
return static_cast<UInt32>(rng()) % static_cast<UInt32>(lim);


@ -1317,7 +1317,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (insert && insert->select)
insert->tryFindInputFunction(input_function);
bool is_async_insert = global_context->getSettings().async_insert && insert && insert->hasInlinedData();
bool is_async_insert = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData();
/// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately.
if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert)


@ -50,12 +50,12 @@ ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr &&
if (!offsets_concrete)
throw Exception("offsets_column must be a ColumnUInt64", ErrorCodes::LOGICAL_ERROR);
if (!offsets_concrete->empty() && nested_column)
if (!offsets_concrete->empty() && data)
{
Offset last_offset = offsets_concrete->getData().back();
/// This will also prevent possible overflow in offset.
if (nested_column->size() != last_offset)
if (data->size() != last_offset)
throw Exception("offsets_column has data inconsistent with nested_column", ErrorCodes::LOGICAL_ERROR);
}

20 src/Common/ArenaUtils.h Normal file

@ -0,0 +1,20 @@
#pragma once
#include <string.h>
#include <string>
#include <base/StringRef.h>
/** Copy string value into Arena.
* Arena should support method:
* char * alloc(size_t size).
*/
template <typename Arena>
inline StringRef copyStringInArena(Arena & arena, StringRef value)
{
size_t key_size = value.size;
char * place_for_key = arena.alloc(key_size);
memcpy(reinterpret_cast<void *>(place_for_key), reinterpret_cast<const void *>(value.data), key_size);
StringRef result{place_for_key, key_size};
return result;
}


@ -387,47 +387,52 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
}
template <typename Data>
ALWAYS_INLINE FindResult findFromRow(Data & data, size_t row_, Arena & pool)
ALWAYS_INLINE FindResult findKey(Data & data, size_t row_, Arena & pool)
{
size_t row = getIndexAt(row_);
if (is_nullable && row == 0)
{
if constexpr (has_mapped)
return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData());
return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData(), 0);
else
return FindResult(data.hasNullKeyData());
return FindResult(data.hasNullKeyData(), 0);
}
if (visit_cache[row] != VisitValue::Empty)
{
if constexpr (has_mapped)
return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found);
return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found, 0);
else
return FindResult(visit_cache[row] == VisitValue::Found);
return FindResult(visit_cache[row] == VisitValue::Found, 0);
}
auto key_holder = getKeyHolder(row_, pool);
typename Data::iterator it;
typename Data::LookupResult it;
if (saved_hash)
it = data.find(*key_holder, saved_hash[row]);
it = data.find(keyHolderGetKey(key_holder), saved_hash[row]);
else
it = data.find(*key_holder);
it = data.find(keyHolderGetKey(key_holder));
bool found = it != data.end();
bool found = it;
visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound;
if constexpr (has_mapped)
{
if (found)
mapped_cache[row] = it->second;
mapped_cache[row] = it->getMapped();
}
size_t offset = 0;
if constexpr (FindResult::has_offset)
offset = found ? data.offsetInternal(it) : 0;
if constexpr (has_mapped)
return FindResult(&mapped_cache[row], found);
return FindResult(&mapped_cache[row], found, offset);
else
return FindResult(found);
return FindResult(found, offset);
}
template <typename Data>


@ -2,6 +2,7 @@
#include <base/StringRef.h>
#include <Common/HashTable/HashMap.h>
#include <Common/ArenaWithFreeLists.h>
#include <Common/ArenaUtils.h>
#include <unordered_map>
#include <list>
#include <atomic>
@ -36,6 +37,8 @@ private:
/// Allows to avoid additional copies in updateValue function
size_t snapshot_up_to_size = 0;
ArenaWithFreeLists arena;
/// Collect invalid iterators to avoid traversing the whole list
std::vector<Mapped> snapshot_invalid_iters;
uint64_t approximate_data_size{0};
@ -113,17 +116,6 @@ private:
}
}
StringRef copyStringInArena(const std::string & value_to_copy)
{
size_t value_to_copy_size = value_to_copy.size();
char * place_for_key = arena.alloc(value_to_copy_size);
memcpy(reinterpret_cast<void *>(place_for_key), reinterpret_cast<const void *>(value_to_copy.data()), value_to_copy_size);
StringRef updated_value{place_for_key, value_to_copy_size};
return updated_value;
}
public:
using iterator = typename List::iterator;
@ -137,7 +129,7 @@ public:
if (!it)
{
ListElem elem{copyStringInArena(key), value, true};
ListElem elem{copyStringInArena(arena, key), value, true};
auto itr = list.insert(list.end(), elem);
bool inserted;
map.emplace(itr->key, it, inserted, hash_value);
@ -159,7 +151,7 @@ public:
if (it == map.end())
{
ListElem elem{copyStringInArena(key), value, true};
ListElem elem{copyStringInArena(arena, key), value, true};
auto itr = list.insert(list.end(), elem);
bool inserted;
map.emplace(itr->key, it, inserted, hash_value);
@ -175,6 +167,7 @@ public:
list_itr->active_in_map = false;
auto new_list_itr = list.insert(list.end(), elem);
it->getMapped() = new_list_itr;
snapshot_invalid_iters.push_back(list_itr);
}
else
{
@ -195,6 +188,7 @@ public:
if (snapshot_mode)
{
list_itr->active_in_map = false;
snapshot_invalid_iters.push_back(list_itr);
list_itr->free_key = true;
map.erase(it->getKey());
}
@ -235,6 +229,7 @@ public:
{
auto elem_copy = *(list_itr);
list_itr->active_in_map = false;
snapshot_invalid_iters.push_back(list_itr);
updater(elem_copy.value);
auto itr = list.insert(list.end(), elem_copy);
it->getMapped() = itr;
@ -274,23 +269,15 @@ public:
void clearOutdatedNodes()
{
auto start = list.begin();
auto end = list.end();
for (auto itr = start; itr != end;)
for (auto & itr: snapshot_invalid_iters)
{
if (!itr->active_in_map)
{
updateDataSize(CLEAR_OUTDATED_NODES, itr->key.size, itr->value.sizeInBytes(), 0);
if (itr->free_key)
arena.free(const_cast<char *>(itr->key.data), itr->key.size);
itr = list.erase(itr);
}
else
{
assert(!itr->free_key);
itr++;
}
assert(!itr->active_in_map);
updateDataSize(CLEAR_OUTDATED_NODES, itr->key.size, itr->value.sizeInBytes(), 0);
if (itr->free_key)
arena.free(const_cast<char *>(itr->key.data), itr->key.size);
list.erase(itr);
}
snapshot_invalid_iters.clear();
}
void clear()
@ -310,7 +297,6 @@ public:
void disableSnapshotMode()
{
snapshot_mode = false;
snapshot_up_to_size = 0;
}

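The snapshot-related changes above stop clearOutdatedNodes from scanning the whole list: iterators are remembered in snapshot_invalid_iters at the moment a node becomes outdated, and cleanup only touches those. The deferred-erase pattern can be illustrated with standard containers alone; a minimal sketch with hypothetical names, not code from this commit:

#include <cassert>
#include <iterator>
#include <list>
#include <string>
#include <vector>

int main()
{
    std::list<std::string> nodes = {"a", "b", "c", "d"};
    /// Remember outdated iterators as soon as a node is superseded,
    /// instead of scanning the whole list during cleanup.
    std::vector<std::list<std::string>::iterator> invalid_iters;

    auto it = std::next(nodes.begin());  /// "b" gets a newer version...
    invalid_iters.push_back(it);
    nodes.push_back("b2");               /// ...which is appended at the end of the list.

    /// Cleanup cost is proportional to the number of invalidated nodes,
    /// not to the size of the list.
    for (auto & outdated : invalid_iters)
        nodes.erase(outdated);
    invalid_iters.clear();

    assert(nodes.size() == 4);  /// a, c, d, b2
    return 0;
}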
View File

@ -429,6 +429,7 @@ class IColumn;
M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \
\
M(DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic, "Default database engine.", 0) \
M(DefaultTableEngine, default_table_engine, DefaultTableEngine::None, "Default table engine used when ENGINE is not set in CREATE statement.",0) \
M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
@ -482,7 +483,6 @@ class IColumn;
M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \
M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
M(Bool, insert_null_as_default, true, "Insert DEFAULT values instead of NULL in INSERT SELECT (UNION ALL)", 0) \
@ -582,6 +582,7 @@ class IColumn;
MAKE_OBSOLETE(M, UInt64, merge_tree_clear_old_parts_interval_seconds, 1) \
MAKE_OBSOLETE(M, UInt64, partial_merge_join_optimizations, 0) \
MAKE_OBSOLETE(M, MaxThreads, max_alter_threads, 0) \
MAKE_OBSOLETE(M, Bool, allow_experimental_projection_optimization, true) \
/** The section above is for obsolete settings. Do not add anything there. */

View File

@ -93,6 +93,16 @@ IMPLEMENT_SETTING_ENUM_WITH_RENAME(DefaultDatabaseEngine, ErrorCodes::BAD_ARGUME
{{"Ordinary", DefaultDatabaseEngine::Ordinary},
{"Atomic", DefaultDatabaseEngine::Atomic}})
IMPLEMENT_SETTING_ENUM_WITH_RENAME(DefaultTableEngine, ErrorCodes::BAD_ARGUMENTS,
{{"None", DefaultTableEngine::None},
{"Log", DefaultTableEngine::Log},
{"StripeLog", DefaultTableEngine::StripeLog},
{"MergeTree", DefaultTableEngine::MergeTree},
{"ReplacingMergeTree", DefaultTableEngine::ReplacingMergeTree},
{"ReplicatedMergeTree", DefaultTableEngine::ReplicatedMergeTree},
{"ReplicatedReplacingMergeTree", DefaultTableEngine::ReplicatedReplacingMergeTree},
{"Memory", DefaultTableEngine::Memory}})
IMPLEMENT_SETTING_MULTI_ENUM(MySQLDataTypesSupport, ErrorCodes::UNKNOWN_MYSQL_DATATYPES_SUPPORT_LEVEL,
{{"decimal", MySQLDataTypesSupport::DECIMAL},
{"datetime64", MySQLDataTypesSupport::DATETIME64}})

View File

@ -120,6 +120,19 @@ enum class DefaultDatabaseEngine
DECLARE_SETTING_ENUM(DefaultDatabaseEngine)
enum class DefaultTableEngine
{
None = 0, /// Disable. Need to use ENGINE =
Log,
StripeLog,
MergeTree,
ReplacingMergeTree,
ReplicatedMergeTree,
ReplicatedReplacingMergeTree,
Memory,
};
DECLARE_SETTING_ENUM(DefaultTableEngine)
enum class MySQLDataTypesSupport
{

View File

@ -77,6 +77,10 @@ std::pair<String, StoragePtr> createTableFromAST(
/// - the code is simpler, since the query is already brought to a suitable form.
if (!ast_create_query.columns_list || !ast_create_query.columns_list->columns)
{
if (!ast_create_query.storage || !ast_create_query.storage->engine)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid storage definition in metadata file: "
"it's a bug or result of manual intervention in metadata files");
if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(ast_create_query.storage->engine->name))
throw Exception("Missing definition of columns.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED);
/// Leave columns empty.

View File

@ -316,7 +316,7 @@ getTableOutput(const String & database_name, const String & table_name, ContextM
return std::move(res.pipeline);
}
static inline String reWriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection, const String & database_name, const String & table_name, const Settings & global_settings)
static inline String rewriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection, const String & database_name, const String & table_name, const Settings & global_settings)
{
Block tables_columns_sample_block
{
@ -376,7 +376,7 @@ static inline void dumpDataForTables(
auto pipeline = getTableOutput(database_name, table_name, query_context);
StreamSettings mysql_input_stream_settings(context->getSettingsRef());
String mysql_select_all_query = "SELECT " + reWriteMysqlQueryColumn(connection, mysql_database_name, table_name, context->getSettings()) + " FROM "
String mysql_select_all_query = "SELECT " + rewriteMysqlQueryColumn(connection, mysql_database_name, table_name, context->getSettingsRef()) + " FROM "
+ backQuoteIfNeed(mysql_database_name) + "." + backQuoteIfNeed(table_name);
LOG_INFO(&Poco::Logger::get("MaterializedMySQLSyncThread(" + database_name + ")"), "mysql_select_all_query is {}", mysql_select_all_query);
auto input = std::make_unique<MySQLSource>(connection, mysql_select_all_query, pipeline.getHeader(), mysql_input_stream_settings);

View File

@ -8,10 +8,10 @@
#include <Common/randomSeed.h>
#include <Common/Arena.h>
#include <Common/ArenaWithFreeLists.h>
#include <Common/ArenaUtils.h>
#include <Common/HashTable/LRUHashMap.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/ICacheDictionaryStorage.h>
#include <Dictionaries/DictionaryHelpers.h>
namespace DB

View File

@ -623,17 +623,6 @@ void mergeBlockWithPipe(
}
}
template <typename Arena>
static StringRef copyStringInArena(Arena & arena, StringRef value)
{
size_t key_size = value.size;
char * place_for_key = arena.alloc(key_size);
memcpy(reinterpret_cast<void *>(place_for_key), reinterpret_cast<const void *>(value.data), key_size);
StringRef result{place_for_key, key_size};
return result;
}
/**
* Returns ColumnVector data as PaddedPodArray.

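This commit removes the per-file copyStringInArena helpers (one of the removed copies is visible just above) and includes Common/ArenaUtils.h instead, with call sites now passing the arena explicitly. Judging by the removed copies, the shared header presumably provides essentially the same template; the following is a reconstruction under that assumption, not the verbatim contents of ArenaUtils.h:

#include <cstring>
#include <base/StringRef.h>

/// Copies the given string into the arena and returns a StringRef pointing at the copy.
/// Works with any arena exposing `char * alloc(size_t)`, e.g. Arena or ArenaWithFreeLists.
template <typename Arena>
StringRef copyStringInArena(Arena & arena, StringRef value)
{
    size_t size = value.size;
    char * place_for_key = arena.alloc(size);
    memcpy(place_for_key, value.data, size);
    return {place_for_key, size};
}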
View File

@ -197,7 +197,7 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory)
size_t max_command_execution_time = config.getUInt64(settings_config_prefix + ".max_command_execution_time", 10);
size_t max_execution_time_seconds = static_cast<size_t>(context->getSettings().max_execution_time.totalSeconds());
size_t max_execution_time_seconds = static_cast<size_t>(context->getSettingsRef().max_execution_time.totalSeconds());
if (max_execution_time_seconds != 0 && max_command_execution_time > max_execution_time_seconds)
max_command_execution_time = max_execution_time_seconds;

View File

@ -3,6 +3,7 @@
#include <Core/Defines.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashSet.h>
#include <Common/ArenaUtils.h>
#include <DataTypes/DataTypesDecimal.h>
#include <IO/WriteHelpers.h>
@ -13,7 +14,7 @@
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Dictionaries//DictionarySource.h>
#include <Dictionaries/DictionarySource.h>
#include <Dictionaries/DictionaryFactory.h>
#include <Dictionaries/HierarchyDictionariesUtils.h>

View File

@ -1,5 +1,6 @@
#include "HashedArrayDictionary.h"
#include <Common/ArenaUtils.h>
#include <Core/Defines.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnsNumber.h>

View File

@ -1,5 +1,6 @@
#include "HashedDictionary.h"
#include <Common/ArenaUtils.h>
#include <Core/Defines.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnsNumber.h>

View File

@ -1,5 +1,7 @@
#include <Dictionaries/RangeHashedDictionary.h>
#include <Common/ArenaUtils.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeEnum.h>

View File

@ -16,6 +16,7 @@
#include <Common/randomSeed.h>
#include <Common/Arena.h>
#include <Common/ArenaWithFreeLists.h>
#include <Common/ArenaUtils.h>
#include <Common/MemorySanitizer.h>
#include <Common/CurrentMetrics.h>
#include <Common/HashTable/HashMap.h>

View File

@ -11,6 +11,7 @@
#include <Common/typeid_cast.h>
#include <base/range.h>
#include <constants.h>
#include <h3api.h>
@ -20,6 +21,8 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int INCORRECT_DATA;
extern const int ILLEGAL_COLUMN;
extern const int ARGUMENT_OUT_OF_BOUND;
}
namespace
@ -68,9 +71,35 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto * col_lon = arguments[0].column.get();
const auto * col_lat = arguments[1].column.get();
const auto * col_res = arguments[2].column.get();
const auto * col_lon = checkAndGetColumn<ColumnFloat64>(arguments[0].column.get());
if (!col_lon)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be Float64.",
arguments[0].type->getName(),
1,
getName());
const auto & data_lon = col_lon->getData();
const auto * col_lat = checkAndGetColumn<ColumnFloat64>(arguments[1].column.get());
if (!col_lat)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be Float64.",
arguments[1].type->getName(),
2,
getName());
const auto & data_lat = col_lat->getData();
const auto * col_res = checkAndGetColumn<ColumnUInt8>(arguments[2].column.get());
if (!col_res)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be UInt8.",
arguments[2].type->getName(),
3,
getName());
const auto & data_res = col_res->getData();
auto dst = ColumnVector<UInt64>::create();
auto & dst_data = dst->getData();
@ -78,9 +107,17 @@ public:
for (size_t row = 0; row < input_rows_count; ++row)
{
const double lon = col_lon->getFloat64(row);
const double lat = col_lat->getFloat64(row);
const UInt8 res = col_res->getUInt(row);
const double lon = data_lon[row];
const double lat = data_lat[row];
const UInt8 res = data_res[row];
if (res > MAX_H3_RES)
throw Exception(
ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ",
toString(res),
getName(),
MAX_H3_RES);
LatLng coord;
coord.lng = degsToRads(lon);

View File

@ -19,6 +19,7 @@
#include <Common/DNSResolver.h>
#include <Common/RemoteHostFilter.h>
#include <Common/config.h>
#include <Common/config_version.h>
#include <base/logger_useful.h>
#include <Poco/URIStreamFactory.h>
@ -291,6 +292,18 @@ namespace detail
"0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})",
settings.http_max_tries, settings.http_retry_initial_backoff_ms, settings.http_retry_max_backoff_ms);
// Configure User-Agent if it is not already set.
const std::string user_agent = "User-Agent";
auto iter = std::find_if(http_header_entries.begin(), http_header_entries.end(), [&user_agent](const HTTPHeaderEntry & entry)
{
return std::get<0>(entry) == user_agent;
});
if (iter == http_header_entries.end())
{
http_header_entries.emplace_back(std::make_pair("User-Agent", fmt::format("ClickHouse/{}", VERSION_STRING)));
}
if (!delay_initialization)
{
initialize();

View File

@ -10,6 +10,7 @@ WriteBufferFromHTTP::WriteBufferFromHTTP(
const Poco::URI & uri,
const std::string & method,
const std::string & content_type,
const std::string & content_encoding,
const ConnectionTimeouts & timeouts,
size_t buffer_size_)
: WriteBufferFromOStream(buffer_size_)
@ -24,6 +25,9 @@ WriteBufferFromHTTP::WriteBufferFromHTTP(
request.set("Content-Type", content_type);
}
if (!content_encoding.empty())
request.set("Content-Encoding", content_encoding);
LOG_TRACE((&Poco::Logger::get("WriteBufferToHTTP")), "Sending request to {}", uri.toString());
ostr = &session->sendRequest(request);
@ -31,6 +35,10 @@ WriteBufferFromHTTP::WriteBufferFromHTTP(
void WriteBufferFromHTTP::finalizeImpl()
{
// For a compressed body, the data is stored in the buffer first;
// here, make sure the content in the buffer has been flushed
this->nextImpl();
receiveResponse(*session, request, response, false);
/// TODO: Response body is ignored.
}

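The new content_encoding argument only sets the Content-Encoding header; the caller still has to put already-compressed data into the buffer, and finalizeImpl now flushes whatever remains buffered before the response is read. A hypothetical call site (endpoint and names are illustrative, not taken from this commit) could pair it with wrapWriteBufferWithCompressionMethod, which appears elsewhere in this diff:

#include <memory>
#include <string>
#include <IO/WriteBufferFromHTTP.h>
#include <IO/CompressionMethod.h>
#include <Poco/URI.h>
#include <Poco/Net/HTTPRequest.h>

/// Sketch: send a gzip-compressed POST body and advertise it via Content-Encoding.
void writeCompressedBody(const std::string & payload)
{
    Poco::URI uri("http://localhost:8123/?query=INSERT+INTO+t+FORMAT+CSV"); /// hypothetical endpoint
    auto http_buf = std::make_unique<DB::WriteBufferFromHTTP>(
        uri,
        Poco::Net::HTTPRequest::HTTP_POST,
        /* content_type = */ "text/csv",
        /* content_encoding = */ "gzip");
    auto out = DB::wrapWriteBufferWithCompressionMethod(std::move(http_buf), DB::CompressionMethod::Gzip, /* level = */ 3);
    out->write(payload.data(), payload.size());
    out->finalize(); /// flush the compressed data so the nested HTTP buffer can finish the request
}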
View File

@ -21,6 +21,7 @@ public:
explicit WriteBufferFromHTTP(const Poco::URI & uri,
const std::string & method = Poco::Net::HTTPRequest::HTTP_POST, // POST or PUT only
const std::string & content_type = "",
const std::string & content_encoding = "",
const ConnectionTimeouts & timeouts = {},
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE);

View File

@ -328,14 +328,16 @@ TEST_P(ArchiveReaderAndWriterTest, ArchiveNotExist)
}
#if USE_MINIZIP
namespace
{
const char * supported_archive_file_exts[] =
{
#if USE_MINIZIP
".zip",
#endif
};
}
INSTANTIATE_TEST_SUITE_P(All, ArchiveReaderAndWriterTest, ::testing::ValuesIn(supported_archive_file_exts));
#endif

View File

@ -5,6 +5,7 @@
#include <Parsers/formatAST.h>
#include <Access/AccessControl.h>
#include <Access/Common/AccessFlags.h>
#include <Access/Common/AccessRightsElement.h>
#include <Access/RowPolicy.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
@ -45,22 +46,24 @@ namespace
BlockIO InterpreterCreateRowPolicyQuery::execute()
{
auto & query = query_ptr->as<ASTCreateRowPolicyQuery &>();
auto & access_control = getContext()->getAccessControl();
getContext()->checkAccess(query.alter ? AccessType::ALTER_ROW_POLICY : AccessType::CREATE_ROW_POLICY);
auto required_access = getRequiredAccess();
if (!query.cluster.empty())
{
query.replaceCurrentUserTag(getContext()->getUserName());
return executeDDLQueryOnCluster(query_ptr, getContext());
return executeDDLQueryOnCluster(query_ptr, getContext(), required_access);
}
assert(query.names->cluster.empty());
auto & access_control = getContext()->getAccessControl();
getContext()->checkAccess(required_access);
query.replaceEmptyDatabase(getContext()->getCurrentDatabase());
std::optional<RolesOrUsersSet> roles_from_query;
if (query.roles)
roles_from_query = RolesOrUsersSet{*query.roles, access_control, getContext()->getUserID()};
query.replaceEmptyDatabase(getContext()->getCurrentDatabase());
if (query.alter)
{
auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr
@ -105,4 +108,15 @@ void InterpreterCreateRowPolicyQuery::updateRowPolicyFromQuery(RowPolicy & polic
updateRowPolicyFromQueryImpl(policy, query, {}, {});
}
AccessRightsElements InterpreterCreateRowPolicyQuery::getRequiredAccess() const
{
const auto & query = query_ptr->as<const ASTCreateRowPolicyQuery &>();
AccessRightsElements res;
auto access_type = (query.alter ? AccessType::ALTER_ROW_POLICY : AccessType::CREATE_ROW_POLICY);
for (const auto & row_policy_name : query.names->full_names)
res.emplace_back(access_type, row_policy_name.database, row_policy_name.table_name);
return res;
}
}

View File

@ -6,8 +6,8 @@
namespace DB
{
class ASTCreateRowPolicyQuery;
class AccessRightsElements;
struct RowPolicy;
class InterpreterCreateRowPolicyQuery : public IInterpreter, WithMutableContext
@ -20,6 +20,8 @@ public:
static void updateRowPolicyFromQuery(RowPolicy & policy, const ASTCreateRowPolicyQuery & query);
private:
AccessRightsElements getRequiredAccess() const;
ASTPtr query_ptr;
};

View File

@ -49,12 +49,37 @@ AccessRightsElements InterpreterDropAccessEntityQuery::getRequiredAccess() const
AccessRightsElements res;
switch (query.type)
{
case AccessEntityType::USER: res.emplace_back(AccessType::DROP_USER); return res;
case AccessEntityType::ROLE: res.emplace_back(AccessType::DROP_ROLE); return res;
case AccessEntityType::SETTINGS_PROFILE: res.emplace_back(AccessType::DROP_SETTINGS_PROFILE); return res;
case AccessEntityType::ROW_POLICY: res.emplace_back(AccessType::DROP_ROW_POLICY); return res;
case AccessEntityType::QUOTA: res.emplace_back(AccessType::DROP_QUOTA); return res;
case AccessEntityType::MAX: break;
case AccessEntityType::USER:
{
res.emplace_back(AccessType::DROP_USER);
return res;
}
case AccessEntityType::ROLE:
{
res.emplace_back(AccessType::DROP_ROLE);
return res;
}
case AccessEntityType::SETTINGS_PROFILE:
{
res.emplace_back(AccessType::DROP_SETTINGS_PROFILE);
return res;
}
case AccessEntityType::ROW_POLICY:
{
if (query.row_policy_names)
{
for (const auto & row_policy_name : query.row_policy_names->full_names)
res.emplace_back(AccessType::DROP_ROW_POLICY, row_policy_name.database, row_policy_name.table_name);
}
return res;
}
case AccessEntityType::QUOTA:
{
res.emplace_back(AccessType::DROP_QUOTA);
return res;
}
case AccessEntityType::MAX:
break;
}
throw Exception(
toString(query.type) + ": type is not supported by DROP query", ErrorCodes::NOT_IMPLEMENTED);

View File

@ -6,7 +6,6 @@
namespace DB
{
class AccessRightsElements;
class InterpreterDropAccessEntityQuery : public IInterpreter, WithMutableContext

View File

@ -377,12 +377,48 @@ AccessRightsElements InterpreterShowCreateAccessEntityQuery::getRequiredAccess()
AccessRightsElements res;
switch (show_query.type)
{
case AccessEntityType::USER: res.emplace_back(AccessType::SHOW_USERS); return res;
case AccessEntityType::ROLE: res.emplace_back(AccessType::SHOW_ROLES); return res;
case AccessEntityType::SETTINGS_PROFILE: res.emplace_back(AccessType::SHOW_SETTINGS_PROFILES); return res;
case AccessEntityType::ROW_POLICY: res.emplace_back(AccessType::SHOW_ROW_POLICIES); return res;
case AccessEntityType::QUOTA: res.emplace_back(AccessType::SHOW_QUOTAS); return res;
case AccessEntityType::MAX: break;
case AccessEntityType::USER:
{
res.emplace_back(AccessType::SHOW_USERS);
return res;
}
case AccessEntityType::ROLE:
{
res.emplace_back(AccessType::SHOW_ROLES);
return res;
}
case AccessEntityType::SETTINGS_PROFILE:
{
res.emplace_back(AccessType::SHOW_SETTINGS_PROFILES);
return res;
}
case AccessEntityType::ROW_POLICY:
{
if (show_query.row_policy_names)
{
for (const auto & row_policy_name : show_query.row_policy_names->full_names)
res.emplace_back(AccessType::SHOW_ROW_POLICIES, row_policy_name.database, row_policy_name.table_name);
}
else if (show_query.database_and_table_name)
{
if (show_query.database_and_table_name->second.empty())
res.emplace_back(AccessType::SHOW_ROW_POLICIES, show_query.database_and_table_name->first);
else
res.emplace_back(AccessType::SHOW_ROW_POLICIES, show_query.database_and_table_name->first, show_query.database_and_table_name->second);
}
else
{
res.emplace_back(AccessType::SHOW_ROW_POLICIES);
}
return res;
}
case AccessEntityType::QUOTA:
{
res.emplace_back(AccessType::SHOW_QUOTAS);
return res;
}
case AccessEntityType::MAX:
break;
}
throw Exception(toString(show_query.type) + ": type is not supported by SHOW CREATE query", ErrorCodes::NOT_IMPLEMENTED);
}

View File

@ -229,7 +229,7 @@ ClusterPtr ClusterDiscovery::makeCluster(const ClusterInfo & cluster_info)
bool secure = cluster_info.current_node.secure;
auto cluster = std::make_shared<Cluster>(
context->getSettings(),
context->getSettingsRef(),
shards,
/* username= */ context->getUserName(),
/* password= */ "",

View File

@ -2,6 +2,7 @@
#include <filesystem>
#include "Common/Exception.h"
#include <Common/StringUtils/StringUtils.h>
#include <Common/escapeForFileName.h>
#include <Common/typeid_cast.h>
@ -12,6 +13,7 @@
#include <Core/Defines.h>
#include <Core/Settings.h>
#include <Core/SettingsEnums.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h>
@ -91,6 +93,7 @@ namespace ErrorCodes
extern const int UNKNOWN_DATABASE;
extern const int PATH_ACCESS_DENIED;
extern const int NOT_IMPLEMENTED;
extern const int ENGINE_REQUIRED;
}
namespace fs = std::filesystem;
@ -157,6 +160,9 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", serializeAST(*create.storage));
}
if (create.storage && !create.storage->engine)
throw Exception(ErrorCodes::INCORRECT_QUERY, "Database engine must be specified");
if (create.storage->engine->name == "Atomic"
|| create.storage->engine->name == "Replicated"
|| create.storage->engine->name == "MaterializedPostgreSQL")
@ -581,6 +587,17 @@ ConstraintsDescription InterpreterCreateQuery::getConstraintsDescription(const A
InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create) const
{
/// Set the table engine if it was not specified explicitly.
setEngine(create);
/// We have to check access rights again (in case engine was changed).
if (create.storage)
{
auto source_access_type = StorageFactory::instance().getSourceAccessType(create.storage->engine->name);
if (source_access_type != AccessType::NONE)
getContext()->checkAccess(source_access_type);
}
TableProperties properties;
TableLockHolder as_storage_lock;
@ -645,7 +662,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
}
/// We can have queries like "CREATE TABLE <table> ENGINE=<engine>" if <engine>
/// supports schema inference (will determine table structure in its constructor).
else if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(create.storage->engine->name))
else if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(create.storage->engine->name)) // NOLINT
throw Exception("Incorrect CREATE query: required list of column descriptions or AS section or SELECT.", ErrorCodes::INCORRECT_QUERY);
/// Even if query has list of columns, canonicalize it (unfold Nested columns).
@ -663,8 +680,6 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
create.columns_list->setOrReplace(create.columns_list->projections, new_projections);
validateTableStructure(create, properties);
/// Set the table engine if it was not specified explicitly.
setEngine(create);
assert(as_database_saved.empty() && as_table_saved.empty());
std::swap(create.as_database, as_database_saved);
@ -718,30 +733,90 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat
}
}
String InterpreterCreateQuery::getTableEngineName(DefaultTableEngine default_table_engine)
{
switch (default_table_engine)
{
case DefaultTableEngine::Log:
return "Log";
case DefaultTableEngine::StripeLog:
return "StripeLog";
case DefaultTableEngine::MergeTree:
return "MergeTree";
case DefaultTableEngine::ReplacingMergeTree:
return "ReplacingMergeTree";
case DefaultTableEngine::ReplicatedMergeTree:
return "ReplicatedMergeTree";
case DefaultTableEngine::ReplicatedReplacingMergeTree:
return "ReplicatedReplacingMergeTree";
case DefaultTableEngine::Memory:
return "Memory";
default:
throw Exception("default_table_engine is set to unknown value", ErrorCodes::LOGICAL_ERROR);
}
}
void InterpreterCreateQuery::setDefaultTableEngine(ASTStorage & storage, ContextPtr local_context)
{
if (local_context->getSettingsRef().default_table_engine.value == DefaultTableEngine::None)
throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query");
auto engine_ast = std::make_shared<ASTFunction>();
auto default_table_engine = local_context->getSettingsRef().default_table_engine.value;
engine_ast->name = getTableEngineName(default_table_engine);
engine_ast->no_empty_args = true;
storage.set(storage.engine, engine_ast);
}
void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
{
if (create.as_table_function)
return;
if (create.storage || create.is_dictionary || create.isView())
{
if (create.temporary && create.storage && create.storage->engine && create.storage->engine->name != "Memory")
throw Exception(ErrorCodes::INCORRECT_QUERY,
"Temporary tables can only be created with ENGINE = Memory, not {}", create.storage->engine->name);
if (create.is_dictionary || create.is_ordinary_view || create.is_live_view || create.is_window_view)
return;
if (create.is_materialized_view && create.to_table_id)
return;
}
if (create.temporary)
{
if (create.storage && create.storage->engine && create.storage->engine->name != "Memory")
throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables can only be created with ENGINE = Memory, not {}",
create.storage->engine->name);
/// It's possible if some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not.
/// It makes sense when default_table_engine setting is used, but not for temporary tables.
/// For temporary tables we ignore this setting to allow CREATE TEMPORARY TABLE query without specifying ENGINE
/// even if setting is set to MergeTree or something like that (otherwise MergeTree will be substituted and query will fail).
if (create.storage && !create.storage->engine)
throw Exception(ErrorCodes::INCORRECT_QUERY, "Invalid storage definition for temporary table: must be either ENGINE = Memory or empty");
auto engine_ast = std::make_shared<ASTFunction>();
engine_ast->name = "Memory";
engine_ast->no_empty_args = true;
auto storage_ast = std::make_shared<ASTStorage>();
storage_ast->set(storage_ast->engine, engine_ast);
create.set(create.storage, storage_ast);
return;
}
else if (!create.as_table.empty())
if (create.storage)
{
/// Some part of the storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set the default one.
if (!create.storage->engine)
setDefaultTableEngine(*create.storage, getContext());
return;
}
if (!create.as_table.empty())
{
/// NOTE Getting the structure from the table specified in the AS is done not atomically with the creation of the table.
@ -754,24 +829,16 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
const String qualified_name = backQuoteIfNeed(as_database_name) + "." + backQuoteIfNeed(as_table_name);
if (as_create.is_ordinary_view)
throw Exception(
"Cannot CREATE a table AS " + qualified_name + ", it is a View",
ErrorCodes::INCORRECT_QUERY);
throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a View", qualified_name);
if (as_create.is_live_view)
throw Exception(
"Cannot CREATE a table AS " + qualified_name + ", it is a Live View",
ErrorCodes::INCORRECT_QUERY);
throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Live View", qualified_name);
if (as_create.is_window_view)
throw Exception(
"Cannot CREATE a table AS " + qualified_name + ", it is a Window View",
ErrorCodes::INCORRECT_QUERY);
throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Window View", qualified_name);
if (as_create.is_dictionary)
throw Exception(
"Cannot CREATE a table AS " + qualified_name + ", it is a Dictionary",
ErrorCodes::INCORRECT_QUERY);
throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Dictionary", qualified_name);
if (as_create.storage)
create.set(create.storage, as_create.storage->ptr());
@ -779,7 +846,12 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
create.as_table_function = as_create.as_table_function->clone();
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set engine, it's a bug.");
return;
}
create.set(create.storage, std::make_shared<ASTStorage>());
setDefaultTableEngine(*create.storage, getContext());
}
static void generateUUIDForTable(ASTCreateQuery & create)

View File

@ -1,6 +1,7 @@
#pragma once
#include <Core/NamesAndAliases.h>
#include <Core/SettingsEnums.h>
#include <Access/Common/AccessRightsElement.h>
#include <Interpreters/IInterpreter.h>
#include <Storages/ColumnsDescription.h>
@ -15,6 +16,7 @@ namespace DB
class ASTCreateQuery;
class ASTExpressionList;
class ASTConstraintDeclaration;
class ASTStorage;
class IDatabase;
using DatabasePtr = std::shared_ptr<IDatabase>;
@ -81,6 +83,8 @@ private:
/// Calculate list of columns, constraints, indices, etc... of table. Rewrite query in canonical way.
TableProperties getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create) const;
void validateTableStructure(const ASTCreateQuery & create, const TableProperties & properties) const;
static String getTableEngineName(DefaultTableEngine default_table_engine);
static void setDefaultTableEngine(ASTStorage & storage, ContextPtr local_context);
void setEngine(ASTCreateQuery & create) const;
AccessRightsElements getRequiredAccess() const;

View File

@ -18,6 +18,7 @@
#include <Parsers/ParserSelectWithUnionQuery.h>
#include <Parsers/ParserSetQuery.h>
#include <Common/typeid_cast.h>
#include <Parsers/ASTColumnDeclaration.h>
namespace DB
@ -353,20 +354,26 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ASTPtr ttl_table;
ASTPtr settings;
if (!s_engine.ignore(pos, expected))
return false;
bool storage_like = false;
s_eq.ignore(pos, expected);
if (s_engine.ignore(pos, expected))
{
s_eq.ignore(pos, expected);
if (!ident_with_optional_params_p.parse(pos, engine, expected))
return false;
if (!ident_with_optional_params_p.parse(pos, engine, expected))
return false;
storage_like = true;
}
while (true)
{
if (!partition_by && s_partition_by.ignore(pos, expected))
{
if (expression_p.parse(pos, partition_by, expected))
{
storage_like = true;
continue;
}
else
return false;
}
@ -374,7 +381,10 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (!primary_key && s_primary_key.ignore(pos, expected))
{
if (expression_p.parse(pos, primary_key, expected))
{
storage_like = true;
continue;
}
else
return false;
}
@ -382,7 +392,10 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (!order_by && s_order_by.ignore(pos, expected))
{
if (expression_p.parse(pos, order_by, expected))
{
storage_like = true;
continue;
}
else
return false;
}
@ -390,7 +403,10 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (!sample_by && s_sample_by.ignore(pos, expected))
{
if (expression_p.parse(pos, sample_by, expected))
{
storage_like = true;
continue;
}
else
return false;
}
@ -398,7 +414,10 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (!ttl_table && s_ttl.ignore(pos, expected))
{
if (parser_ttl_list.parse(pos, ttl_table, expected))
{
storage_like = true;
continue;
}
else
return false;
}
@ -407,10 +426,14 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
if (!settings_p.parse(pos, settings, expected))
return false;
storage_like = true;
}
break;
}
// If any part of the storage definition was found, create a storage node
if (!storage_like)
return false;
auto storage = std::make_shared<ASTStorage>();
storage->set(storage->engine, engine);
@ -549,13 +572,11 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
if (!storage_parse_result && !is_temporary)
{
if (!s_as.ignore(pos, expected))
if (s_as.ignore(pos, expected) && !table_function_p.parse(pos, as_table_function, expected))
return false;
if (!table_function_p.parse(pos, as_table_function, expected))
{
return false;
}
}
/// Will set default table engine if Storage clause was not parsed
}
/** Create queries without list of columns:
* - CREATE|ATTACH TABLE ... AS ...
@ -590,10 +611,6 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
}
}
}
else if (!storage)
{
return false;
}
}
auto comment = parseComment(pos, expected);
@ -625,12 +642,14 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
if (comment)
query->set(query->comment, comment);
if (query->storage && query->columns_list && query->columns_list->primary_key)
if (query->columns_list && query->columns_list->primary_key)
{
if (query->storage->primary_key)
{
/// If the engine is not set, the default one will be used
if (!query->storage)
query->set(query->storage, std::make_shared<ASTStorage>());
else if (query->storage->primary_key)
throw Exception("Multiple primary keys are not allowed.", ErrorCodes::BAD_ARGUMENTS);
}
query->storage->primary_key = query->columns_list->primary_key;
}
@ -1263,8 +1282,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
if (is_materialized_view && !to_table)
{
/// Internal ENGINE for MATERIALIZED VIEW must be specified.
if (!storage_p.parse(pos, storage, expected))
return false;
/// Actually check it in Interpreter as default_table_engine can be set
storage_p.parse(pos, storage, expected);
if (s_populate.ignore(pos, expected))
is_populate = true;

View File

@ -276,7 +276,7 @@ protected:
class ParserIndexDeclaration : public IParserBase
{
public:
ParserIndexDeclaration() {}
ParserIndexDeclaration() = default;
protected:
const char * getName() const override { return "index declaration"; }
@ -336,7 +336,7 @@ protected:
/**
* ENGINE = name [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] [SETTINGS name = value, ...]
* [ENGINE = name] [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] [SETTINGS name = value, ...]
*/
class ParserStorage : public IParserBase
{
@ -391,7 +391,7 @@ class ParserTableOverrideDeclaration : public IParserBase
{
public:
const bool is_standalone;
ParserTableOverrideDeclaration(bool is_standalone_ = true) : is_standalone(is_standalone_) { }
explicit ParserTableOverrideDeclaration(bool is_standalone_ = true) : is_standalone(is_standalone_) { }
protected:
const char * getName() const override { return "table override declaration"; }

View File

@ -356,6 +356,7 @@ void registerInputFormatJSONEachRow(FormatFactory & factory)
});
factory.registerFileExtension("ndjson", "JSONEachRow");
factory.registerFileExtension("jsonl", "JSONEachRow");
factory.registerInputFormat("JSONStringsEachRow", [](
ReadBuffer & buf,

View File

@ -20,7 +20,7 @@ namespace ErrorCodes
TSKVRowInputFormat::TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_)
: IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns())
: IRowInputFormat(std::move(header_), in_, std::move(params_)), format_settings(format_settings_), name_map(getPort().getHeader().columns())
{
const auto & sample_block = getPort().getHeader();
size_t num_columns = sample_block.columns();

View File

@ -921,8 +921,8 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
auto reader_settings = getMergeTreeReaderSettings(context);
bool use_skip_indexes = context->getSettings().use_skip_indexes;
if (select.final() && !context->getSettings().use_skip_indexes_if_final)
bool use_skip_indexes = settings.use_skip_indexes;
if (select.final() && !settings.use_skip_indexes_if_final)
use_skip_indexes = false;
result.parts_with_ranges = MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes(

View File

@ -44,8 +44,8 @@ public:
private:
SubqueryForSet subquery;
std::unique_ptr<PushingPipelineExecutor> executor;
QueryPipeline table_out;
std::unique_ptr<PushingPipelineExecutor> executor;
UInt64 read_rows = 0;
Stopwatch watch;

View File

@ -331,7 +331,7 @@ Chain buildPushingToViewsChain(
{
auto executing_inner_query = std::make_shared<ExecutingInnerQueryFromViewTransform>(
storage_header, views_data->views.back(), views_data);
executing_inner_query->setRuntimeData(view_thread_status, elapsed_counter_ms);
executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms);
out.addSource(std::move(executing_inner_query));
}
@ -381,7 +381,7 @@ Chain buildPushingToViewsChain(
processors.emplace_front(std::move(copying_data));
processors.emplace_back(std::move(finalizing_views));
result_chain = Chain(std::move(processors));
result_chain.setNumThreads(max_parallel_streams);
result_chain.setNumThreads(std::min(views_data->max_threads, max_parallel_streams));
}
if (auto * live_view = dynamic_cast<StorageLiveView *>(storage.get()))

View File

@ -18,7 +18,7 @@ namespace DB
struct ViewRuntimeData
{
/// A query we should run over inserted block befire pushing into inner storage.
/// A query we should run over inserted block before pushing into inner storage.
const ASTPtr query;
/// This structure is expected by inner storage. Will convert query result to it.
Block sample_block;

View File

@ -59,7 +59,7 @@ InputFormatPtr getInputFormatFromASTInsertQuery(
: std::make_unique<EmptyReadBuffer>();
/// Create a source from input buffer using format from query
auto source = context->getInputFormat(ast_insert_query->format, *input_buffer, header, context->getSettings().max_insert_block_size);
auto source = context->getInputFormat(ast_insert_query->format, *input_buffer, header, context->getSettingsRef().max_insert_block_size);
source->addBuffer(std::move(input_buffer));
return source;
}

View File

@ -51,6 +51,7 @@ using GRPCQueryInfo = clickhouse::grpc::QueryInfo;
using GRPCResult = clickhouse::grpc::Result;
using GRPCException = clickhouse::grpc::Exception;
using GRPCProgress = clickhouse::grpc::Progress;
using GRPCObsoleteTransportCompression = clickhouse::grpc::ObsoleteTransportCompression;
namespace DB
{
@ -101,62 +102,6 @@ namespace
});
}
grpc_compression_algorithm parseCompressionAlgorithm(const String & str)
{
if (str == "none")
return GRPC_COMPRESS_NONE;
else if (str == "deflate")
return GRPC_COMPRESS_DEFLATE;
else if (str == "gzip")
return GRPC_COMPRESS_GZIP;
else if (str == "stream_gzip")
return GRPC_COMPRESS_STREAM_GZIP;
else
throw Exception("Unknown compression algorithm: '" + str + "'", ErrorCodes::INVALID_CONFIG_PARAMETER);
}
grpc_compression_level parseCompressionLevel(const String & str)
{
if (str == "none")
return GRPC_COMPRESS_LEVEL_NONE;
else if (str == "low")
return GRPC_COMPRESS_LEVEL_LOW;
else if (str == "medium")
return GRPC_COMPRESS_LEVEL_MED;
else if (str == "high")
return GRPC_COMPRESS_LEVEL_HIGH;
else
throw Exception("Unknown compression level: '" + str + "'", ErrorCodes::INVALID_CONFIG_PARAMETER);
}
grpc_compression_algorithm convertCompressionAlgorithm(const ::clickhouse::grpc::CompressionAlgorithm & algorithm)
{
if (algorithm == ::clickhouse::grpc::NO_COMPRESSION)
return GRPC_COMPRESS_NONE;
else if (algorithm == ::clickhouse::grpc::DEFLATE)
return GRPC_COMPRESS_DEFLATE;
else if (algorithm == ::clickhouse::grpc::GZIP)
return GRPC_COMPRESS_GZIP;
else if (algorithm == ::clickhouse::grpc::STREAM_GZIP)
return GRPC_COMPRESS_STREAM_GZIP;
else
throw Exception("Unknown compression algorithm: '" + ::clickhouse::grpc::CompressionAlgorithm_Name(algorithm) + "'", ErrorCodes::INVALID_GRPC_QUERY_INFO);
}
grpc_compression_level convertCompressionLevel(const ::clickhouse::grpc::CompressionLevel & level)
{
if (level == ::clickhouse::grpc::COMPRESSION_NONE)
return GRPC_COMPRESS_LEVEL_NONE;
else if (level == ::clickhouse::grpc::COMPRESSION_LOW)
return GRPC_COMPRESS_LEVEL_LOW;
else if (level == ::clickhouse::grpc::COMPRESSION_MEDIUM)
return GRPC_COMPRESS_LEVEL_MED;
else if (level == ::clickhouse::grpc::COMPRESSION_HIGH)
return GRPC_COMPRESS_LEVEL_HIGH;
else
throw Exception("Unknown compression level: '" + ::clickhouse::grpc::CompressionLevel_Name(level) + "'", ErrorCodes::INVALID_GRPC_QUERY_INFO);
}
/// Gets file's contents as a string, throws an exception if failed.
String readFile(const String & filepath)
{
@ -193,6 +138,102 @@ namespace
return grpc::InsecureServerCredentials();
}
/// Transport compression makes the gRPC library compress packed Result messages before sending them over the network.
struct TransportCompression
{
grpc_compression_algorithm algorithm;
grpc_compression_level level;
/// Extracts the settings of transport compression from a query info if possible.
static std::optional<TransportCompression> fromQueryInfo(const GRPCQueryInfo & query_info)
{
TransportCompression res;
if (!query_info.transport_compression_type().empty())
{
res.setAlgorithm(query_info.transport_compression_type(), ErrorCodes::INVALID_GRPC_QUERY_INFO);
res.setLevel(query_info.transport_compression_level(), ErrorCodes::INVALID_GRPC_QUERY_INFO);
return res;
}
if (query_info.has_obsolete_result_compression())
{
switch (query_info.obsolete_result_compression().algorithm())
{
case GRPCObsoleteTransportCompression::NO_COMPRESSION: res.algorithm = GRPC_COMPRESS_NONE; break;
case GRPCObsoleteTransportCompression::DEFLATE: res.algorithm = GRPC_COMPRESS_DEFLATE; break;
case GRPCObsoleteTransportCompression::GZIP: res.algorithm = GRPC_COMPRESS_GZIP; break;
case GRPCObsoleteTransportCompression::STREAM_GZIP: res.algorithm = GRPC_COMPRESS_STREAM_GZIP; break;
default: throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, "Unknown compression algorithm: {}", GRPCObsoleteTransportCompression::CompressionAlgorithm_Name(query_info.obsolete_result_compression().algorithm()));
}
switch (query_info.obsolete_result_compression().level())
{
case GRPCObsoleteTransportCompression::COMPRESSION_NONE: res.level = GRPC_COMPRESS_LEVEL_NONE; break;
case GRPCObsoleteTransportCompression::COMPRESSION_LOW: res.level = GRPC_COMPRESS_LEVEL_LOW; break;
case GRPCObsoleteTransportCompression::COMPRESSION_MEDIUM: res.level = GRPC_COMPRESS_LEVEL_MED; break;
case GRPCObsoleteTransportCompression::COMPRESSION_HIGH: res.level = GRPC_COMPRESS_LEVEL_HIGH; break;
default: throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, "Unknown compression level: {}", GRPCObsoleteTransportCompression::CompressionLevel_Name(query_info.obsolete_result_compression().level()));
}
return res;
}
return std::nullopt;
}
/// Extracts the settings of transport compression from the server configuration.
static TransportCompression fromConfiguration(const Poco::Util::AbstractConfiguration & config)
{
TransportCompression res;
if (config.has("grpc.transport_compression_type"))
{
res.setAlgorithm(config.getString("grpc.transport_compression_type"), ErrorCodes::INVALID_CONFIG_PARAMETER);
res.setLevel(config.getInt("grpc.transport_compression_level", 0), ErrorCodes::INVALID_CONFIG_PARAMETER);
}
else
{
res.setAlgorithm(config.getString("grpc.compression", "none"), ErrorCodes::INVALID_CONFIG_PARAMETER);
res.setLevel(config.getString("grpc.compression_level", "none"), ErrorCodes::INVALID_CONFIG_PARAMETER);
}
return res;
}
private:
void setAlgorithm(const String & str, int error_code)
{
if (str == "none")
algorithm = GRPC_COMPRESS_NONE;
else if (str == "deflate")
algorithm = GRPC_COMPRESS_DEFLATE;
else if (str == "gzip")
algorithm = GRPC_COMPRESS_GZIP;
else if (str == "stream_gzip")
algorithm = GRPC_COMPRESS_STREAM_GZIP;
else
throw Exception(error_code, "Unknown compression algorithm: '{}'", str);
}
void setLevel(const String & str, int error_code)
{
if (str == "none")
level = GRPC_COMPRESS_LEVEL_NONE;
else if (str == "low")
level = GRPC_COMPRESS_LEVEL_LOW;
else if (str == "medium")
level = GRPC_COMPRESS_LEVEL_MED;
else if (str == "high")
level = GRPC_COMPRESS_LEVEL_HIGH;
else
throw Exception(error_code, "Unknown compression level: '{}'", str);
}
void setLevel(int level_, int error_code)
{
if (0 <= level_ && level_ < GRPC_COMPRESS_LEVEL_COUNT)
level = static_cast<grpc_compression_level>(level_);
else
throw Exception(error_code, "Compression level {} is out of range 0..{}", level_, GRPC_COMPRESS_LEVEL_COUNT - 1);
}
};
/// Gets session's timeout from query info or from the server config.
std::chrono::steady_clock::duration getSessionTimeout(const GRPCQueryInfo & query_info, const Poco::Util::AbstractConfiguration & config)
@ -293,15 +334,10 @@ namespace
return std::nullopt;
}
void setResultCompression(grpc_compression_algorithm algorithm, grpc_compression_level level)
void setTransportCompression(const TransportCompression & transport_compression)
{
grpc_context.set_compression_algorithm(algorithm);
grpc_context.set_compression_level(level);
}
void setResultCompression(const ::clickhouse::grpc::Compression & compression)
{
setResultCompression(convertCompressionAlgorithm(compression.algorithm()), convertCompressionLevel(compression.level()));
grpc_context.set_compression_algorithm(transport_compression.algorithm);
grpc_context.set_compression_level(transport_compression.level);
}
protected:
@ -606,6 +642,9 @@ namespace
void throwIfFailedToReadQueryInfo();
bool isQueryCancelled();
void addQueryDetailsToResult();
void addOutputFormatToResult();
void addOutputColumnsNamesAndTypesToResult(const Block & headers);
void addProgressToResult();
void addTotalsToResult(const Block & totals);
void addExtremesToResult(const Block & extremes);
@ -628,10 +667,12 @@ namespace
ASTInsertQuery * insert_query = nullptr;
String input_format;
String input_data_delimiter;
CompressionMethod input_compression_method = CompressionMethod::None;
PODArray<char> output;
String output_format;
CompressionMethod compression_method = CompressionMethod::None;
int compression_level = 0;
bool send_output_columns_names_and_types = false;
CompressionMethod output_compression_method = CompressionMethod::None;
int output_compression_level = 0;
uint64_t interactive_delay = 100000;
bool send_exception_with_stacktrace = true;
@ -815,9 +856,9 @@ namespace
if (!query_info.database().empty())
query_context->setCurrentDatabase(query_info.database());
/// Apply compression settings for this call.
if (query_info.has_result_compression())
responder->setResultCompression(query_info.result_compression());
/// Apply transport compression for this call.
if (auto transport_compression = TransportCompression::fromQueryInfo(query_info))
responder->setTransportCompression(*transport_compression);
/// The interactive delay will be used to show progress.
interactive_delay = settings.interactive_delay;
@ -851,9 +892,19 @@ namespace
if (output_format.empty())
output_format = query_context->getDefaultFormat();
send_output_columns_names_and_types = query_info.send_output_columns();
/// Choose compression.
compression_method = chooseCompressionMethod("", query_info.compression_type());
compression_level = query_info.compression_level();
String input_compression_method_str = query_info.input_compression_type();
if (input_compression_method_str.empty())
input_compression_method_str = query_info.obsolete_compression_type();
input_compression_method = chooseCompressionMethod("", input_compression_method_str);
String output_compression_method_str = query_info.output_compression_type();
if (output_compression_method_str.empty())
output_compression_method_str = query_info.obsolete_compression_type();
output_compression_method = chooseCompressionMethod("", output_compression_method_str);
output_compression_level = query_info.output_compression_level();
/// Set callback to create and fill external tables
query_context->setExternalTablesInitializer([this] (ContextPtr context)
@ -984,7 +1035,7 @@ namespace
return {nullptr, 0}; /// no more input data
});
read_buffer = wrapReadBufferWithCompressionMethod(std::move(read_buffer), compression_method);
read_buffer = wrapReadBufferWithCompressionMethod(std::move(read_buffer), input_compression_method);
assert(!pipeline);
auto source = query_context->getInputFormat(
@ -1105,6 +1156,9 @@ namespace
void Call::generateOutput()
{
/// We add query_id and time_zone to the first result anyway.
addQueryDetailsToResult();
if (!io.pipeline.initialized() || io.pipeline.pushing())
return;
@ -1112,13 +1166,13 @@ namespace
if (io.pipeline.pulling())
header = io.pipeline.getHeader();
if (compression_method != CompressionMethod::None)
if (output_compression_method != CompressionMethod::None)
output.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data.
write_buffer = std::make_unique<WriteBufferFromVector<PODArray<char>>>(output);
nested_write_buffer = static_cast<WriteBufferFromVector<PODArray<char>> *>(write_buffer.get());
if (compression_method != CompressionMethod::None)
if (output_compression_method != CompressionMethod::None)
{
write_buffer = wrapWriteBufferWithCompressionMethod(std::move(write_buffer), compression_method, compression_level);
write_buffer = wrapWriteBufferWithCompressionMethod(std::move(write_buffer), output_compression_method, output_compression_level);
compressing_write_buffer = write_buffer.get();
}
@ -1144,6 +1198,9 @@ namespace
return true;
};
addOutputFormatToResult();
addOutputColumnsNamesAndTypesToResult(header);
Block block;
while (check_for_cancel())
{
@ -1394,6 +1451,29 @@ namespace
return false;
}
void Call::addQueryDetailsToResult()
{
*result.mutable_query_id() = query_context->getClientInfo().current_query_id;
*result.mutable_time_zone() = DateLUT::instance().getTimeZone();
}
void Call::addOutputFormatToResult()
{
*result.mutable_output_format() = output_format;
}
void Call::addOutputColumnsNamesAndTypesToResult(const Block & header)
{
if (!send_output_columns_names_and_types)
return;
for (const auto & column : header)
{
auto & name_and_type = *result.add_output_columns();
*name_and_type.mutable_name() = column.name;
*name_and_type.mutable_type() = column.type->getName();
}
}
void Call::addProgressToResult()
{
auto values = progress.fetchAndResetPiecewiseAtomically();
@ -1414,10 +1494,10 @@ namespace
return;
PODArray<char> memory;
if (compression_method != CompressionMethod::None)
if (output_compression_method != CompressionMethod::None)
memory.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data.
std::unique_ptr<WriteBuffer> buf = std::make_unique<WriteBufferFromVector<PODArray<char>>>(memory);
buf = wrapWriteBufferWithCompressionMethod(std::move(buf), compression_method, compression_level);
buf = wrapWriteBufferWithCompressionMethod(std::move(buf), output_compression_method, output_compression_level);
auto format = query_context->getOutputFormat(output_format, *buf, totals);
format->write(materializeBlock(totals));
format->finalize();
@ -1432,10 +1512,10 @@ namespace
return;
PODArray<char> memory;
if (compression_method != CompressionMethod::None)
if (output_compression_method != CompressionMethod::None)
memory.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data.
std::unique_ptr<WriteBuffer> buf = std::make_unique<WriteBufferFromVector<PODArray<char>>>(memory);
buf = wrapWriteBufferWithCompressionMethod(std::move(buf), compression_method, compression_level);
buf = wrapWriteBufferWithCompressionMethod(std::move(buf), output_compression_method, output_compression_level);
auto format = query_context->getOutputFormat(output_format, *buf, extremes);
format->write(materializeBlock(extremes));
format->finalize();
@ -1772,8 +1852,9 @@ void GRPCServer::start()
builder.RegisterService(&grpc_service);
builder.SetMaxSendMessageSize(iserver.config().getInt("grpc.max_send_message_size", -1));
builder.SetMaxReceiveMessageSize(iserver.config().getInt("grpc.max_receive_message_size", -1));
builder.SetDefaultCompressionAlgorithm(parseCompressionAlgorithm(iserver.config().getString("grpc.compression", "none")));
builder.SetDefaultCompressionLevel(parseCompressionLevel(iserver.config().getString("grpc.compression_level", "none")));
auto default_transport_compression = TransportCompression::fromConfiguration(iserver.config());
builder.SetDefaultCompressionAlgorithm(default_transport_compression.algorithm);
builder.SetDefaultCompressionLevel(default_transport_compression.level);
queue = builder.AddCompletionQueue();
grpc_server = builder.BuildAndStart();

View File

@ -36,7 +36,7 @@ void HTTPServerConnection::run()
if (request.isSecure())
{
size_t hsts_max_age = context->getSettings().hsts_max_age.value;
size_t hsts_max_age = context->getSettingsRef().hsts_max_age.value;
if (hsts_max_age > 0)
response.add("Strict-Transport-Security", "max-age=" + std::to_string(hsts_max_age));

View File

@ -359,6 +359,7 @@ void TCPHandler::runImpl()
return true;
sendProgress();
sendProfileEvents();
sendLogs();
return false;

View File

@ -45,21 +45,19 @@ message ExternalTable {
map<string, string> settings = 5;
}
enum CompressionAlgorithm {
NO_COMPRESSION = 0;
DEFLATE = 1;
GZIP = 2;
STREAM_GZIP = 3;
}
enum CompressionLevel {
COMPRESSION_NONE = 0;
COMPRESSION_LOW = 1;
COMPRESSION_MEDIUM = 2;
COMPRESSION_HIGH = 3;
}
message Compression {
message ObsoleteTransportCompression {
enum CompressionAlgorithm {
NO_COMPRESSION = 0;
DEFLATE = 1;
GZIP = 2;
STREAM_GZIP = 3;
}
enum CompressionLevel {
COMPRESSION_NONE = 0;
COMPRESSION_LOW = 1;
COMPRESSION_MEDIUM = 2;
COMPRESSION_HIGH = 3;
}
CompressionAlgorithm algorithm = 1;
CompressionLevel level = 2;
}
@ -84,6 +82,9 @@ message QueryInfo {
// Default output format. If not specified, 'TabSeparated' is used.
string output_format = 7;
// Set it if you want the names and the types of output columns to be sent to the client.
bool send_output_columns = 24;
repeated ExternalTable external_tables = 8;
string user_name = 9;
@ -102,16 +103,16 @@ message QueryInfo {
// `next_query_info` is allowed to be set only if a method with streaming input (i.e. ExecuteQueryWithStreamInput() or ExecuteQueryWithStreamIO()) is used.
bool next_query_info = 16;
/// Controls how a ClickHouse server will compress query execution results before sending back to the client.
/// If not set the compression settings from the configuration file will be used.
Compression result_compression = 17;
// Compression type for `input_data`, `output_data`, `totals` and `extremes`.
// Compression type for `input_data`.
// Supported compression types: none, gzip(gz), deflate, brotli(br), lzma(xz), zstd(zst), lz4, bz2.
// When used for `input_data` the client is responsible to compress data before putting it into `input_data`.
// When used for `output_data` or `totals` or `extremes` the client receives compressed data and should decompress it by itself.
// In the latter case consider to specify also `compression_level`.
string compression_type = 18;
// The client is responsible for compressing data before putting it into `input_data`.
string input_compression_type = 20;
// Compression type for `output_data`, `totals` and `extremes`.
// Supported compression types: none, gzip(gz), deflate, brotli(br), lzma(xz), zstd(zst), lz4, bz2.
// The client receives compressed data and should decompress it by itself.
// Consider also setting `output_compression_level`.
string output_compression_type = 21;
// Compression level.
// WARNING: If it's not specified, the compression level is set to zero by default, which might not be the best choice for some compression types (see below).
@ -123,7 +124,23 @@ message QueryInfo {
// zstd: 1..22; 3 is recommended by default (compression level 0 also means 3)
// lz4: 0..16; values < 0 mean fast acceleration
// bz2: 1..9
int32 compression_level = 19;
int32 output_compression_level = 19;
// Transport compression is an alternative way to make the server compress its response.
// Instead of compressing just `output`, the server will compress whole packed messages of the `Result` type,
// and the gRPC implementation on the client side will decompress those messages, so client code won't be bothered with decompression.
// The big difference from the compression enabled by `output_compression_type` is that
// with transport compression the client code receives already decompressed data in `output`.
// If transport compression is not set here, it can still be enabled by the server configuration.
// Supported compression types: none, deflate, gzip, stream_gzip
// Supported compression levels: 0..3
// WARNING: Don't set `transport_compression` and `output_compression` at the same time, because it will make the server compress its output twice!
string transport_compression_type = 22;
int32 transport_compression_level = 23;
/// Obsolete fields, should not be used in new code.
ObsoleteTransportCompression obsolete_result_compression = 17;
string obsolete_compression_type = 18;
}
enum LogsLevel {
@ -173,7 +190,17 @@ message Exception {
// Result of execution of a query which is sent back by the ClickHouse server to the client.
message Result {
// Output of the query, represented in the `output_format` or in a format specified in `query`.
string query_id = 9;
string time_zone = 10;
// The format in which `output`, `totals` and `extremes` are written.
// It's either the same as `output_format` specified in `QueryInfo` or the format specified in the query itself.
string output_format = 11;
// The names and types of columns of the result written in `output`.
repeated NameAndType output_columns = 12;
// Output of the query, represented in the `output_format`.
bytes output = 1;
bytes totals = 2;
bytes extremes = 3;
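When `output_compression_type` is used, `output`, `totals` and `extremes` arrive compressed and the client does the decompression. A small sketch, assuming gzip was requested and `result` is a `Result` message already received from the server:
import gzip
def read_output(result) -> bytes:
    # `output` holds the rows in `output_format`; with gzip output compression
    # it must be gunzipped by the client before parsing.
    return gzip.decompress(result.output)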

View File

@ -372,44 +372,47 @@ String HDFSSource::getName() const
Chunk HDFSSource::generate()
{
if (!reader)
return {};
Chunk chunk;
if (reader->pull(chunk))
while (true)
{
Columns columns = chunk.getColumns();
UInt64 num_rows = chunk.getNumRows();
if (!reader || isCancelled())
break;
/// Enrich with virtual columns.
if (need_path_column)
Chunk chunk;
if (reader->pull(chunk))
{
auto column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumnConst(num_rows, current_path);
columns.push_back(column->convertToFullColumnIfConst());
Columns columns = chunk.getColumns();
UInt64 num_rows = chunk.getNumRows();
/// Enrich with virtual columns.
if (need_path_column)
{
auto column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumnConst(num_rows, current_path);
columns.push_back(column->convertToFullColumnIfConst());
}
if (need_file_column)
{
size_t last_slash_pos = current_path.find_last_of('/');
auto file_name = current_path.substr(last_slash_pos + 1);
auto column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumnConst(num_rows, std::move(file_name));
columns.push_back(column->convertToFullColumnIfConst());
}
return Chunk(std::move(columns), num_rows);
}
if (need_file_column)
{
size_t last_slash_pos = current_path.find_last_of('/');
auto file_name = current_path.substr(last_slash_pos + 1);
std::lock_guard lock(reader_mutex);
reader.reset();
pipeline.reset();
read_buf.reset();
auto column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumnConst(num_rows, std::move(file_name));
columns.push_back(column->convertToFullColumnIfConst());
if (!initialize())
break;
}
return Chunk(std::move(columns), num_rows);
}
{
std::lock_guard lock(reader_mutex);
reader.reset();
pipeline.reset();
read_buf.reset();
if (!initialize())
return {};
}
return generate();
return {};
}

View File

@ -69,7 +69,7 @@ Pipe StorageHDFSCluster::read(
size_t /*max_block_size*/,
unsigned /*num_streams*/)
{
auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettings());
auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef());
auto iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(context, uri);
auto callback = std::make_shared<HDFSSource::IteratorWrapper>([iterator]() mutable -> String

View File

@ -1292,8 +1292,8 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, ContextPtr context,
key_expr_type_not_null = key_expr_type;
bool cast_not_needed = is_set_const /// Set args are already casted inside Set::createFromAST
|| ((isNativeNumber(key_expr_type_not_null) || isDateTime(key_expr_type_not_null))
&& (isNativeNumber(const_type) || isDateTime(const_type))); /// Numbers and DateTime are accurately compared without cast.
|| ((isNativeInteger(key_expr_type_not_null) || isDateTime(key_expr_type_not_null))
&& (isNativeInteger(const_type) || isDateTime(const_type))); /// Native integers and DateTime are accurately compared without cast.
if (!cast_not_needed && !key_expr_type_not_null->equals(*const_type))
{

View File

@ -1,5 +1,7 @@
#include "PartitionedSink.h"
#include <Common/ArenaUtils.h>
#include <Functions/FunctionsConversion.h>
#include <Interpreters/Context.h>
@ -40,19 +42,18 @@ PartitionedSink::PartitionedSink(
}
SinkPtr PartitionedSink::getSinkForPartition(const String & partition_id)
SinkPtr PartitionedSink::getSinkForPartitionKey(StringRef partition_key)
{
auto it = sinks.find(partition_id);
if (it == sinks.end())
auto it = partition_id_to_sink.find(partition_key);
if (it == partition_id_to_sink.end())
{
auto sink = createSinkForPartition(partition_id);
std::tie(it, std::ignore) = sinks.emplace(partition_id, sink);
auto sink = createSinkForPartition(partition_key.toString());
std::tie(it, std::ignore) = partition_id_to_sink.emplace(partition_key, sink);
}
return it->second;
}
void PartitionedSink::consume(Chunk chunk)
{
const auto & columns = chunk.getColumns();
@ -61,45 +62,59 @@ void PartitionedSink::consume(Chunk chunk)
block_with_partition_by_expr.setColumns(columns);
partition_by_expr->execute(block_with_partition_by_expr);
const auto * column = block_with_partition_by_expr.getByName(partition_by_column_name).column.get();
const auto * partition_by_result_column = block_with_partition_by_expr.getByName(partition_by_column_name).column.get();
std::unordered_map<String, size_t> sub_chunks_indices;
IColumn::Selector selector;
for (size_t row = 0; row < chunk.getNumRows(); ++row)
size_t chunk_rows = chunk.getNumRows();
chunk_row_index_to_partition_index.resize(chunk_rows);
partition_id_to_chunk_index.clear();
for (size_t row = 0; row < chunk_rows; ++row)
{
auto value = column->getDataAt(row);
auto [it, inserted] = sub_chunks_indices.emplace(value, sub_chunks_indices.size());
selector.push_back(it->second);
auto partition_key = partition_by_result_column->getDataAt(row);
auto [it, inserted] = partition_id_to_chunk_index.insert(makePairNoInit(partition_key, partition_id_to_chunk_index.size()));
if (inserted)
it->value.first = copyStringInArena(partition_keys_arena, partition_key);
chunk_row_index_to_partition_index[row] = it->getMapped();
}
Chunks sub_chunks;
sub_chunks.reserve(sub_chunks_indices.size());
for (size_t column_index = 0; column_index < columns.size(); ++column_index)
size_t columns_size = columns.size();
size_t partitions_size = partition_id_to_chunk_index.size();
Chunks partition_index_to_chunk;
partition_index_to_chunk.reserve(partitions_size);
for (size_t column_index = 0; column_index < columns_size; ++column_index)
{
MutableColumns column_sub_chunks = columns[column_index]->scatter(sub_chunks_indices.size(), selector);
if (column_index == 0) /// Set sizes for sub-chunks.
MutableColumns partition_index_to_column_split = columns[column_index]->scatter(partitions_size, chunk_row_index_to_partition_index);
/// Add chunks into partition_index_to_chunk with sizes of result columns
if (column_index == 0)
{
for (const auto & column_sub_chunk : column_sub_chunks)
for (const auto & partition_column : partition_index_to_column_split)
{
sub_chunks.emplace_back(Columns(), column_sub_chunk->size());
partition_index_to_chunk.emplace_back(Columns(), partition_column->size());
}
}
for (size_t sub_chunk_index = 0; sub_chunk_index < column_sub_chunks.size(); ++sub_chunk_index)
for (size_t partition_index = 0; partition_index < partitions_size; ++partition_index)
{
sub_chunks[sub_chunk_index].addColumn(std::move(column_sub_chunks[sub_chunk_index]));
partition_index_to_chunk[partition_index].addColumn(std::move(partition_index_to_column_split[partition_index]));
}
}
for (const auto & [partition_id, sub_chunk_index] : sub_chunks_indices)
for (const auto & [partition_key, partition_index] : partition_id_to_chunk_index)
{
getSinkForPartition(partition_id)->consume(std::move(sub_chunks[sub_chunk_index]));
auto sink = getSinkForPartitionKey(partition_key);
sink->consume(std::move(partition_index_to_chunk[partition_index]));
}
}
void PartitionedSink::onFinish()
{
for (auto & [partition_id, sink] : sinks)
for (auto & [_, sink] : partition_id_to_sink)
{
sink->onFinish();
}

View File

@ -1,5 +1,8 @@
#pragma once
#include <Common/HashTable/HashMap.h>
#include <Common/Arena.h>
#include <absl/container/flat_hash_map.h>
#include <Processors/Sinks/SinkToStorage.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/Context_fwd.h>
@ -34,9 +37,13 @@ private:
ExpressionActionsPtr partition_by_expr;
String partition_by_column_name;
std::unordered_map<String, SinkPtr> sinks;
absl::flat_hash_map<StringRef, SinkPtr> partition_id_to_sink;
HashMapWithSavedHash<StringRef, size_t> partition_id_to_chunk_index;
IColumn::Selector chunk_row_index_to_partition_index;
Arena partition_keys_arena;
SinkPtr getSinkForPartitionKey(StringRef partition_key);
SinkPtr getSinkForPartition(const String & partition_id);
};
}

View File

@ -66,6 +66,7 @@ StoragePtr StorageFactory::get(
bool has_force_restore_data_flag) const
{
String name, comment;
ASTStorage * storage_def = query.storage;
bool has_engine_args = false;
@ -107,7 +108,10 @@ StoragePtr StorageFactory::get(
}
else
{
if (!storage_def)
if (!query.storage)
throw Exception("Incorrect CREATE query: storage required", ErrorCodes::INCORRECT_QUERY);
if (!storage_def->engine)
throw Exception("Incorrect CREATE query: ENGINE required", ErrorCodes::ENGINE_REQUIRED);
const ASTFunction & engine_def = *storage_def->engine;

View File

@ -199,18 +199,27 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user
fs_table_path = user_files_absolute_path / fs_table_path;
Strings paths;
/// Do not use fs::canonical or fs::weakly_canonical.
/// Otherwise it would not be possible to work with symlinks in the `user_files_path` directory.
String path = fs::absolute(fs_table_path).lexically_normal(); /// Normalize path.
if (path.find_first_of("*?{") == std::string::npos)
if (path.find(PartitionedSink::PARTITION_ID_WILDCARD) != std::string::npos)
{
paths.push_back(path);
}
else if (path.find_first_of("*?{") == std::string::npos)
{
std::error_code error;
if (fs::exists(path))
total_bytes_to_read += fs::file_size(path, error);
paths.push_back(path);
}
else
{
paths = listFilesWithRegexpMatching("/", path, total_bytes_to_read);
}
for (const auto & cur_path : paths)
checkCreationIsAllowed(context, user_files_absolute_path, cur_path);
@ -313,7 +322,11 @@ StorageFile::StorageFile(const std::string & table_path_, const std::string & us
is_db_table = false;
paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read);
is_path_with_globs = paths.size() > 1;
path_for_partitioned_write = table_path_;
if (!paths.empty())
path_for_partitioned_write = paths.front();
else
path_for_partitioned_write = table_path_;
setStorageMetadata(args);
}
@ -853,6 +866,7 @@ SinkToStoragePtr StorageFile::write(
{
if (path_for_partitioned_write.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty path for partitioned write");
fs::create_directories(fs::path(path_for_partitioned_write).parent_path());
return std::make_shared<PartitionedStorageFileSink>(
@ -879,9 +893,10 @@ SinkToStoragePtr StorageFile::write(
path = paths.back();
fs::create_directories(fs::path(path).parent_path());
std::error_code error_code;
if (!context->getSettingsRef().engine_file_truncate_on_insert && !is_path_with_globs
&& !FormatFactory::instance().checkIfFormatSupportAppend(format_name, context, format_settings) && fs::exists(paths.back())
&& fs::file_size(paths.back()) != 0)
&& fs::file_size(paths.back(), error_code) != 0 && !error_code)
{
if (context->getSettingsRef().engine_file_allow_create_multiple_files)
{

View File

@ -732,8 +732,21 @@ void StorageLog::rename(const String & new_path_to_table_data, const StorageID &
renameInMemory(new_table_id);
}
void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &)
static std::chrono::seconds getLockTimeout(ContextPtr context)
{
const Settings & settings = context->getSettingsRef();
Int64 lock_timeout = settings.lock_acquire_timeout.totalSeconds();
if (settings.max_execution_time.totalSeconds() != 0 && settings.max_execution_time.totalSeconds() < lock_timeout)
lock_timeout = settings.max_execution_time.totalSeconds();
return std::chrono::seconds{lock_timeout};
}
void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr context, TableExclusiveLockHolder &)
{
WriteLock lock{rwlock, getLockTimeout(context)};
if (!lock)
throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED);
disk->clearDirectory(table_path);
for (auto & data_file : data_files)
@ -750,16 +763,6 @@ void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr
}
static std::chrono::seconds getLockTimeout(ContextPtr context)
{
const Settings & settings = context->getSettingsRef();
Int64 lock_timeout = settings.lock_acquire_timeout.totalSeconds();
if (settings.max_execution_time.totalSeconds() != 0 && settings.max_execution_time.totalSeconds() < lock_timeout)
lock_timeout = settings.max_execution_time.totalSeconds();
return std::chrono::seconds{lock_timeout};
}
Pipe StorageLog::read(
const Names & column_names,
const StorageMetadataPtr & metadata_snapshot,

View File

@ -302,40 +302,42 @@ String StorageS3Source::getName() const
Chunk StorageS3Source::generate()
{
if (!reader)
return {};
Chunk chunk;
if (reader->pull(chunk))
while (true)
{
UInt64 num_rows = chunk.getNumRows();
if (!reader || isCancelled())
break;
if (with_path_column)
chunk.addColumn(DataTypeLowCardinality{std::make_shared<DataTypeString>()}
.createColumnConst(num_rows, file_path)
->convertToFullColumnIfConst());
if (with_file_column)
Chunk chunk;
if (reader->pull(chunk))
{
size_t last_slash_pos = file_path.find_last_of('/');
chunk.addColumn(DataTypeLowCardinality{std::make_shared<DataTypeString>()}
.createColumnConst(num_rows, file_path.substr(last_slash_pos + 1))
->convertToFullColumnIfConst());
UInt64 num_rows = chunk.getNumRows();
if (with_path_column)
chunk.addColumn(DataTypeLowCardinality{std::make_shared<DataTypeString>()}
.createColumnConst(num_rows, file_path)
->convertToFullColumnIfConst());
if (with_file_column)
{
size_t last_slash_pos = file_path.find_last_of('/');
chunk.addColumn(DataTypeLowCardinality{std::make_shared<DataTypeString>()}
.createColumnConst(num_rows, file_path.substr(last_slash_pos + 1))
->convertToFullColumnIfConst());
}
return chunk;
}
return chunk;
{
std::lock_guard lock(reader_mutex);
reader.reset();
pipeline.reset();
read_buf.reset();
if (!initialize())
break;
}
}
{
std::lock_guard lock(reader_mutex);
reader.reset();
pipeline.reset();
read_buf.reset();
if (!initialize())
return {};
}
return generate();
return {};
}
static bool checkIfObjectExists(const std::shared_ptr<Aws::S3::S3Client> & client, const String & bucket, const String & key)

View File

@ -82,7 +82,7 @@ Pipe StorageS3Cluster::read(
{
StorageS3::updateClientAndAuthSettings(context, client_auth);
auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettings());
auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef());
StorageS3::updateClientAndAuthSettings(context, client_auth);
auto iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(*client_auth.client, client_auth.uri);

View File

@ -346,9 +346,10 @@ StorageURLSink::StorageURLSink(
: SinkToStorage(sample_block)
{
std::string content_type = FormatFactory::instance().getContentType(format, context, format_settings);
std::string content_encoding = toContentEncodingName(compression_method);
write_buf = wrapWriteBufferWithCompressionMethod(
std::make_unique<WriteBufferFromHTTP>(Poco::URI(uri), http_method, content_type, timeouts),
std::make_unique<WriteBufferFromHTTP>(Poco::URI(uri), http_method, content_type, content_encoding, timeouts),
compression_method, 3);
writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block,
context, {} /* write callback */, format_settings);

View File

@ -14,7 +14,7 @@ class StorageSystemAsynchronousInserts final :
public IStorageSystemOneBlock<StorageSystemAsynchronousInserts>
{
public:
std::string getName() const override { return "AsynchronousInserts"; }
std::string getName() const override { return "SystemAsynchronousInserts"; }
static NamesAndTypesList getNamesAndTypes();
protected:

View File

@ -66,7 +66,7 @@ StoragesInfo::getParts(MergeTreeData::DataPartStateVector & state, bool has_stat
}
StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context)
: query_id(context->getCurrentQueryId()), settings(context->getSettings())
: query_id(context->getCurrentQueryId()), settings(context->getSettingsRef())
{
/// Will apply WHERE to subset of columns and then add more columns.
/// This is kind of complicated, but we use WHERE to do less work.

View File

@ -132,11 +132,11 @@ void registerStorages()
registerStorageKafka(factory);
#endif
#if USE_FILELOG
#if USE_FILELOG
registerStorageFileLog(factory);
#endif
#endif
#if USE_AMQPCPP
#if USE_AMQPCPP
registerStorageRabbitMQ(factory);
#endif

View File

@ -205,7 +205,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr
if (name != "clusterAllReplicas")
cluster = context->getCluster(cluster_name_expanded);
else
cluster = context->getCluster(cluster_name_expanded)->getClusterWithReplicasAsShards(context->getSettings());
cluster = context->getCluster(cluster_name_expanded)->getClusterWithReplicasAsShards(context->getSettingsRef());
}
else
{
@ -241,7 +241,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr
bool treat_local_as_remote = false;
bool treat_local_port_as_remote = context->getApplicationType() == Context::ApplicationType::LOCAL;
cluster = std::make_shared<Cluster>(
context->getSettings(),
context->getSettingsRef(),
names,
configuration.username,
configuration.password,

View File

@ -98,6 +98,7 @@ if __name__ == "__main__":
'server.log': os.path.join(workspace_path, 'server.log'),
'fuzzer.log': os.path.join(workspace_path, 'fuzzer.log'),
'report.html': os.path.join(workspace_path, 'report.html'),
'core.gz': os.path.join(workspace_path, 'core.gz'),
}
s3_helper = S3Helper('https://s3.amazonaws.com')

View File

@ -3,10 +3,11 @@ import argparse
import json
import logging
import os
import platform
import shutil
import subprocess
import time
from typing import List, Optional, Set, Tuple, Union
from typing import Dict, List, Optional, Set, Tuple, Union
from github import Github
@ -23,24 +24,32 @@ NAME = "Push to Dockerhub (actions)"
TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check")
ImagesDict = Dict[str, dict]
class DockerImage:
def __init__(
self,
path: str,
repo: str,
only_amd64: bool,
parent: Optional["DockerImage"] = None,
gh_repo_path: str = GITHUB_WORKSPACE,
):
self.path = path
self.full_path = os.path.join(gh_repo_path, path)
self.repo = repo
self.only_amd64 = only_amd64
self.parent = parent
self.built = False
def __eq__(self, other) -> bool: # type: ignore
"""Is used to check if DockerImage is in a set or not"""
return self.path == other.path and self.repo == other.repo
return (
self.path == other.path
and self.repo == other.repo
and self.only_amd64 == other.only_amd64
)
def __lt__(self, other) -> bool:
if not isinstance(other, DockerImage):
@ -65,9 +74,8 @@ class DockerImage:
return f"DockerImage(path={self.path},repo={self.repo},parent={self.parent})"
def get_changed_docker_images(
pr_info: PRInfo, repo_path: str, image_file_path: str
) -> Set[DockerImage]:
def get_images_dict(repo_path: str, image_file_path: str) -> ImagesDict:
"""Return images suppose to build on the current architecture host"""
images_dict = {}
path_to_images_file = os.path.join(repo_path, image_file_path)
if os.path.exists(path_to_images_file):
@ -78,6 +86,13 @@ def get_changed_docker_images(
"Image file %s doesnt exists in repo %s", image_file_path, repo_path
)
return images_dict
def get_changed_docker_images(
pr_info: PRInfo, images_dict: ImagesDict
) -> Set[DockerImage]:
if not images_dict:
return set()
@ -96,6 +111,7 @@ def get_changed_docker_images(
for f in files_changed:
if f.startswith(dockerfile_dir):
name = image_description["name"]
only_amd64 = image_description.get("only_amd64", False)
logging.info(
"Found changed file '%s' which affects "
"docker image '%s' with path '%s'",
@ -103,7 +119,7 @@ def get_changed_docker_images(
name,
dockerfile_dir,
)
changed_images.append(DockerImage(dockerfile_dir, name))
changed_images.append(DockerImage(dockerfile_dir, name, only_amd64))
break
# The order is important: dependents should go later than bases, so that
@ -118,9 +134,9 @@ def get_changed_docker_images(
dependent,
image,
)
changed_images.append(
DockerImage(dependent, images_dict[dependent]["name"], image)
)
name = images_dict[dependent]["name"]
only_amd64 = images_dict[dependent].get("only_amd64", False)
changed_images.append(DockerImage(dependent, name, only_amd64, image))
index += 1
if index > 5 * len(images_dict):
# Sanity check to prevent infinite loop.
@ -161,12 +177,43 @@ def gen_versions(
return versions, result_version
def build_and_push_dummy_image(
image: DockerImage,
version_string: str,
push: bool,
) -> Tuple[bool, str]:
dummy_source = "ubuntu:20.04"
logging.info("Building docker image %s as %s", image.repo, dummy_source)
build_log = os.path.join(
TEMP_PATH, f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}"
)
with open(build_log, "wb") as bl:
cmd = (
f"docker pull {dummy_source}; "
f"docker tag {dummy_source} {image.repo}:{version_string}; "
)
if push:
cmd += f"docker push {image.repo}:{version_string}"
logging.info("Docker command to run: %s", cmd)
with subprocess.Popen(cmd, shell=True, stderr=bl, stdout=bl) as proc:
retcode = proc.wait()
if retcode != 0:
return False, build_log
logging.info("Processing of %s successfully finished", image.repo)
return True, build_log
def build_and_push_one_image(
image: DockerImage,
version_string: str,
push: bool,
child: bool,
) -> Tuple[bool, str]:
if image.only_amd64 and platform.machine() not in ["amd64", "x86_64"]:
return build_and_push_dummy_image(image, version_string, push)
logging.info(
"Building docker image %s with version %s from path %s",
image.repo,
@ -290,10 +337,15 @@ def parse_args() -> argparse.Namespace:
default="clickhouse",
help="docker hub repository prefix",
)
parser.add_argument(
"--all",
action="store_true",
help="rebuild all images",
)
parser.add_argument(
"--image-path",
type=str,
action="append",
nargs="*",
help="list of image paths to build instead of using pr_info + diff URL, "
"e.g. 'docker/packager/binary'",
)
@ -336,15 +388,18 @@ def main():
shutil.rmtree(TEMP_PATH)
os.makedirs(TEMP_PATH)
if args.image_path:
images_dict = get_images_dict(GITHUB_WORKSPACE, "docker/images.json")
if args.all:
pr_info = PRInfo()
pr_info.changed_files = set(images_dict.keys())
elif args.image_path:
pr_info = PRInfo()
pr_info.changed_files = set(i for i in args.image_path)
else:
pr_info = PRInfo(need_changed_files=True)
changed_images = get_changed_docker_images(
pr_info, GITHUB_WORKSPACE, "docker/images.json"
)
changed_images = get_changed_docker_images(pr_info, images_dict)
logging.info("Has changed images %s", ", ".join([im.path for im in changed_images]))
image_versions, result_version = gen_versions(pr_info, args.suffix)

View File

@ -57,7 +57,7 @@ def parse_args() -> argparse.Namespace:
args = parser.parse_args()
if len(args.suffixes) < 2:
raise parser.error("at least two --suffix arguments should be given")
parser.error("at least two --suffix arguments should be given")
return args
@ -81,6 +81,7 @@ def strip_suffix(suffix: str, images: Images) -> Images:
def check_sources(to_merge: Dict[str, Images]) -> Images:
"""get a dict {arch1: Images, arch2: Images}"""
result = {} # type: Images
first_suffix = ""
for suffix, images in to_merge.items():

View File

@ -23,54 +23,69 @@ class TestDockerImageCheck(unittest.TestCase):
"docker/docs/builder",
}
images = sorted(
list(di.get_changed_docker_images(pr_info, "/", self.docker_images_path))
list(
di.get_changed_docker_images(
pr_info, di.get_images_dict("/", self.docker_images_path)
)
)
)
self.maxDiff = None
expected = sorted(
[
di.DockerImage("docker/test/base", "clickhouse/test-base"),
di.DockerImage("docker/docs/builder", "clickhouse/docs-builder"),
di.DockerImage("docker/test/base", "clickhouse/test-base", False),
di.DockerImage("docker/docs/builder", "clickhouse/docs-builder", True),
di.DockerImage(
"docker/test/stateless",
"clickhouse/stateless-test",
False,
"clickhouse/test-base",
),
di.DockerImage(
"docker/test/integration/base",
"clickhouse/integration-test",
False,
"clickhouse/test-base",
),
di.DockerImage(
"docker/test/fuzzer", "clickhouse/fuzzer", "clickhouse/test-base"
"docker/test/fuzzer",
"clickhouse/fuzzer",
False,
"clickhouse/test-base",
),
di.DockerImage(
"docker/test/keeper-jepsen",
"clickhouse/keeper-jepsen-test",
False,
"clickhouse/test-base",
),
di.DockerImage(
"docker/docs/check",
"clickhouse/docs-check",
False,
"clickhouse/docs-builder",
),
di.DockerImage(
"docker/docs/release",
"clickhouse/docs-release",
False,
"clickhouse/docs-builder",
),
di.DockerImage(
"docker/test/stateful",
"clickhouse/stateful-test",
False,
"clickhouse/stateless-test",
),
di.DockerImage(
"docker/test/unit",
"clickhouse/unit-test",
False,
"clickhouse/stateless-test",
),
di.DockerImage(
"docker/test/stress",
"clickhouse/stress-test",
False,
"clickhouse/stateful-test",
),
]
@ -92,13 +107,15 @@ class TestDockerImageCheck(unittest.TestCase):
@patch("builtins.open")
@patch("subprocess.Popen")
def test_build_and_push_one_image(self, mock_popen, mock_open):
@patch("platform.machine")
def test_build_and_push_one_image(self, mock_machine, mock_popen, mock_open):
mock_popen.return_value.__enter__.return_value.wait.return_value = 0
image = di.DockerImage("path", "name", gh_repo_path="")
image = di.DockerImage("path", "name", False, gh_repo_path="")
result, _ = di.build_and_push_one_image(image, "version", True, True)
mock_open.assert_called_once()
mock_popen.assert_called_once()
mock_machine.assert_not_called()
self.assertIn(
"docker buildx build --builder default --build-arg FROM_TAG=version "
"--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version --cache-from "
@ -106,11 +123,15 @@ class TestDockerImageCheck(unittest.TestCase):
mock_popen.call_args.args,
)
self.assertTrue(result)
mock_open.reset_mock()
mock_popen.reset_mock()
mock_machine.reset_mock()
mock_open.reset()
mock_popen.reset()
mock_popen.return_value.__enter__.return_value.wait.return_value = 0
result, _ = di.build_and_push_one_image(image, "version2", False, True)
mock_open.assert_called_once()
mock_popen.assert_called_once()
mock_machine.assert_not_called()
self.assertIn(
"docker buildx build --builder default --build-arg FROM_TAG=version2 "
"--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version2 --cache-from "
@ -119,8 +140,14 @@ class TestDockerImageCheck(unittest.TestCase):
)
self.assertTrue(result)
mock_open.reset_mock()
mock_popen.reset_mock()
mock_machine.reset_mock()
mock_popen.return_value.__enter__.return_value.wait.return_value = 1
result, _ = di.build_and_push_one_image(image, "version2", False, False)
mock_open.assert_called_once()
mock_popen.assert_called_once()
mock_machine.assert_not_called()
self.assertIn(
"docker buildx build --builder default "
"--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version2 --cache-from "
@ -129,13 +156,37 @@ class TestDockerImageCheck(unittest.TestCase):
)
self.assertFalse(result)
mock_open.reset_mock()
mock_popen.reset_mock()
mock_machine.reset_mock()
only_amd64_image = di.DockerImage("path", "name", True)
mock_popen.return_value.__enter__.return_value.wait.return_value = 0
result, _ = di.build_and_push_one_image(only_amd64_image, "version", True, True)
mock_open.assert_called_once()
mock_popen.assert_called_once()
mock_machine.assert_called_once()
self.assertIn(
"docker pull ubuntu:20.04; docker tag ubuntu:20.04 name:version; "
"docker push name:version",
mock_popen.call_args.args,
)
self.assertTrue(result)
result, _ = di.build_and_push_one_image(
only_amd64_image, "version", False, True
)
self.assertIn(
"docker pull ubuntu:20.04; docker tag ubuntu:20.04 name:version; ",
mock_popen.call_args.args,
)
@patch("docker_images_check.build_and_push_one_image")
def test_process_image_with_parents(self, mock_build):
mock_build.side_effect = lambda w, x, y, z: (True, f"{w.repo}_{x}.log")
im1 = di.DockerImage("path1", "repo1")
im2 = di.DockerImage("path2", "repo2", im1)
im3 = di.DockerImage("path3", "repo3", im2)
im4 = di.DockerImage("path4", "repo4", im1)
im1 = di.DockerImage("path1", "repo1", False)
im2 = di.DockerImage("path2", "repo2", False, im1)
im3 = di.DockerImage("path3", "repo3", False, im2)
im4 = di.DockerImage("path4", "repo4", False, im1)
# We use a list to have a deterministic order of image builds
images = [im4, im1, im3, im2, im1]
results = [

Some files were not shown because too many files have changed in this diff.