Merge branch 'master' into database_atomic_improvements

This commit is contained in:
Alexander Tokmakov 2020-07-26 13:40:05 +03:00
commit 6bfe88e83f
81 changed files with 1139 additions and 476 deletions

View File

@ -48,8 +48,9 @@ then
mkdir /output/ch
git -C /output/ch init --bare
git -C /output/ch remote add origin /build
git -C /output/ch fetch --no-tags --depth 50 origin HEAD
git -C /output/ch reset --soft FETCH_HEAD
git -C /output/ch fetch --no-tags --depth 50 origin HEAD:pr
git -C /output/ch fetch --no-tags --depth 50 origin master:master
git -C /output/ch reset --soft pr
git -C /output/ch log -5
fi

View File

@ -72,7 +72,7 @@ function watchdog
function fuzz
{
./clickhouse-server --config-file db/config.xml -- --path db 2>&1 | tail -100000 > server.log &
./clickhouse-server --config-file db/config.xml -- --path db 2>&1 | tail -10000 > server.log &
server_pid=$!
kill -0 $server_pid
while ! ./clickhouse-client --query "select 1" && kill -0 $server_pid ; do echo . ; sleep 1 ; done
@ -83,7 +83,7 @@ function fuzz
fuzzer_exit_code=0
./clickhouse-client --query-fuzzer-runs=1000 \
< <(for f in $(ls ch/tests/queries/0_stateless/*.sql | sort -R); do cat "$f"; echo ';'; done) \
> >(tail -100000 > fuzzer.log) \
> >(tail -10000 > fuzzer.log) \
2>&1 \
|| fuzzer_exit_code=$?
@ -160,7 +160,7 @@ case "$stage" in
echo "success" > status.txt
else
echo "failure" > status.txt
if ! grep -a "received signal \|Logical error" server.log > description.txt
if ! grep -a "Received signal \|Logical error" server.log > description.txt
then
echo "Fuzzer exit code $fuzzer_exit_code. See the logs" > description.txt
fi

View File

@ -317,9 +317,11 @@ create view right_query_log as select *
'$(cat "right-query-log.tsv.columns")');
create view query_logs as
select *, 0 version from left_query_log
select 0 version, query_id, ProfileEvents.Names, ProfileEvents.Values,
query_duration_ms from left_query_log
union all
select *, 1 version from right_query_log
select 1 version, query_id, ProfileEvents.Names, ProfileEvents.Values,
query_duration_ms from right_query_log
;
-- This is a single source of truth on all metrics we have for query runs. The

View File

@ -6,7 +6,6 @@ trap 'kill $(jobs -pr) ||:' EXIT
mkdir db0 ||:
mkdir left ||:
mkdir right ||:
left_pr=$1
left_sha=$2
@ -24,7 +23,7 @@ dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_e
function download
{
# Historically there were various path for the performance test package.
# Historically there were various paths for the performance test package.
# Test all of them.
for path in "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/"{,clickhouse_build_check/}"performance/performance.tgz"
do
@ -34,22 +33,13 @@ function download
fi
done
for path in "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/"{,clickhouse_build_check/}"performance/performance.tgz"
do
if curl --fail --head "$path"
then
right_path="$path"
fi
done
# might have the same version on left and right
if ! [ "$left_path" = "$right_path" ]
# Might have the same version on left and right (for testing).
if ! [ "$left_sha" = "$right_sha" ]
then
wget -nv -nd -c "$left_path" -O- | tar -C left --strip-components=1 -zxv &
wget -nv -nd -c "$right_path" -O- | tar -C right --strip-components=1 -zxv &
else
mkdir right ||:
wget -nv -nd -c "$left_path" -O- | tar -C left --strip-components=1 -zxv && cp -a left/* right &
mkdir left ||:
cp -a right/* left &
fi
for dataset_name in $datasets

View File

@ -1,38 +1,23 @@
#!/bin/bash
set -ex
chown nobody workspace output
chgrp nogroup workspace output
chmod 777 workspace output
cd workspace
# Fetch the repository to find and describe the compared revisions.
rm -rf ch ||:
time git clone --depth 50 --bare https://github.com/ClickHouse/ClickHouse ch
git -C ch fetch origin "$SHA_TO_TEST"
# Use the packaged repository to find the revision we will compare to.
function find_reference_sha
{
# If not master, try to fetch pull/.../{head,merge}
if [ "$PR_TO_TEST" != "0" ]
then
git -C ch fetch origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*"
fi
# Go back from the revision to be tested, trying to find the closest published
# testing release.
start_ref="$SHA_TO_TEST"~
# If we are testing a PR, and it merges with master successfully, we are
# building and testing not the nominal last SHA specified by pull/.../head
# and SHA_TO_TEST, but a revision that is merged with recent master, given
# by pull/.../merge ref.
# Master is the first parent of the pull/.../merge.
if git -C ch rev-parse "pull/$PR_TO_TEST/merge"
# testing release. The PR branch may be either pull/*/head which is the
# author's branch, or pull/*/merge, which is head merged with some master
# automatically by GitHub. We will use a merge base with master as a reference
# for testing (or some older commit). A caveat is that if we're testing the
# master, the merge base is the tested commit itself, so we have to step back
# once.
start_ref=$(git -C ch merge-base origin/master pr)
if [ "PR_TO_TEST" == "0" ]
then
start_ref="pull/$PR_TO_TEST/merge~"
start_ref=$start_ref~
fi
# Loop back to find a commit that actually has a published perf test package.
while :
do
# FIXME the original idea was to compare to a closest testing tag, which
@ -48,10 +33,10 @@ function find_reference_sha
# dereference the tag to get the commit it points to, hence the '~0' thing.
REF_SHA=$(git -C ch rev-parse "$ref_tag~0")
# FIXME sometimes we have testing tags on commits without published builds --
# normally these are documentation commits. Loop to skip them.
# Historically there were various path for the performance test package.
# Test all of them.
# FIXME sometimes we have testing tags on commits without published builds.
# Normally these are documentation commits. Loop to skip them.
# Historically there were various path for the performance test package,
# test all of them.
unset found
for path in "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/"{,clickhouse_build_check/}"performance/performance.tgz"
do
@ -69,6 +54,24 @@ function find_reference_sha
REF_PR=0
}
chown nobody workspace output
chgrp nogroup workspace output
chmod 777 workspace output
cd workspace
# Download the package for the version we are going to test
for path in "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/"{,clickhouse_build_check/}"performance/performance.tgz"
do
if curl --fail --head "$path"
then
right_path="$path"
fi
done
mkdir right
wget -nv -nd -c "$right_path" -O- | tar -C right --strip-components=1 -zxv
# Find reference revision if not specified explicitly
if [ "$REF_SHA" == "" ]; then find_reference_sha; fi
if [ "$REF_SHA" == "" ]; then echo Reference SHA is not specified ; exit 1 ; fi

View File

@ -0,0 +1,24 @@
# Statement name (for example, SHOW USER)
Brief description of what the statement does.
Syntax:
```sql
Syntax of the statement.
```
## Other necessary sections of the description (Optional)
Examples of descriptions with a complicated structure:
- https://clickhouse.tech/docs/en/sql-reference/statements/grant/
- https://clickhouse.tech/docs/en/sql-reference/statements/revoke/
- https://clickhouse.tech/docs/en/sql-reference/statements/select/join/
## See Also (Optional)
Links to related topics as a list.
- [link](#)

View File

@ -1,3 +1,36 @@
## system.asynchronous\_metric\_log {#system-tables-async-log}
Contains the historical values for `system.asynchronous_log` (see [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics))
Contains the historical values for `system.asynchronous_metrics`, which are saved once per minute. This feature is enabled by default.
Columns:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
**Example**
``` sql
SELECT * FROM system.asynchronous_metric_log LIMIT 10
```
``` text
┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬────value─┐
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pmuzzy │ 0 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pdirty │ 4214 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.run_intervals │ 0 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.num_runs │ 0 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.retained │ 17657856 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.mapped │ 71471104 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.resident │ 61538304 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.metadata │ 6199264 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.allocated │ 38074336 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.epoch │ 2 │
└────────────┴─────────────────────┴──────────────────────────────────────────┴──────────┘
```
**See Also**
- [system.asynchronous\_metrics](../system-tables/asynchronous_metrics.md) — Contains metrics that are calculated periodically in the background.
- [system.metric_log](../system-tables/metric_log.md) — Contains the history of metric values from the `system.metrics` and `system.events` tables, periodically flushed to disk.

View File

@ -1,4 +1,4 @@
# system.asynchronous\_metrics {#system_tables-asynchronous_metrics}
# system.asynchronous_metrics {#system_tables-asynchronous_metrics}
Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use.

View File

@ -49,7 +49,7 @@ CurrentMetric_ReplicatedChecks: 0
**See also**
- [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
- [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
- [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md) — Contains periodically calculated metrics.
- [system.events](../../operations/system-tables/events.md) — Contains a number of events that occurred.
- [system.metrics](../../operations/system-tables/metrics.md) — Contains instantly calculated metrics.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.

View File

@ -23,7 +23,8 @@ For temporary data an unique temporary data directory is created by default. If
Basic usage:
``` bash
$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" -q "query"
$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" \
--query "query"
```
Arguments:
@ -46,7 +47,8 @@ Also there are arguments for each ClickHouse configuration variable which are mo
## Examples {#examples}
``` bash
$ echo -e "1,2\n3,4" | clickhouse-local -S "a Int64, b Int64" -if "CSV" -q "SELECT * FROM table"
$ echo -e "1,2\n3,4" | clickhouse-local --structure "a Int64, b Int64" \
--input-format "CSV" --query "SELECT * FROM table"
Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec.
1 2
3 4
@ -55,16 +57,37 @@ Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec.
The previous example is the same as:
``` bash
$ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin); SELECT a, b FROM table; DROP TABLE table"
$ echo -e "1,2\n3,4" | clickhouse-local --query "
CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin);
SELECT a, b FROM table;
DROP TABLE table"
Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec.
1 2
3 4
```
You don't have to use `stdin` or the `--file` argument; you can open any number of files using the [`file` table function](../../sql-reference/table-functions/file.md):
``` bash
$ echo 1 | tee 1.tsv
1
$ echo 2 | tee 2.tsv
2
$ clickhouse-local --query "
select * from file('1.tsv', TSV, 'a int') t1
cross join file('2.tsv', TSV, 'b int') t2"
1 2
```
Now let's output memory usage for each Unix user:
``` bash
$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' | clickhouse-local -S "user String, mem Float64" -q "SELECT user, round(sum(mem), 2) as memTotal FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty"
$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \
| clickhouse-local --structure "user String, mem Float64" \
--query "SELECT user, round(sum(mem), 2) as memTotal
FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty"
```
``` text

View File

@ -503,3 +503,34 @@ Supported modifiers for Format:
| %% | a % sign | % |
[Original article](https://clickhouse.tech/docs/en/query_language/functions/date_time_functions/) <!--hide-->
## FROM_UNIXTIME
When there is only a single argument of integer type, it acts in the same way as `toDateTime` and returns the [DateTime](../../sql-reference/data-types/datetime.md) type.
For example:
```sql
SELECT FROM_UNIXTIME(423543535)
```
```text
┌─FROM_UNIXTIME(423543535)─┐
│ 1983-06-04 10:58:55 │
└──────────────────────────┘
```
When there are two arguments, the first of integer or DateTime type and the second a constant format string, it acts in the same way as `formatDateTime` and returns a `String` type.
For example:
```sql
SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime
```
```text
┌─DateTime────────────┐
│ 2009-02-11 14:42:23 │
└─────────────────────┘
```

View File

@ -1351,6 +1351,44 @@ len: 30
- [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii)
## randomFixedString {#randomfixedstring}
Generates a binary string of the specified length filled with random bytes (including zero bytes).
**Syntax**
``` sql
randomFixedString(length);
```
**Parameters**
- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value(s)**
- String filled with random bytes.
Type: [FixedString](../../sql-reference/data-types/fixedstring.md).
**Example**
Query:
```sql
SELECT randomFixedString(13) as rnd, toTypeName(rnd)
```
Result:
```text
┌─rnd──────┬─toTypeName(randomFixedString(13))─┐
│ j▒h㋖HɨZ'▒ │ FixedString(13) │
└──────────┴───────────────────────────────────┘
```
## randomStringUTF8 {#randomstringutf8}
Generates a random string of the specified length. The resulting string contains valid UTF-8 code points, though the code point values may fall outside the range of assigned Unicode.

View File

@ -1,5 +1,6 @@
---
toc_priority: 42
toc_title: ATTACH
---
# ATTACH Statement {#attach}

View File

@ -1,5 +1,6 @@
---
toc_priority: 43
toc_title: CHECK
---
# CHECK TABLE Statement {#check-table}

View File

@ -1,5 +1,6 @@
---
toc_priority: 44
toc_title: DESCRIBE
---
# DESCRIBE TABLE Statement {#misc-describe-table}

View File

@ -1,5 +1,6 @@
---
toc_priority: 45
toc_title: DETACH
---
# DETACH Statement {#detach}
@ -11,6 +12,5 @@ DETACH TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
```
This does not delete the table's data or metadata. On the next server launch, the server will read the metadata and find out about the table again.
Similarly, a “detached” table can be re-attached using the `ATTACH` query (with the exception of system tables, which do not have metadata stored for them).
There is no `DETACH DATABASE` query.
Similarly, a “detached” table can be re-attached using the `ATTACH` query (with the exception of system tables, which do not have metadata stored for them).
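For illustration, a minimal detach/re-attach round trip (the table name is hypothetical):
``` sql
DETACH TABLE IF EXISTS db.visits;
-- The table no longer appears in SHOW TABLES, but its data stays on disk.
ATTACH TABLE db.visits;
```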

View File

@ -1,5 +1,6 @@
---
toc_priority: 46
toc_title: DROP
---
# DROP Statements {#drop}

View File

@ -1,5 +1,6 @@
---
toc_priority: 47
toc_title: EXISTS
---
# EXISTS Statement {#exists-statement}

View File

@ -1,8 +1,9 @@
---
toc_priority: 48
toc_title: KILL
---
## KILL Statements {#kill-statements}
# KILL Statements {#kill-statements}
There are two kinds of kill statements: to kill a query and to kill a mutation.
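As a quick sketch of both forms (the `WHERE` conditions are illustrative):
``` sql
-- Terminate a running query by its query_id.
KILL QUERY WHERE query_id = 'some-query-id';
-- Cancel an unfinished mutation on a table.
KILL MUTATION WHERE database = 'db' AND table = 'table';
```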

View File

@ -1,5 +1,6 @@
---
toc_priority: 49
toc_title: OPTIMIZE
---
# OPTIMIZE Statement {#misc_operations-optimize}

View File

@ -1,5 +1,6 @@
---
toc_priority: 50
toc_title: RENAME
---
# RENAME Statement {#misc_operations-rename}

View File

@ -1,5 +1,6 @@
---
toc_priority: 52
toc_title: SET ROLE
---
# SET ROLE Statement {#set-role-statement}

View File

@ -1,5 +1,6 @@
---
toc_priority: 51
toc_title: SET
---
# SET Statement {#query-set}

View File

@ -5,6 +5,8 @@ toc_title: SYSTEM
# SYSTEM Statements {#query-language-system}
The list of available `SYSTEM` statements:
- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries)
- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
- [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
@ -36,7 +38,7 @@ toc_title: SYSTEM
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)
## RELOAD EMBEDDED DICTIONARIES\] {#query_language-system-reload-emdedded-dictionaries}
## RELOAD EMBEDDED DICTIONARIES {#query_language-system-reload-emdedded-dictionaries}
Reload all [Internal dictionaries](../../sql-reference/dictionaries/internal-dicts.md).
By default, internal dictionaries are disabled.
@ -48,7 +50,7 @@ Reloads all dictionaries that have been successfully loaded before.
By default, dictionaries are loaded lazily (see [dictionaries\_lazy\_load](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load)), so instead of being loaded automatically at startup, they are initialized on first access through dictGet function or SELECT from tables with ENGINE = Dictionary. The `SYSTEM RELOAD DICTIONARIES` query reloads such dictionaries (LOADED).
Always returns `Ok.` regardless of the result of the dictionary update.
## RELOAD DICTIONARY Dictionary\_name {#query_language-system-reload-dictionary}
## RELOAD DICTIONARY {#query_language-system-reload-dictionary}
Completely reloads a dictionary `dictionary_name`, regardless of the state of the dictionary (LOADED / NOT\_LOADED / FAILED).
Always returns `Ok.` regardless of the result of updating the dictionary.
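A short usage sketch (the dictionary name is a placeholder):
``` sql
-- Reload every dictionary that was successfully loaded before.
SYSTEM RELOAD DICTIONARIES;
-- Fully reload a single dictionary, regardless of its current state.
SYSTEM RELOAD DICTIONARY dictionary_name;
```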

View File

@ -1,5 +1,6 @@
---
toc_priority: 53
toc_title: TRUNCATE
---
# TRUNCATE Statement {#truncate-statement}

View File

@ -1,5 +1,6 @@
---
toc_priority: 54
toc_title: USE
---
# USE Statement {#use}

View File

@ -46,6 +46,41 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
- [system.events](#system_tables-events) — a table with the number of events that have occurred.
- [system.metric\_log](#system_tables-metric_log) — a table that records the history of metric values from `system.metrics` and `system.events`.
## system.asynchronous\_metric\_log {#system-tables-async-log}
Contains the historical values of metrics from the `system.asynchronous_metrics` table, which are saved once per minute. Enabled by default.
Columns:
- `event_date` ([Date](../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../sql-reference/data-types/datetime.md)) — Event time.
- `name` ([String](../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Float64](../sql-reference/data-types/float.md)) — Metric value.
**Example**
``` sql
SELECT * FROM system.asynchronous_metric_log LIMIT 10
```
``` text
┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬────value─┐
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pmuzzy │ 0 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pdirty │ 4214 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.run_intervals │ 0 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.num_runs │ 0 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.retained │ 17657856 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.mapped │ 71471104 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.resident │ 61538304 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.metadata │ 6199264 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.allocated │ 38074336 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.epoch │ 2 │
└────────────┴─────────────────────┴──────────────────────────────────────────┴──────────┘
```
**See Also**
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous-metrics.md) — Contains metrics that are calculated periodically in the background.
- [system.metric_log](#system_tables-metric_log) — a table that records the history of metric values from `system.metrics` and `system.events`.
## system.clusters {#system-clusters}
Contains information about the clusters available in the configuration file and the servers that belong to them.
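For a quick look at the table's contents, a minimal query might be:
``` sql
SELECT * FROM system.clusters LIMIT 1 FORMAT Vertical;
```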

View File

@ -1334,9 +1334,46 @@ len: 30
- [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii)
## randomFixedString {#randomfixedstring}
Generates a binary string of the specified length, filled with random bytes (including zero bytes).
**Syntax**
``` sql
randomFixedString(length);
```
**Parameters**
- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value(s)**
- A string filled with random bytes.
Type: [FixedString](../../sql-reference/data-types/fixedstring.md).
**Example**
Query:
```sql
SELECT randomFixedString(13) as rnd, toTypeName(rnd)
```
Result:
```text
┌─rnd──────┬─toTypeName(randomFixedString(13))─┐
│ j▒h㋖HɨZ'▒ │ FixedString(13) │
└──────────┴───────────────────────────────────┘
```
## randomStringUTF8 {#randomstringutf8}
Generates a string of a certain length containing a random string in UTF-8 encoding.
Generates a string of the specified length with random characters in UTF-8 encoding.
**Syntax**

View File

@ -35,4 +35,4 @@ soupsieve==2.0.1
termcolor==1.1.0
tornado==5.1.1
Unidecode==1.1.1
urllib3==1.25.9
urllib3==1.25.10

View File

@ -9,4 +9,4 @@ python-slugify==4.0.1
PyYAML==5.3.1
requests==2.24.0
text-unidecode==1.3
urllib3==1.25.9
urllib3==1.25.10

View File

@ -37,7 +37,7 @@ ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after]
Adds a new column to the table with the specified `name`, `type`, [`codec`](../../sql-reference/statements/create.md#codecs) and `default_expr` (see [Default expressions](../../sql-reference/statements/create.md#create-default-values)).
If the query contains `IF NOT EXISTS`, ClickHouse does not return an error when the column already exists. If `AFTER name_after` (the name of another column in the table) is specified, the new column is added after that column; otherwise it is added to the end of the table. Note that a new column cannot be added to the beginning of the table; `name_after` can be any column that already exists in the table when the action is performed.
Adding a column only changes the table structure and does not affect existing data; no new data appears on disk after the `ALTER` completes. If a column's data is missing when the table is queried, ClickHouse fills it in with the column's default value (the default expression if one is defined, otherwise zeros or empty strings). The column's data appears on disk after data parts are merged (see [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)).
@ -166,7 +166,7 @@ MODIFY ORDER BY new_expression
This operation is only supported for tables in the [`MergeTree`](../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../engines/table-engines/mergetree-family/replication.md) tables). It changes the table's [sorting key](../../engines/table-engines/mergetree-family/mergetree.md) to `new_expression` (an expression or tuple of expressions). The primary key remains unchanged.
This operation is lightweight and only changes the metadata.
### Manipulating data skipping indices {#manipulations-with-data-skipping-indices}

View File

@ -323,7 +323,8 @@ struct Settings : public SettingsCollection<Settings>
M(SettingLogsLevel, send_logs_level, LogsLevel::fatal, "Send server text logs with specified minimum level to client. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
M(SettingBool, enable_optimize_predicate_expression, 1, "If it is set to true, optimize predicates to subqueries.", 0) \
M(SettingBool, enable_optimize_predicate_expression_to_final_subquery, 1, "Allow push predicate to final subquery.", 0) \
\
M(SettingBool, allow_push_predicate_when_subquery_contains_with, 1, "Allows push predicate when subquery contains WITH clause", 0) \
\
M(SettingUInt64, low_cardinality_max_dictionary_size, 8192, "Maximum size (in rows) of shared global dictionary for LowCardinality type.", 0) \
M(SettingBool, low_cardinality_use_single_dictionary_for_part, false, "LowCardinality type serialization setting. If is true, than will use additional keys when global dictionary overflows. Otherwise, will create several shared dictionaries.", 0) \
M(SettingBool, decimal_check_overflow, true, "Check overflow of decimal arithmetic/comparison operations", 0) \
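The new `allow_push_predicate_when_subquery_contains_with` setting added above is a per-query boolean; assuming the usual `SET` mechanics, it can be toggled like any other setting:
``` sql
-- Disable pushing predicates into subqueries that contain a WITH clause.
SET allow_push_predicate_when_subquery_contains_with = 0;
```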

View File

@ -1,11 +1,108 @@
#include <DataStreams/ParallelParsingBlockInputStream.h>
#include <IO/ReadBuffer.h>
#include <Common/CurrentThread.h>
#include <Common/setThreadName.h>
#include <ext/scope_guard.h>
namespace DB
{
void ParallelParsingBlockInputStream::segmentatorThreadFunction()
ParallelParsingBlockInputStream::ParallelParsingBlockInputStream(const Params & params)
: header(params.input_creator_params.sample),
row_input_format_params(params.input_creator_params.row_input_format_params),
format_settings(params.input_creator_params.settings),
input_processor_creator(params.input_processor_creator),
min_chunk_bytes(params.min_chunk_bytes),
original_buffer(params.read_buffer),
// Subtract one thread that we use for segmentation and one for
// reading. After that, must have at least two threads left for
// parsing. See the assertion below.
pool(std::max(2, params.max_threads - 2)),
file_segmentation_engine(params.file_segmentation_engine)
{
// See comment above.
assert(params.max_threads >= 4);
// One unit for each thread, including segmentator and reader, plus a
// couple more units so that the segmentation thread doesn't spuriously
// bump into reader thread on wraparound.
processing_units.resize(params.max_threads + 2);
segmentator_thread = ThreadFromGlobalPool(
&ParallelParsingBlockInputStream::segmentatorThreadFunction, this, CurrentThread::getGroup());
}
ParallelParsingBlockInputStream::~ParallelParsingBlockInputStream()
{
finishAndWait();
}
void ParallelParsingBlockInputStream::cancel(bool kill)
{
/**
* Can be called multiple times, from different threads. Saturate the
* kill flag with OR.
*/
if (kill)
is_killed = true;
is_cancelled = true;
/*
* The format parsers themselves are not being cancelled here, so we'll
* have to wait until they process the current block. Given that the
* chunk size is on the order of megabytes, this shouldn't be too long.
* We can't call IInputFormat->cancel here, because the parser object is
* local to the parser thread, and we don't want to introduce any
* synchronization between parser threads and the other threads to get
* better performance. An ideal solution would be to add a callback to
* IInputFormat that checks whether it was cancelled.
*/
finishAndWait();
}
void ParallelParsingBlockInputStream::scheduleParserThreadForUnitWithNumber(size_t ticket_number)
{
pool.scheduleOrThrowOnError([this, ticket_number, group = CurrentThread::getGroup()]()
{
parserThreadFunction(group, ticket_number);
});
}
void ParallelParsingBlockInputStream::finishAndWait()
{
finished = true;
{
std::unique_lock<std::mutex> lock(mutex);
segmentator_condvar.notify_all();
reader_condvar.notify_all();
}
if (segmentator_thread.joinable())
segmentator_thread.join();
try
{
pool.wait();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
void ParallelParsingBlockInputStream::segmentatorThreadFunction(ThreadGroupStatusPtr thread_group)
{
SCOPE_EXIT(
if (thread_group)
CurrentThread::detachQueryIfNotDetached();
);
if (thread_group)
CurrentThread::attachTo(thread_group);
setThreadName("Segmentator");
try
{
while (!finished)
@ -49,12 +146,19 @@ void ParallelParsingBlockInputStream::segmentatorThreadFunction()
}
}
void ParallelParsingBlockInputStream::parserThreadFunction(size_t current_ticket_number)
void ParallelParsingBlockInputStream::parserThreadFunction(ThreadGroupStatusPtr thread_group, size_t current_ticket_number)
{
SCOPE_EXIT(
if (thread_group)
CurrentThread::detachQueryIfNotDetached();
);
if (thread_group)
CurrentThread::attachTo(thread_group);
setThreadName("ChunkParser");
try
{
setThreadName("ChunkParser");
const auto current_unit_number = current_ticket_number % processing_units.size();
auto & unit = processing_units[current_unit_number];

View File

@ -3,15 +3,14 @@
#include <DataStreams/IBlockInputStream.h>
#include <Formats/FormatFactory.h>
#include <Common/ThreadPool.h>
#include <Common/setThreadName.h>
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadBuffer.h>
#include <Processors/Formats/IRowInputFormat.h>
#include <Processors/Formats/InputStreamFromInputFormat.h>
namespace DB
{
class ReadBuffer;
/**
* ORDER-PRESERVING parallel parsing of data formats.
* It splits original data into chunks. Then each chunk is parsed by different thread.
@ -74,68 +73,16 @@ public:
size_t min_chunk_bytes;
};
explicit ParallelParsingBlockInputStream(const Params & params)
: header(params.input_creator_params.sample),
row_input_format_params(params.input_creator_params.row_input_format_params),
format_settings(params.input_creator_params.settings),
input_processor_creator(params.input_processor_creator),
min_chunk_bytes(params.min_chunk_bytes),
original_buffer(params.read_buffer),
// Subtract one thread that we use for segmentation and one for
// reading. After that, must have at least two threads left for
// parsing. See the assertion below.
pool(std::max(2, params.max_threads - 2)),
file_segmentation_engine(params.file_segmentation_engine)
{
// See comment above.
assert(params.max_threads >= 4);
// One unit for each thread, including segmentator and reader, plus a
// couple more units so that the segmentation thread doesn't spuriously
// bump into reader thread on wraparound.
processing_units.resize(params.max_threads + 2);
segmentator_thread = ThreadFromGlobalPool([this] { segmentatorThreadFunction(); });
}
explicit ParallelParsingBlockInputStream(const Params & params);
~ParallelParsingBlockInputStream() override;
String getName() const override { return "ParallelParsing"; }
Block getHeader() const override { return header; }
~ParallelParsingBlockInputStream() override
{
finishAndWait();
}
void cancel(bool kill) override
{
/**
* Can be called multiple times, from different threads. Saturate the
* kill flag with OR.
*/
if (kill)
is_killed = true;
is_cancelled = true;
/*
* The format parsers themselves are not being cancelled here, so we'll
* have to wait until they process the current block. Given that the
* chunk size is on the order of megabytes, this shouldn't be too long.
* We can't call IInputFormat->cancel here, because the parser object is
* local to the parser thread, and we don't want to introduce any
* synchronization between parser threads and the other threads to get
* better performance. An ideal solution would be to add a callback to
* IInputFormat that checks whether it was cancelled.
*/
finishAndWait();
}
Block getHeader() const override
{
return header;
}
void cancel(bool kill) override;
protected:
//Reader routine
// Reader routine
Block readImpl() override;
const BlockMissingValues & getMissingValues() const override
@ -212,36 +159,11 @@ private:
std::deque<ProcessingUnit> processing_units;
void scheduleParserThreadForUnitWithNumber(size_t ticket_number)
{
pool.scheduleOrThrowOnError(std::bind(&ParallelParsingBlockInputStream::parserThreadFunction, this, ticket_number));
}
void scheduleParserThreadForUnitWithNumber(size_t ticket_number);
void finishAndWait();
void finishAndWait()
{
finished = true;
{
std::unique_lock<std::mutex> lock(mutex);
segmentator_condvar.notify_all();
reader_condvar.notify_all();
}
if (segmentator_thread.joinable())
segmentator_thread.join();
try
{
pool.wait();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
void segmentatorThreadFunction();
void parserThreadFunction(size_t current_ticket_number);
void segmentatorThreadFunction(ThreadGroupStatusPtr thread_group);
void parserThreadFunction(ThreadGroupStatusPtr thread_group, size_t current_ticket_number);
// Save/log a background exception, set termination flag, wake up all
// threads. This function is used by segmentator and parser threads.

View File

@ -37,8 +37,8 @@ struct DummyJSONParser
double getDouble() const { return 0; }
bool getBool() const { return false; }
std::string_view getString() const { return {}; }
Array getArray() const;
Object getObject() const;
Array getArray() const { return {}; }
Object getObject() const { return {}; }
};
/// References an array in a JSON document.

View File

@ -1,10 +1,10 @@
#include <Functions/FunctionJoinGet.h>
#include <Columns/ColumnString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Interpreters/Context.h>
#include <Interpreters/HashJoin.h>
#include <Columns/ColumnString.h>
#include <Storages/StorageJoin.h>
@ -16,35 +16,19 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
template <bool or_null>
void ExecutableFunctionJoinGet<or_null>::execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t)
{
Block keys;
for (size_t i = 2; i < arguments.size(); ++i)
{
auto key = block.getByPosition(arguments[i]);
keys.insert(std::move(key));
}
block.getByPosition(result) = join->joinGet(keys, result_block);
}
template <bool or_null>
ExecutableFunctionImplPtr FunctionJoinGet<or_null>::prepare(const Block &, const ColumnNumbers &, size_t) const
{
return std::make_unique<ExecutableFunctionJoinGet<or_null>>(join, Block{{return_type->createColumn(), return_type, attr_name}});
}
static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & context)
{
if (arguments.size() != 3)
throw Exception{"Function joinGet takes 3 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
String join_name;
if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get()))
{
join_name = name_col->getValue<String>();
}
else
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of first argument of function joinGet, expected a const string.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function joinGet, expected a const string.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
size_t dot = join_name.find('.');
String database_name;
@ -59,12 +43,10 @@ static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & co
++dot;
}
String table_name = join_name.substr(dot);
if (table_name.empty())
throw Exception("joinGet does not allow empty table name", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
auto table = DatabaseCatalog::instance().getTable({database_name, table_name}, context);
auto storage_join = std::dynamic_pointer_cast<StorageJoin>(table);
if (!storage_join)
throw Exception("Table " + join_name + " should have engine StorageJoin", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception{"Table " + join_name + " should have engine StorageJoin", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
String attr_name;
if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get()))
@ -72,9 +54,9 @@ static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & co
attr_name = name_col->getValue<String>();
}
else
throw Exception(
"Illegal type " + arguments[1].type->getName() + " of second argument of function joinGet, expected a const string.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception{"Illegal type " + arguments[1].type->getName()
+ " of second argument of function joinGet, expected a const string.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
return std::make_pair(storage_join, attr_name);
}
@ -83,22 +65,44 @@ FunctionBaseImplPtr JoinGetOverloadResolver<or_null>::build(const ColumnsWithTyp
{
auto [storage_join, attr_name] = getJoin(arguments, context);
auto join = storage_join->getJoin();
DataTypes data_types(arguments.size() - 2);
for (size_t i = 2; i < arguments.size(); ++i)
data_types[i - 2] = arguments[i].type;
auto return_type = join->joinGetCheckAndGetReturnType(data_types, attr_name, or_null);
DataTypes data_types(arguments.size());
auto table_lock = storage_join->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout);
for (size_t i = 0; i < arguments.size(); ++i)
data_types[i] = arguments[i].type;
auto return_type = join->joinGetReturnType(attr_name, or_null);
return std::make_unique<FunctionJoinGet<or_null>>(table_lock, storage_join, join, attr_name, data_types, return_type);
}
template <bool or_null>
void JoinGetOverloadResolver<or_null>::checkNumberOfArgumentsIfVariadic(size_t number_of_arguments) const
DataTypePtr JoinGetOverloadResolver<or_null>::getReturnType(const ColumnsWithTypeAndName & arguments) const
{
if (number_of_arguments < 3)
throw Exception(
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(number_of_arguments)
+ ", should be greater or equal to 3",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
auto [storage_join, attr_name] = getJoin(arguments, context);
auto join = storage_join->getJoin();
return join->joinGetReturnType(attr_name, or_null);
}
template <bool or_null>
void ExecutableFunctionJoinGet<or_null>::execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count)
{
auto ctn = block.getByPosition(arguments[2]);
if (isColumnConst(*ctn.column))
ctn.column = ctn.column->cloneResized(1);
ctn.name = ""; // make sure the key name never collide with the join columns
Block key_block = {ctn};
join->joinGet(key_block, attr_name, or_null);
auto & result_ctn = key_block.getByPosition(1);
if (isColumnConst(*ctn.column))
result_ctn.column = ColumnConst::create(result_ctn.column, input_rows_count);
block.getByPosition(result) = result_ctn;
}
template <bool or_null>
ExecutableFunctionImplPtr FunctionJoinGet<or_null>::prepare(const Block &, const ColumnNumbers &, size_t) const
{
return std::make_unique<ExecutableFunctionJoinGet<or_null>>(join, attr_name);
}
void registerFunctionJoinGet(FunctionFactory & factory)

View File

@ -13,14 +13,14 @@ template <bool or_null>
class ExecutableFunctionJoinGet final : public IExecutableFunctionImpl
{
public:
ExecutableFunctionJoinGet(HashJoinPtr join_, const Block & result_block_)
: join(std::move(join_)), result_block(result_block_) {}
ExecutableFunctionJoinGet(HashJoinPtr join_, String attr_name_)
: join(std::move(join_)), attr_name(std::move(attr_name_)) {}
static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet";
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return true; }
bool useDefaultImplementationForConstants() const override { return true; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return true; }
void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override;
@ -28,7 +28,7 @@ public:
private:
HashJoinPtr join;
Block result_block;
const String attr_name;
};
template <bool or_null>
@ -77,15 +77,13 @@ public:
String getName() const override { return name; }
FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const override;
DataTypePtr getReturnType(const ColumnsWithTypeAndName &) const override { return {}; } // Not used
DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override;
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return true; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 1}; }
void checkNumberOfArgumentsIfVariadic(size_t number_of_arguments) const override;
private:
const Context & context;

View File

@ -221,11 +221,6 @@ class DefaultOverloadResolver : public IFunctionOverloadResolverImpl
public:
explicit DefaultOverloadResolver(std::shared_ptr<IFunction> function_) : function(std::move(function_)) {}
void checkNumberOfArgumentsIfVariadic(size_t number_of_arguments) const override
{
return function->checkNumberOfArgumentsIfVariadic(number_of_arguments);
}
bool isDeterministic() const override { return function->isDeterministic(); }
bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); }
bool isInjective(const Block &block) const override { return function->isInjective(block); }

View File

@ -156,12 +156,6 @@ public:
virtual bool isStateful() const { return false; }
virtual bool isVariadic() const { return false; }
/// Will be called if isVariadic returns true. You need to check if function can have specified number of arguments.
virtual void checkNumberOfArgumentsIfVariadic(size_t /*number_of_arguments*/) const
{
throw Exception("checkNumberOfArgumentsIfVariadic is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
virtual void getLambdaArgumentTypes(DataTypes & /*arguments*/) const
{
throw Exception("Function " + getName() + " can't have lambda-expressions as arguments", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -289,11 +283,6 @@ public:
virtual bool isVariadic() const { return false; }
virtual void checkNumberOfArgumentsIfVariadic(size_t /*number_of_arguments*/) const
{
throw Exception("checkNumberOfArgumentsIfVariadic is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
virtual void getLambdaArgumentTypes(DataTypes & /*arguments*/) const
{
throw Exception("Function " + getName() + " can't have lambda-expressions as arguments", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

View File

@ -4,10 +4,12 @@
#include <DataTypes/DataTypeDateTime64.h>
#include <Columns/ColumnString.h>
#include <Functions/IFunctionImpl.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionFactory.h>
#include <Functions/DateTimeTransforms.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/IFunctionImpl.h>
#include <Functions/castTypeToEither.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <IO/WriteHelpers.h>
@ -34,12 +36,20 @@ namespace ErrorCodes
namespace
{
// in private namespace to avoid GCC 9 error: "explicit specialization in non-namespace scope"
template <typename DataType> struct ActionaValueTypeMap {};
template <> struct ActionaValueTypeMap<DataTypeDate> { using ActionValueType = UInt16; };
template <> struct ActionaValueTypeMap<DataTypeDateTime> { using ActionValueType = UInt32; };
template <typename DataType> struct ActionValueTypeMap {};
template <> struct ActionValueTypeMap<DataTypeInt8> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeUInt8> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeInt16> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeUInt16> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeInt32> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeUInt32> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeInt64> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeUInt64> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeDate> { using ActionValueType = UInt16; };
template <> struct ActionValueTypeMap<DataTypeDateTime> { using ActionValueType = UInt32; };
// TODO(vnemkov): once there is support for Int64 in LUT, make that Int64.
// TODO(vnemkov): to add sub-second format instruction, make that DateTime64 and do some math in Action<T>.
template <> struct ActionaValueTypeMap<DataTypeDateTime64> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeDateTime64> { using ActionValueType = UInt32; };
}
/** formatDateTime(time, 'pattern')
@ -80,10 +90,25 @@ template <> struct ActionaValueTypeMap<DataTypeDateTime64> { using ActionValueTy
*
* PS. We can make this function to return FixedString. Currently it returns String.
*/
class FunctionFormatDateTime : public IFunction
template <typename Name, bool support_integer>
class FunctionFormatDateTimeImpl : public IFunction
{
private:
/// Time is either UInt32 for DateTime or UInt16 for Date.
template <typename F>
static bool castType(const IDataType * type, F && f)
{
return castTypeToEither<
DataTypeInt8,
DataTypeUInt8,
DataTypeInt16,
DataTypeUInt16,
DataTypeInt32,
DataTypeUInt32,
DataTypeInt64,
DataTypeUInt64>(type, std::forward<F>(f));
}
template <typename Time>
class Action
{
@ -251,9 +276,9 @@ private:
};
public:
static constexpr auto name = "formatDateTime";
static constexpr auto name = Name::name;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionFormatDateTime>(); }
static FunctionPtr create(const Context &) { return std::make_shared<FunctionFormatDateTimeImpl>(); }
String getName() const override
{
@ -269,37 +294,103 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() != 2 && arguments.size() != 3)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(arguments.size()) + ", should be 2 or 3",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!WhichDataType(arguments[0].type).isDateOrDateTime())
throw Exception("Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() +
". Should be a date or a date with time", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!WhichDataType(arguments[1].type).isString())
throw Exception("Illegal type " + arguments[1].type->getName() + " of 2 argument of function " + getName() + ". Must be String.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (arguments.size() == 3)
if constexpr (support_integer)
{
if (!WhichDataType(arguments[2].type).isString())
throw Exception("Illegal type " + arguments[2].type->getName() + " of 3 argument of function " + getName() + ". Must be String.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (arguments.size() != 1 && arguments.size() != 2 && arguments.size() != 3)
throw Exception(
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
+ ", should be 1, 2 or 3",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (arguments.size() == 1 && !isInteger(arguments[0].type))
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName()
+ " when arguments size is 1. Should be integer",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (arguments.size() > 1 && !(isInteger(arguments[0].type) || WhichDataType(arguments[0].type).isDateOrDateTime()))
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName()
+ " when arguments size is 2 or 3. Should be a integer or a date with time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
else
{
if (arguments.size() != 2 && arguments.size() != 3)
throw Exception(
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
+ ", should be 2 or 3",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!WhichDataType(arguments[0].type).isDateOrDateTime())
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName()
+ ". Should be a date or a date with time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
if (arguments.size() == 2 && !WhichDataType(arguments[1].type).isString())
throw Exception(
"Illegal type " + arguments[1].type->getName() + " of 2 argument of function " + getName() + ". Must be String.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (arguments.size() == 3 && !WhichDataType(arguments[2].type).isString())
throw Exception(
"Illegal type " + arguments[2].type->getName() + " of 3 argument of function " + getName() + ". Must be String.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (arguments.size() == 1)
return std::make_shared<DataTypeDateTime>();
return std::make_shared<DataTypeString>();
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) const override
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, [[maybe_unused]] size_t input_rows_count) const override
{
if (!executeType<DataTypeDate>(block, arguments, result)
&& !executeType<DataTypeDateTime>(block, arguments, result)
&& !executeType<DataTypeDateTime64>(block, arguments, result))
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
+ " of function " + getName() + ", must be Date or DateTime",
if constexpr (support_integer)
{
if (arguments.size() == 1)
{
if (!castType(block.getByPosition(arguments[0]).type.get(), [&](const auto & type)
{
using FromDataType = std::decay_t<decltype(type)>;
ConvertImpl<FromDataType, DataTypeDateTime, Name>::execute(block, arguments, result, input_rows_count);
return true;
}))
{
throw Exception(
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of function " + getName()
+ ", must be Integer or DateTime when arguments size is 1.",
ErrorCodes::ILLEGAL_COLUMN);
}
}
else
{
if (!castType(block.getByPosition(arguments[0]).type.get(), [&](const auto & type)
{
using FromDataType = std::decay_t<decltype(type)>;
if (!executeType<FromDataType>(block, arguments, result))
throw Exception(
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of function " + getName()
+ ", must be Integer or DateTime.",
ErrorCodes::ILLEGAL_COLUMN);
return true;
}))
{
if (!executeType<DataTypeDate>(block, arguments, result) && !executeType<DataTypeDateTime>(block, arguments, result)
&& !executeType<DataTypeDateTime64>(block, arguments, result))
throw Exception(
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of function " + getName()
+ ", must be Integer or DateTime.",
ErrorCodes::ILLEGAL_COLUMN);
}
}
}
else
{
if (!executeType<DataTypeDate>(block, arguments, result) && !executeType<DataTypeDateTime>(block, arguments, result)
&& !executeType<DataTypeDateTime64>(block, arguments, result))
throw Exception(
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of function " + getName()
+ ", must be Date or DateTime.",
ErrorCodes::ILLEGAL_COLUMN);
}
}
template <typename DataType>
@ -318,13 +409,17 @@ public:
String pattern = pattern_column->getValue<String>();
using T = typename ActionaValueTypeMap<DataType>::ActionValueType;
using T = typename ActionValueTypeMap<DataType>::ActionValueType;
std::vector<Action<T>> instructions;
String pattern_to_fill = parsePattern(pattern, instructions);
size_t result_size = pattern_to_fill.size();
const DateLUTImpl * time_zone_tmp = nullptr;
if (std::is_same_v<DataType, DataTypeDateTime64> || std::is_same_v<DataType, DataTypeDateTime>)
if (castType(block.getByPosition(arguments[0]).type.get(), [&]([[maybe_unused]] const auto & type) { return true; }))
{
time_zone_tmp = &extractTimeZoneFromFunctionArguments(block, arguments, 2, 0);
}
else if (std::is_same_v<DataType, DataTypeDateTime64> || std::is_same_v<DataType, DataTypeDateTime>)
time_zone_tmp = &extractTimeZoneFromFunctionArguments(block, arguments, 2, 0);
else
time_zone_tmp = &DateLUT::instance();
@ -606,9 +701,23 @@ public:
}
};
struct NameFormatDateTime
{
static constexpr auto name = "formatDateTime";
};
struct NameFromUnixTime
{
static constexpr auto name = "FROM_UNIXTIME";
};
using FunctionFormatDateTime = FunctionFormatDateTimeImpl<NameFormatDateTime, false>;
using FunctionFROM_UNIXTIME = FunctionFormatDateTimeImpl<NameFromUnixTime, true>;
void registerFunctionFormatDateTime(FunctionFactory & factory)
{
factory.registerFunction<FunctionFormatDateTime>();
factory.registerFunction<FunctionFROM_UNIXTIME>();
}
}

View File

@ -76,20 +76,20 @@ public:
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
void checkNumberOfArgumentsIfVariadic(size_t number_of_arguments) const override
{
if (number_of_arguments > 1)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(number_of_arguments) + ", should be 0 or 1.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
static FunctionOverloadResolverImplPtr create(const Context &)
{
return std::make_unique<RandomConstantOverloadResolver<ToType, Name>>();
}
DataTypePtr getReturnType(const DataTypes &) const override { return std::make_shared<DataTypeNumber<ToType>>(); }
DataTypePtr getReturnType(const DataTypes & data_types) const override
{
size_t number_of_arguments = data_types.size();
if (number_of_arguments > 1)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(number_of_arguments) + ", should be 0 or 1.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
return std::make_shared<DataTypeNumber<ToType>>();
}
FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
{

View File

@ -1,98 +0,0 @@
#include <Common/typeid_cast.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Interpreters/AnyInputOptimize.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ASTTablesInSelectQuery.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_AGGREGATION;
}
namespace
{
constexpr auto * any = "any";
constexpr auto * anyLast = "anyLast";
}
ASTPtr * getExactChild(const ASTPtr & ast, const size_t ind)
{
if (ast && ast->as<ASTFunction>()->arguments->children[ind])
return &ast->as<ASTFunction>()->arguments->children[ind];
return nullptr;
}
///recursive searching of identifiers
void changeAllIdentifiers(ASTPtr & ast, size_t ind, const std::string & name)
{
ASTPtr * exact_child = getExactChild(ast, ind);
if (!exact_child)
return;
if ((*exact_child)->as<ASTIdentifier>())
{
///put new any
ASTPtr old_ast = *exact_child;
*exact_child = makeASTFunction(name);
(*exact_child)->as<ASTFunction>()->arguments->children.push_back(old_ast);
}
else if ((*exact_child)->as<ASTFunction>())
{
if (AggregateFunctionFactory::instance().isAggregateFunctionName((*exact_child)->as<ASTFunction>()->name))
throw Exception("Aggregate function " + (*exact_child)->as<ASTFunction>()->name +
" is found inside aggregate function " + name + " in query", ErrorCodes::ILLEGAL_AGGREGATION);
for (size_t i = 0; i < (*exact_child)->as<ASTFunction>()->arguments->children.size(); i++)
changeAllIdentifiers(*exact_child, i, name);
}
}
///cut old any, put any to identifiers. any(functions(x)) -> functions(any(x))
void AnyInputMatcher::visit(ASTPtr & current_ast, Data data)
{
data = {};
if (!current_ast)
return;
auto * function_node = current_ast->as<ASTFunction>();
if (!function_node || function_node->arguments->children.empty())
return;
const auto & function_argument = function_node->arguments->children[0];
if ((function_node->name == any || function_node->name == anyLast)
&& function_argument && function_argument->as<ASTFunction>())
{
auto name = function_node->name;
auto alias = function_node->alias;
///cut any or anyLast
if (!function_argument->as<ASTFunction>()->arguments->children.empty())
{
current_ast = function_argument->clone();
current_ast->setAlias(alias);
for (size_t i = 0; i < current_ast->as<ASTFunction>()->arguments->children.size(); ++i)
changeAllIdentifiers(current_ast, i, name);
}
}
}
bool AnyInputMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child)
{
if (!child)
throw Exception("AST item should not have nullptr in children", ErrorCodes::LOGICAL_ERROR);
if (node->as<ASTTableExpression>() || node->as<ASTArrayJoin>())
return false; // NOLINT
return true;
}
}

View File

@ -1,19 +0,0 @@
#pragma once
#include <Parsers/IAST.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
///This optimiser is similar to ArithmeticOperationsInAgrFunc optimizer, but for function any we can extract any functions.
class AnyInputMatcher
{
public:
struct Data {};
static void visit(ASTPtr & ast, Data data);
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
};
using AnyInputVisitor = InDepthNodeVisitor<AnyInputMatcher, true>;
}

View File

@ -153,7 +153,9 @@ void ArithmeticOperationsInAgrFuncMatcher::visit(ASTPtr & ast, Data & data)
bool ArithmeticOperationsInAgrFuncMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
return !node->as<ASTSubquery>() && !node->as<ASTTableExpression>();
return !node->as<ASTSubquery>() &&
!node->as<ASTTableExpression>() &&
!node->as<ASTArrayJoin>();
}
}
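For context, this matcher hoists constant arithmetic out of aggregate functions (e.g. rewriting `sum(x * 2)` as `sum(x) * 2`), and the added `ASTArrayJoin` check keeps the visitor from descending into ARRAY JOIN clauses. A hypothetical query showing the shape that is now skipped:
``` sql
-- The aggregate in SELECT can still be rewritten, but the visitor
-- no longer walks into the ARRAY JOIN clause itself.
SELECT sum(x * 2) FROM t ARRAY JOIN arr AS x;
```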

View File

@ -1563,7 +1563,7 @@ void Context::reloadClusterConfig()
return;
}
/// Clusters config has been suddenly changed, recompute clusters
// Clusters config has been suddenly changed, recompute clusters
}
}
}

View File

@ -42,7 +42,6 @@ namespace ErrorCodes
extern const int SYNTAX_ERROR;
extern const int SET_SIZE_LIMIT_EXCEEDED;
extern const int TYPE_MISMATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
@ -1110,34 +1109,27 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed)
block = block.cloneWithColumns(std::move(dst_columns));
}
static void checkTypeOfKey(const Block & block_left, const Block & block_right)
{
const auto & [c1, left_type_origin, left_name] = block_left.safeGetByPosition(0);
const auto & [c2, right_type_origin, right_name] = block_right.safeGetByPosition(0);
auto left_type = removeNullable(left_type_origin);
auto right_type = removeNullable(right_type_origin);
DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const
if (!left_type->equals(*right_type))
throw Exception("Type mismatch of columns to joinGet by: "
+ left_name + " " + left_type->getName() + " at left, "
+ right_name + " " + right_type->getName() + " at right",
ErrorCodes::TYPE_MISMATCH);
}
DataTypePtr HashJoin::joinGetReturnType(const String & column_name, bool or_null) const
{
std::shared_lock lock(data->rwlock);
size_t num_keys = data_types.size();
if (right_table_keys.columns() != num_keys)
throw Exception(
"Number of arguments for function joinGet" + toString(or_null ? "OrNull" : "")
+ " doesn't match: passed, should be equal to " + toString(num_keys),
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
for (size_t i = 0; i < num_keys; ++i)
{
const auto & left_type_origin = data_types[i];
const auto & [c2, right_type_origin, right_name] = right_table_keys.safeGetByPosition(i);
auto left_type = removeNullable(left_type_origin);
auto right_type = removeNullable(right_type_origin);
if (!left_type->equals(*right_type))
throw Exception(
"Type mismatch in joinGet key " + toString(i) + ": found type " + left_type->getName() + ", while the needed type is "
+ right_type->getName(),
ErrorCodes::TYPE_MISMATCH);
}
if (!sample_block_with_columns_to_add.has(column_name))
throw Exception("StorageJoin doesn't contain column " + column_name, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
auto elem = sample_block_with_columns_to_add.getByName(column_name);
if (or_null)
elem.type = makeNullable(elem.type);
@ -1146,33 +1138,34 @@ DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types,
template <typename Maps>
ColumnWithTypeAndName HashJoin::joinGetImpl(const Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const
void HashJoin::joinGetImpl(Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const
{
// Assemble the key block with correct names.
Block keys;
for (size_t i = 0; i < block.columns(); ++i)
{
auto key = block.getByPosition(i);
key.name = key_names_right[i];
keys.insert(std::move(key));
}
joinBlockImpl<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::Any>(
keys, key_names_right, block_with_columns_to_add, maps_);
return keys.getByPosition(keys.columns() - 1);
joinBlockImpl<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::RightAny>(
block, {block.getByPosition(0).name}, block_with_columns_to_add, maps_);
}
// TODO: support composite key
// TODO: return multiple columns as named tuple
// TODO: return array of values when strictness == ASTTableJoin::Strictness::All
ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block_with_columns_to_add) const
void HashJoin::joinGet(Block & block, const String & column_name, bool or_null) const
{
std::shared_lock lock(data->rwlock);
if (key_names_right.size() != 1)
throw Exception("joinGet only supports StorageJoin containing exactly one key", ErrorCodes::UNSUPPORTED_JOIN_KEYS);
checkTypeOfKey(block, right_table_keys);
auto elem = sample_block_with_columns_to_add.getByName(column_name);
if (or_null)
elem.type = makeNullable(elem.type);
elem.column = elem.type->createColumn();
if ((strictness == ASTTableJoin::Strictness::Any || strictness == ASTTableJoin::Strictness::RightAny) &&
kind == ASTTableJoin::Kind::Left)
{
return joinGetImpl(block, block_with_columns_to_add, std::get<MapsOne>(data->maps));
joinGetImpl(block, {elem}, std::get<MapsOne>(data->maps));
}
else
throw Exception("joinGet only supports StorageJoin of type Left Any", ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN);

View File

@ -162,11 +162,11 @@ public:
*/
void joinBlock(Block & block, ExtraBlockPtr & not_processed) override;
/// Check joinGet arguments and infer the return type.
DataTypePtr joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const;
/// Infer the return type for joinGet function
DataTypePtr joinGetReturnType(const String & column_name, bool or_null) const;
/// Used by joinGet function that turns StorageJoin into a dictionary.
ColumnWithTypeAndName joinGet(const Block & block, const Block & block_with_columns_to_add) const;
/// Used by joinGet function that turns StorageJoin into a dictionary
void joinGet(Block & block, const String & column_name, bool or_null) const;
/** Keep "totals" (separate part of dataset, see WITH TOTALS) to use later.
*/
@ -383,7 +383,7 @@ private:
void joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const;
template <typename Maps>
ColumnWithTypeAndName joinGetImpl(const Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const;
void joinGetImpl(Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const;
static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes);
};

View File

@ -16,6 +16,7 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED;
extern const int LOGICAL_ERROR;
}
@ -150,6 +151,12 @@ private:
{
if (node.name == "in" || node.name == "notIn")
{
if (node.arguments->children.size() != 2)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function '{}' expects two arguments, given: '{}'",
node.name, node.formatForErrorMessage());
}
auto & subquery = node.arguments->children.at(1);
std::vector<ASTPtr> renamed;
NonGlobalTableVisitor::Data table_data{data.checker, data.context, renamed, &node, nullptr};
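The new arity check matters because the code above immediately takes children.at(1); a malformed call now fails with a clean BAD_ARGUMENTS instead. A hedged sketch (dist is a hypothetical Distributed table; the visitor only runs for such queries):
SELECT count() FROM dist WHERE in(x);                     -- rejected: Function 'in' expects two arguments
SELECT count() FROM dist WHERE x IN (SELECT x FROM dist); -- well-formed IN is rewritten as before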

View File

@ -21,6 +21,7 @@ PredicateExpressionsOptimizer::PredicateExpressionsOptimizer(
const Context & context_, const TablesWithColumns & tables_with_columns_, const Settings & settings)
: enable_optimize_predicate_expression(settings.enable_optimize_predicate_expression)
, enable_optimize_predicate_expression_to_final_subquery(settings.enable_optimize_predicate_expression_to_final_subquery)
, allow_push_predicate_when_subquery_contains_with(settings.allow_push_predicate_when_subquery_contains_with)
, context(context_)
, tables_with_columns(tables_with_columns_)
{
@ -151,7 +152,8 @@ bool PredicateExpressionsOptimizer::tryRewritePredicatesToTable(ASTPtr & table_e
if (!table_predicates.empty())
{
auto optimize_final = enable_optimize_predicate_expression_to_final_subquery;
PredicateRewriteVisitor::Data data(context, table_predicates, std::move(table_columns), optimize_final);
auto optimize_with = allow_push_predicate_when_subquery_contains_with;
PredicateRewriteVisitor::Data data(context, table_predicates, std::move(table_columns), optimize_final, optimize_with);
PredicateRewriteVisitor(data).visit(table_element);
return data.is_rewrite;

View File

@ -25,6 +25,7 @@ public:
private:
const bool enable_optimize_predicate_expression;
const bool enable_optimize_predicate_expression_to_final_subquery;
const bool allow_push_predicate_when_subquery_contains_with;
const Context & context;
const TablesWithColumns & tables_with_columns;

View File

@ -17,8 +17,8 @@ namespace DB
{
PredicateRewriteVisitorData::PredicateRewriteVisitorData(
const Context & context_, const ASTs & predicates_, Names && column_names_, bool optimize_final_)
: context(context_), predicates(predicates_), column_names(column_names_), optimize_final(optimize_final_)
const Context & context_, const ASTs & predicates_, Names && column_names_, bool optimize_final_, bool optimize_with_)
: context(context_), predicates(predicates_), column_names(column_names_), optimize_final(optimize_final_), optimize_with(optimize_with_)
{
}
@ -85,7 +85,8 @@ static void cleanAliasAndCollectIdentifiers(ASTPtr & predicate, std::vector<ASTI
bool PredicateRewriteVisitorData::rewriteSubquery(ASTSelectQuery & subquery, const Names & outer_columns, const Names & inner_columns)
{
if ((!optimize_final && subquery.final())
|| subquery.with() || subquery.withFill()
|| (!optimize_with && subquery.with())
|| subquery.withFill()
|| subquery.limitBy() || subquery.limitLength()
|| hasStatefulFunction(subquery.select(), context))
return false;
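Taken together with the new flag, this means an outer predicate can now be pushed into a subquery that merely declares a WITH alias; a minimal sketch of the query shape this unlocks (see the test near the end of this diff for the full version):
SET allow_push_predicate_when_subquery_contains_with = 1;
SELECT number, doubled
FROM (WITH number * 2 AS doubled SELECT number, doubled FROM numbers(1000))
WHERE number = 42; -- now pushed inside the subquery, past the WITH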

View File

@ -24,13 +24,14 @@ public:
return true;
}
PredicateRewriteVisitorData(const Context & context_, const ASTs & predicates_, Names && column_names_, bool optimize_final_);
PredicateRewriteVisitorData(const Context & context_, const ASTs & predicates_, Names && column_names_, bool optimize_final_, bool optimize_with_);
private:
const Context & context;
const ASTs & predicates;
const Names column_names;
bool optimize_final;
bool optimize_with;
void visitFirstInternalSelect(ASTSelectQuery & select_query, ASTPtr &);

View File

@ -0,0 +1,96 @@
#include <Common/typeid_cast.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSubquery.h>
#include <Interpreters/RewriteAnyFunctionVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ASTTablesInSelectQuery.h>
namespace DB
{
namespace
{
bool extractIdentifiers(const ASTFunction & func, std::vector<ASTPtr *> & identifiers)
{
for (auto & arg : func.arguments->children)
{
if (const auto * arg_func = arg->as<ASTFunction>())
{
if (arg_func->name == "lambda")
return false;
if (AggregateFunctionFactory::instance().isAggregateFunctionName(arg_func->name))
return false;
if (!extractIdentifiers(*arg_func, identifiers))
return false;
}
else if (arg->as<ASTIdentifier>())
identifiers.emplace_back(&arg);
}
return true;
}
}
void RewriteAnyFunctionMatcher::visit(ASTPtr & ast, Data & data)
{
if (auto * func = ast->as<ASTFunction>())
visit(*func, ast, data);
}
void RewriteAnyFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data)
{
if (func.arguments->children.empty() || !func.arguments->children[0])
return;
if (func.name != "any" && func.name != "anyLast")
return;
auto & func_arguments = func.arguments->children;
const auto * first_arg_func = func_arguments[0]->as<ASTFunction>();
if (!first_arg_func || first_arg_func->arguments->children.empty())
return;
/// We have rewritten this function. Just unwrap its argument.
if (data.rewritten.count(ast.get()))
{
func_arguments[0]->setAlias(func.alias);
ast = func_arguments[0];
return;
}
std::vector<ASTPtr *> identifiers;
if (!extractIdentifiers(func, identifiers))
return;
/// Wrap identifiers: any(f(x, y, g(z))) -> any(f(any(x), any(y), g(any(z))))
for (auto * ast_to_change : identifiers)
{
ASTPtr identifier_ast = *ast_to_change;
*ast_to_change = makeASTFunction(func.name);
(*ast_to_change)->as<ASTFunction>()->arguments->children.emplace_back(identifier_ast);
}
data.rewritten.insert(ast.get());
/// Unwrap function: any(f(any(x), any(y), g(any(z)))) -> f(any(x), any(y), g(any(z)))
func_arguments[0]->setAlias(func.alias);
ast = func_arguments[0];
}
bool RewriteAnyFunctionMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
return !node->as<ASTSubquery>() &&
!node->as<ASTTableExpression>() &&
!node->as<ASTArrayJoin>();
}
}
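The effect of the rewrite is easiest to see through the debug queries used by the tests below; a minimal sketch:
SET optimize_move_functions_out_of_any = 1;
SET enable_debug_queries = 1;
ANALYZE SELECT any(number + number * 2) FROM numbers(1, 2);
-- rewritten to: SELECT any(number) + (any(number) * 2) FROM numbers(1, 2)
SELECT any(anyLast(number)) FROM numbers(1); -- nested aggregates are still rejected (ILLEGAL_AGGREGATION)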

View File

@ -0,0 +1,29 @@
#pragma once
#include <unordered_set>
#include <Parsers/IAST.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
class ASTFunction;
/// Rewrite 'any' and 'anyLast' functions, pushing them inside the original function.
/// any(f(x, y, g(z))) -> f(any(x), any(y), g(any(z)))
class RewriteAnyFunctionMatcher
{
public:
struct Data
{
std::unordered_set<IAST *> rewritten;
};
static void visit(ASTPtr & ast, Data & data);
static void visit(const ASTFunction &, ASTPtr & ast, Data & data);
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
};
using RewriteAnyFunctionVisitor = InDepthNodeVisitor<RewriteAnyFunctionMatcher, false>;
}

View File

@ -8,7 +8,7 @@
#include <Interpreters/DuplicateOrderByVisitor.h>
#include <Interpreters/GroupByFunctionKeysVisitor.h>
#include <Interpreters/AggregateFunctionOfGroupByKeysVisitor.h>
#include <Interpreters/AnyInputOptimize.h>
#include <Interpreters/RewriteAnyFunctionVisitor.h>
#include <Interpreters/RemoveInjectiveFunctionsVisitor.h>
#include <Interpreters/RedundantFunctionsInOrderByVisitor.h>
#include <Interpreters/MonotonicityCheckVisitor.h>
@ -459,11 +459,10 @@ void optimizeAggregationFunctions(ASTPtr & query)
ArithmeticOperationsInAgrFuncVisitor(data).visit(query);
}
void optimizeAnyInput(ASTPtr & query)
void optimizeAnyFunctions(ASTPtr & query)
{
/// Removing arithmetic operations from functions
AnyInputVisitor::Data data = {};
AnyInputVisitor(data).visit(query);
RewriteAnyFunctionVisitor::Data data = {};
RewriteAnyFunctionVisitor(data).visit(query);
}
void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, const Context & context)
@ -521,9 +520,9 @@ void TreeOptimizer::apply(ASTPtr & query, Aliases & aliases, const NameSet & sou
if (settings.optimize_group_by_function_keys)
optimizeGroupByFunctionKeys(select_query);
///Move all operations out of any function
/// Move all operations out of any function
if (settings.optimize_move_functions_out_of_any)
optimizeAnyInput(query);
optimizeAnyFunctions(query);
/// Remove injective functions inside uniq
if (settings.optimize_injective_functions_inside_uniq)

View File

@ -28,7 +28,7 @@ inline bool functionIsLikeOperator(const std::string & name)
inline bool functionIsJoinGet(const std::string & name)
{
return startsWith(name, "joinGet");
return name == "joinGet" || name == "joinGetOrNull";
}
inline bool functionIsDictGet(const std::string & name)

View File

@ -20,7 +20,6 @@ SRCS(
addTypeConversionToAST.cpp
AggregateDescription.cpp
Aggregator.cpp
AnyInputOptimize.cpp
ArithmeticOperationsInAgrFuncOptimize.cpp
ArithmeticOperationsInAgrFuncOptimize.h
ArrayJoinAction.cpp
@ -127,6 +126,7 @@ SRCS(
ReplaceQueryParameterVisitor.cpp
RequiredSourceColumnsData.cpp
RequiredSourceColumnsVisitor.cpp
RewriteAnyFunctionVisitor.cpp
RowRefs.cpp
Set.cpp
SetVariants.cpp

View File

@ -12,6 +12,7 @@ using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
class SourceFromInputStream : public ISourceWithProgress
{
public:
/// If force_add_aggregating_info is enabled, AggregatedChunkInfo (with the bucket number and is_overflows flag) will be added to the result chunk.
explicit SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_ = false);
String getName() const override { return "SourceFromInputStream"; }

View File

@ -506,7 +506,6 @@ MergeTreeRangeReader::MergeTreeRangeReader(
if (prewhere->alias_actions)
prewhere->alias_actions->execute(sample_block, true);
sample_block_before_prewhere = sample_block;
if (prewhere->prewhere_actions)
prewhere->prewhere_actions->execute(sample_block, true);
@ -826,7 +825,7 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r
for (auto name_and_type = header.begin(); pos < num_columns; ++pos, ++name_and_type)
block.insert({result.columns[pos], name_and_type->type, name_and_type->name});
if (prewhere && prewhere->alias_actions)
if (prewhere->alias_actions)
prewhere->alias_actions->execute(block);
/// Columns might be projected out. We need to store them here so that default columns can be evaluated later.

View File

@ -222,7 +222,6 @@ private:
Stream stream;
Block sample_block;
Block sample_block_before_prewhere;
bool last_reader_in_chain = false;
bool is_initialized = false;

View File

@ -37,4 +37,11 @@ list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC})
set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w)
add_library(clickhouse_storages_system ${storages_system_headers} ${storages_system_sources})
target_link_libraries(clickhouse_storages_system PRIVATE dbms common string_utils clickhouse_common_zookeeper clickhouse_parsers)
target_link_libraries(clickhouse_storages_system PRIVATE
dbms
common
string_utils
clickhouse_common_zookeeper
clickhouse_parsers
Poco::JSON
)

View File

@ -0,0 +1,4 @@
<yandex>
<!-- make it fail earlier -->
<max_server_memory_usage>3000000000</max_server_memory_usage> <!-- 3GB -->
</yandex>

View File

@ -0,0 +1,33 @@
# pylint: disable=unused-argument
# pylint: disable=redefined-outer-name
# pylint: disable=line-too-long
import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance('instance', main_configs=['configs/conf.xml'])
@pytest.fixture(scope='module', autouse=True)
def start_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
# max_memory_usage_for_user cannot be used, since the per-user memory is accounted
# correctly; only the total is not.
def test_memory_tracking_total():
instance.query('''
CREATE TABLE null (row String) ENGINE=Null;
''')
instance.exec_in_container(['bash', '-c',
'clickhouse client -q "SELECT arrayStringConcat(arrayMap(x->toString(cityHash64(x)), range(1000)), \' \') from numbers(10000)" > data.json'])
for it in range(0, 20):
# the problem can be triggered only via HTTP,
# since clickhouse-client parses the data by itself.
assert instance.exec_in_container(['curl', '--silent', '--show-error', '--data-binary', '@data.json',
'http://127.1:8123/?query=INSERT%20INTO%20null%20FORMAT%20TSV']) == '', 'Failed on {} iteration'.format(it)

View File

@ -1,12 +1,12 @@
DROP TABLE IF EXISTS test_joinGet;
DROP TABLE IF EXISTS test_join_joinGet;
CREATE TABLE test_joinGet(user_id Nullable(Int32), name String) Engine = Join(ANY, LEFT, user_id);
CREATE TABLE test_joinGet(id Int32, user_id Nullable(Int32)) Engine = Memory();
CREATE TABLE test_join_joinGet(user_id Int32, name String) Engine = Join(ANY, LEFT, user_id);
INSERT INTO test_joinGet VALUES (2, 'a'), (6, 'b'), (10, 'c'), (null, 'd');
INSERT INTO test_join_joinGet VALUES (2, 'a'), (6, 'b'), (10, 'c');
SELECT toNullable(toInt32(2)) user_id WHERE joinGet(test_joinGet, 'name', user_id) != '';
-- If the JOIN keys are Nullable fields, the rows where at least one of the keys has the value NULL are not joined.
SELECT cast(null AS Nullable(Int32)) user_id WHERE joinGet(test_joinGet, 'name', user_id) != '';
SELECT 2 id, toNullable(toInt32(2)) user_id WHERE joinGet(test_join_joinGet, 'name', user_id) != '';
DROP TABLE test_joinGet;
DROP TABLE test_join_joinGet;

View File

@ -1,3 +1,30 @@
9
SELECT any(number) + (any(number) * 2)
FROM numbers(3, 10)
FROM numbers(1, 2)
3
SELECT anyLast(number) + (anyLast(number) * 2)
FROM numbers(1, 2)
6
WITH any(number) * 3 AS x
SELECT x
FROM numbers(1, 2)
3
SELECT
anyLast(number) * 3 AS x,
x
FROM numbers(1, 2)
6 6
SELECT any(number + (number * 2))
FROM numbers(1, 2)
3
SELECT anyLast(number + (number * 2))
FROM numbers(1, 2)
6
WITH any(number * 3) AS x
SELECT x
FROM numbers(1, 2)
3
SELECT
anyLast(number * 3) AS x,
x
FROM numbers(1, 2)
6 6

View File

@ -1,4 +1,32 @@
SET optimize_move_functions_out_of_any=1;
SET enable_debug_queries=1;
SELECT any(number + number * 2) FROM numbers(3, 10);
ANALYZE SELECT any(number + number * 2) FROM numbers(3, 10);
SET enable_debug_queries = 1;
SET optimize_move_functions_out_of_any = 1;
ANALYZE SELECT any(number + number * 2) FROM numbers(1, 2);
SELECT any(number + number * 2) FROM numbers(1, 2);
ANALYZE SELECT anyLast(number + number * 2) FROM numbers(1, 2);
SELECT anyLast(number + number * 2) FROM numbers(1, 2);
ANALYZE WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
ANALYZE SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
SET optimize_move_functions_out_of_any = 0;
ANALYZE SELECT any(number + number * 2) FROM numbers(1, 2);
SELECT any(number + number * 2) FROM numbers(1, 2);
ANALYZE SELECT anyLast(number + number * 2) FROM numbers(1, 2);
SELECT anyLast(number + number * 2) FROM numbers(1, 2);
ANALYZE WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
ANALYZE SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }

View File

@ -1,9 +0,0 @@
DROP TABLE IF EXISTS test_joinGet;
CREATE TABLE test_joinGet(a String, b String, c Float64) ENGINE = Join(any, left, a, b);
INSERT INTO test_joinGet VALUES ('ab', '1', 0.1), ('ab', '2', 0.2), ('cd', '3', 0.3);
SELECT joinGet(test_joinGet, 'c', 'ab', '1');
DROP TABLE test_joinGet;

View File

@ -0,0 +1,29 @@
1970-01-01 00:02:03
1973-11-29 21:33:09
2038-07-12 01:15:36
19
11
1970-01-15
1970-01-15 06:52:36
20
02
01/02/18
2
2018-01-02
22
02
10
11
12
001
366
01
33
\n
AM
AM
PM
22:33
44
\t
22:33:44

View File

@ -0,0 +1,29 @@
SELECT formatDateTime(FROM_UNIXTIME(123), '%Y-%m-%d %R:%S', 'UTC');
SELECT formatDateTime(FROM_UNIXTIME(123456789), '%Y-%m-%d %R:%S', 'UTC');
SELECT formatDateTime(FROM_UNIXTIME(6457477432), '%Y-%m-%d %R:%S', 'UTC');
SELECT FROM_UNIXTIME(5345345, '%C', 'UTC');
SELECT FROM_UNIXTIME(645123, '%H', 'UTC');
SELECT FROM_UNIXTIME(1232456, '%Y-%m-%d', 'UTC');
SELECT FROM_UNIXTIME(1234356, '%Y-%m-%d %R:%S', 'UTC');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%C');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%d');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%D');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%e');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%F');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%H');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 02:33:44'), '%H');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%I');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 11:33:44'), '%I');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 00:33:44'), '%I');
SELECT FROM_UNIXTIME(toDateTime('2018-01-01 00:33:44'), '%j');
SELECT FROM_UNIXTIME(toDateTime('2000-12-31 00:33:44'), '%j');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%m');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%M');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%n');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 00:33:44'), '%p');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 11:33:44'), '%p');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 12:33:44'), '%p');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%R');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%S');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%t');
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%T');
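For readers unfamiliar with the new function: with one argument FROM_UNIXTIME acts like toDateTime, and with a format string it acts like formatDateTime over the converted value; a short sketch (the one-argument result depends on the server timezone, hence the explicit 'UTC' above):
SELECT FROM_UNIXTIME(123, '%Y-%m-%d %R:%S', 'UTC');               -- 1970-01-01 00:02:03
SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%F %T'); -- 2018-01-02 22:33:44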

View File

@ -0,0 +1,4 @@
2020-07-10
2020-07-11
0000-00-00

View File

@ -0,0 +1,34 @@
DROP TABLE IF EXISTS tracking_events_tmp;
DROP TABLE IF EXISTS open_events_tmp;
CREATE TABLE tracking_events_tmp (`APIKey` UInt32, `EventDate` Date) ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (APIKey, EventDate);
CREATE TABLE open_events_tmp (`APIKey` UInt32, `EventDate` Date) ENGINE = MergeTree PARTITION BY toMonday(EventDate) ORDER BY (APIKey, EventDate);
insert into open_events_tmp select 2, '2020-07-10' from numbers(32);
insert into open_events_tmp select 2, '2020-07-11' from numbers(31);
insert into tracking_events_tmp select 2, '2020-07-10' from numbers(1881);
insert into tracking_events_tmp select 2, '2020-07-11' from numbers(1623);
SELECT EventDate
FROM
(
SELECT EventDate
FROM tracking_events_tmp AS t1
WHERE (EventDate >= toDate('2020-07-10')) AND (EventDate <= toDate('2020-07-11')) AND (APIKey = 2)
GROUP BY EventDate
)
FULL OUTER JOIN
(
SELECT EventDate
FROM remote('127.0.0.{1,3}', currentDatabase(), open_events_tmp) AS t2
WHERE (EventDate >= toDate('2020-07-10')) AND (EventDate <= toDate('2020-07-11')) AND (APIKey = 2)
GROUP BY EventDate
WITH TOTALS
) USING EventDate
ORDER BY EventDate
settings totals_mode = 'after_having_auto', group_by_overflow_mode = 'any', max_rows_to_group_by = 10000000, joined_subquery_requires_alias=0;
DROP TABLE IF EXISTS tracking_events_tmp;
DROP TABLE IF EXISTS open_events_tmp;

View File

@ -0,0 +1,26 @@
1 hello
2 world
all_1_1_0 0
---
all_1_1_0 1
---
1 goodbye
2 world
all_1_1_0 1
all_1_1_0_2 0
---
1 goodbye
2 world
all_1_1_0 1
all_1_1_0_2 0
all_1_1_0_3 0
---
all_1_1_0 1
all_1_1_0_2 0
all_1_1_0_3 1
---
1 hello
2 world
all_1_1_0 1
all_1_1_0_2 0
all_1_1_0_3 1

View File

@ -0,0 +1,35 @@
-- In previous ClickHouse versions, parts were not 100% immutable and FREEZE could prevent subsequent ALTERs.
-- It's no longer the case. Let's prove it.
DROP TABLE IF EXISTS t;
CREATE TABLE t (k UInt64, s String) ENGINE = MergeTree ORDER BY k;
INSERT INTO t VALUES (1, 'hello'), (2, 'world');
SELECT * FROM t;
SELECT name, is_frozen FROM system.parts WHERE database = currentDatabase() AND table = 't';
SELECT '---';
ALTER TABLE t FREEZE;
SELECT name, is_frozen FROM system.parts WHERE database = currentDatabase() AND table = 't';
SELECT '---';
SET mutations_sync = 1;
ALTER TABLE t UPDATE s = 'goodbye' WHERE k = 1;
SELECT * FROM t;
SELECT name, is_frozen FROM system.parts WHERE database = currentDatabase() AND table = 't';
SELECT '---';
ALTER TABLE t MODIFY COLUMN s Enum('goodbye' = 1, 'world' = 2);
SELECT * FROM t;
SELECT name, is_frozen FROM system.parts WHERE database = currentDatabase() AND table = 't';
SELECT '---';
ALTER TABLE t FREEZE;
SELECT name, is_frozen FROM system.parts WHERE database = currentDatabase() AND table = 't';
SELECT '---';
ALTER TABLE t MODIFY COLUMN s Enum('hello' = 1, 'world' = 2);
SELECT * FROM t;
SELECT name, is_frozen FROM system.parts WHERE database = currentDatabase() AND table = 't';
DROP TABLE t;

View File

@ -0,0 +1 @@
0

View File

@ -0,0 +1,18 @@
DROP TABLE IF EXISTS test;
CREATE TABLE test
(
`Source.C1` Array(UInt64),
`Source.C2` Array(UInt64)
)
ENGINE = MergeTree()
ORDER BY tuple();
SET optimize_move_functions_out_of_any = 1;
SELECT any(arrayFilter((c, d) -> (4 = d), `Source.C1`, `Source.C2`)[1]) AS x
FROM test
WHERE 0
GROUP BY 42;
DROP TABLE test;

View File

@ -0,0 +1,15 @@
999 1998
999 1998
SELECT
number,
square_number
FROM
(
WITH number * 2 AS square_number
SELECT
number,
square_number
FROM numbers_indexed
WHERE number = 999
) AS squares
WHERE number = 999

View File

@ -0,0 +1,17 @@
DROP TABLE IF EXISTS numbers_indexed;
DROP TABLE IF EXISTS squares;
CREATE TABLE numbers_indexed Engine=MergeTree ORDER BY number PARTITION BY bitShiftRight(number,8) SETTINGS index_granularity=8 AS SELECT * FROM numbers(16384);
CREATE VIEW squares AS WITH number*2 AS square_number SELECT number, square_number FROM numbers_indexed;
SET max_rows_to_read=8, read_overflow_mode='throw';
WITH number * 2 AS square_number SELECT number, square_number FROM numbers_indexed WHERE number = 999;
SELECT * FROM squares WHERE number = 999;
EXPLAIN SYNTAX SELECT number, square_number FROM ( WITH number * 2 AS square_number SELECT number, square_number FROM numbers_indexed) AS squares WHERE number = 999;
DROP TABLE IF EXISTS squares;
DROP TABLE IF EXISTS numbers_indexed;

View File

@ -6,7 +6,7 @@ trap 'kill -9 $(jobs -p)' EXIT
function thread()
{
while true; do
./clickhouse-test --order random 2>&1 | awk '/^\w+:/ { printf("\033[0;%s%sm \033[0m", ('$1' % 2 ? "4" : "10"), (int('$1' / 2) % 8)) }'
./clickhouse-test --client-option="query-fuzzer-runs=10" --order random 2>&1 | awk '/^\w+:/ { printf("\033[0;%s%sm \033[0m", ('$1' % 2 ? "4" : "10"), (int('$1' / 2) % 8)) }'
done
}