From 0f7f8ace7388fd6aa700d21fbc946d48cc8eae43 Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Tue, 26 Jan 2021 01:39:23 +0300 Subject: [PATCH 001/149] DOCSUP-5266: Add changes from PR --- .../operations/utilities/clickhouse-local.md | 4 ++ .../functions/date-time-functions.md | 12 ++++-- .../operations/utilities/clickhouse-local.md | 7 +++- .../data-types/simpleaggregatefunction.md | 3 ++ .../functions/date-time-functions.md | 38 +++++++++++++++++++ 5 files changed, 59 insertions(+), 5 deletions(-) diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index 04f9f3660b5..cfabf42bff1 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -91,6 +91,8 @@ $ clickhouse-local --query " Now let’s output memory user for each Unix user: +Query: + ``` bash $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \ | clickhouse-local --structure "user String, mem Float64" \ @@ -98,6 +100,8 @@ $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \ FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty" ``` +Result: + ``` text Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec. ┏━━━━━━━━━━┳━━━━━━━━━━┓ diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 9de780fb596..b73d13c59a4 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -661,8 +661,6 @@ Result: └────────────────────────────────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/query_language/functions/date_time_functions/) - ## FROM\_UNIXTIME {#fromunixfime} When there is only single argument of integer type, it act in the same way as `toDateTime` and return [DateTime](../../sql-reference/data-types/datetime.md). @@ -670,10 +668,14 @@ type. 
For example: +Query: + ```sql -SELECT FROM_UNIXTIME(423543535) +SELECT FROM_UNIXTIME(423543535); ``` +Result: + ```text ┌─FROM_UNIXTIME(423543535)─┐ │ 1983-06-04 10:58:55 │ @@ -685,7 +687,7 @@ When there are two arguments, first is integer or DateTime, second is constant f For example: ```sql -SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime +SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime; ``` ```text @@ -837,3 +839,5 @@ Result: │ 2020-01-01 │ └────────────────────────────────────┘ ``` + +[Original article](https://clickhouse.tech/docs/en/query_language/functions/date_time_functions/) \ No newline at end of file diff --git a/docs/ru/operations/utilities/clickhouse-local.md b/docs/ru/operations/utilities/clickhouse-local.md index 2b5c9b119e2..e3c421ac75e 100644 --- a/docs/ru/operations/utilities/clickhouse-local.md +++ b/docs/ru/operations/utilities/clickhouse-local.md @@ -21,7 +21,8 @@ toc_title: clickhouse-local Основной формат вызова: ``` bash -$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" -q "query" +$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" \ + --query "query" ``` Ключи команды: @@ -78,6 +79,8 @@ $ clickhouse-local --query " А теперь давайте выведем на экран объём оперативной памяти, занимаемой пользователями (Unix): +Запрос: + ``` bash $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \ | clickhouse-local --structure "user String, mem Float64" \ @@ -85,6 +88,8 @@ $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \ FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty" ``` +Ответ: + ``` text Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec. ┏━━━━━━━━━━┳━━━━━━━━━━┓ diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index 52f0412a177..3ff4e5fd662 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -15,6 +15,9 @@ The following aggregate functions are supported: - [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor) - [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray) - [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md#groupuniqarray) +- [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) +- [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap) +- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap) Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function. 
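To make the newly listed map functions concrete, here is a minimal sketch — the `simple_maps` table and its columns are hypothetical, not part of this patch — of a `sumMap` column declared through `SimpleAggregateFunction`. Because the column stores a plain value, the aggregate function is applied directly, with no `-Merge`/`-State` suffix:

``` sql
-- Hypothetical table; sumMap keeps one running (keys, values) pair per key.
CREATE TABLE simple_maps
(
    id UInt64,
    totals SimpleAggregateFunction(sumMap, Tuple(Array(Int32), Array(Int64)))
) ENGINE = AggregatingMergeTree ORDER BY id;

INSERT INTO simple_maps VALUES (1, ([1, 2], [10, 10])), (1, ([1, 3], [5, 5]));

-- The column holds ordinary Tuple(Array(Int32), Array(Int64)) values,
-- so plain sumMap finishes the aggregation at query time.
SELECT id, sumMap(totals) FROM simple_maps GROUP BY id;
```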
diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 31482cde77f..e923de8ebd2 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -665,4 +665,42 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g') └────────────────────────────────────────────┘ ``` +## FROM\_UNIXTIME {#fromunixfime} + +Когда есть только один аргумент целочисленного типа, он действует так же, как `toDateTime` и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md). + +**Пример** + +Запрос: + +```sql +SELECT FROM_UNIXTIME(423543535); +``` + +Ответ: + +```text +┌─FROM_UNIXTIME(423543535)─┐ +│ 1983-06-04 10:58:55 │ +└──────────────────────────┘ +``` + +В случае, когда есть два аргумента, первый типа `Integer` или `DateTime`, а второй — является строкой постоянного формата, функция работает таким же образом, как `formatdatetime` и возвращает значение типа `String`. + +**Пример** + +Запрос: + +```sql +SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime; +``` + +Ответ: + +```text +┌─DateTime────────────┐ +│ 2009-02-11 14:42:23 │ +└─────────────────────┘ +``` + [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/date_time_functions/) From 097c9362bdad12d3ffbc7a817fc3bfda81a82156 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Tue, 26 Jan 2021 14:00:52 +0300 Subject: [PATCH 002/149] Update date-time-functions.md --- docs/en/sql-reference/functions/date-time-functions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index b73d13c59a4..856ce830abe 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -602,7 +602,7 @@ This is necessary for searching for pageviews in the corresponding session. ## formatDateTime {#formatdatetime} -Function formats a Time according given Format string. N.B.: Format is a constant expression, e.g. you can not have multiple formats for single result column. +Function formats a Time according to the given Format string. N.B.: Format is a constant expression, e.g. you cannot have multiple formats for a single result column. **Syntax** @@ -663,7 +663,7 @@ Result: ## FROM\_UNIXTIME {#fromunixfime} -When there is only single argument of integer type, it act in the same way as `toDateTime` and return [DateTime](../../sql-reference/data-types/datetime.md). +When there is only a single argument of integer type, it acts in the same way as `toDateTime` and return [DateTime](../../sql-reference/data-types/datetime.md). type. For example: @@ -682,7 +682,7 @@ Result: └──────────────────────────┘ ``` -When there are two arguments, first is integer or DateTime, second is constant format string, it act in the same way as `formatDateTime` and return `String` type. +When there are two arguments: first is an integer or DateTime, second is a constant format string - it acts in the same way as `formatDateTime` and return `String` type. 
For example: @@ -840,4 +840,4 @@ Result: └────────────────────────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/query_language/functions/date_time_functions/) \ No newline at end of file +[Original article](https://clickhouse.tech/docs/en/query_language/functions/date_time_functions/) From 1834c5ccae9da4b456544dbfa22d01f16ad0393f Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Tue, 26 Jan 2021 14:04:39 +0300 Subject: [PATCH 003/149] Update date-time-functions.md --- docs/ru/sql-reference/functions/date-time-functions.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index e923de8ebd2..4db244d2388 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -665,9 +665,9 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g') └────────────────────────────────────────────┘ ``` -## FROM\_UNIXTIME {#fromunixfime} +## FROM\_UNIXTIME {#fromunixtime} -Когда есть только один аргумент целочисленного типа, он действует так же, как `toDateTime` и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md). +Когда указан только один аргумент целочисленного типа, то функция действует так же, как `toDateTime`, и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md). **Пример** @@ -685,7 +685,7 @@ SELECT FROM_UNIXTIME(423543535); └──────────────────────────┘ ``` -В случае, когда есть два аргумента, первый типа `Integer` или `DateTime`, а второй — является строкой постоянного формата, функция работает таким же образом, как `formatdatetime` и возвращает значение типа `String`. +В случае, когда есть два аргумента: первый типа `Integer` или `DateTime`, а второй является строкой постоянного формата — функция работает таким же образом, как `formatDateTime`, и возвращает значение типа `String`. **Пример** From 04531f14d9fb55c3eca1ac23070262d200828d60 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Tue, 26 Jan 2021 14:06:08 +0300 Subject: [PATCH 004/149] Fix hyphen --- docs/en/sql-reference/functions/date-time-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 856ce830abe..f11bec55697 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -682,7 +682,7 @@ Result: └──────────────────────────┘ ``` -When there are two arguments: first is an integer or DateTime, second is a constant format string - it acts in the same way as `formatDateTime` and return `String` type. +When there are two arguments: first is an integer or DateTime, second is a constant format string — it acts in the same way as `formatDateTime` and return `String` type. 
For example: From 1f22ba4bbb384c72f6fc57538c7ebb13dacd73ca Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Wed, 27 Jan 2021 12:35:08 +0300 Subject: [PATCH 005/149] DOCSUP-5266: fix PR and ticket comments --- .../data-types/simpleaggregatefunction.md | 6 +++++- .../data-types/simpleaggregatefunction.md | 21 ++++++++++++------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 2d2746f85d3..015972d7dbe 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -21,7 +21,11 @@ The following aggregate functions are supported: - [`argMin`](../../sql-reference/aggregate-functions/reference/argmin.md) - [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md) -Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function. + +!!! note "Note" + Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. + + `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function. **Parameters** diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index 3ff4e5fd662..84e20877866 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -1,8 +1,9 @@ # SimpleAggregateFunction {#data-type-simpleaggregatefunction} -`SimpleAggregateFunction(name, types_of_arguments…)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we don’t have to store and process any extra data. +Тип данных `SimpleAggregateFunction(name, types_of_arguments…)` хранит только текущее значение агрегатной функции и не сохраняет ее полное состояние, как это делает [`AggregateFunction`] (../../sql-reference/data-types/aggregatefunction.md). Такая оптимизация может быть применена к функциям, для которых выполняется следующее свойство: результат выполнения функции `f` к набору строк `S1 UNION ALL S2` может быть получен путем выполнения `f` к отдельным частям набора строк, +а затем повторного выполнения `f` к результатам: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. Это свойство гарантирует, что результатов частичной агрегации достаточно для вычисления комбинированной, поэтому нам не нужно хранить и обрабатывать какие-либо дополнительные данные. 
-The following aggregate functions are supported: +Поддерживаются следующие агрегатные функции: - [`any`](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any) - [`anyLast`](../../sql-reference/aggregate-functions/reference/anylast.md#anylastx) @@ -19,14 +20,18 @@ The following aggregate functions are supported: - [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap) - [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap) -Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function. +!!! note "Примечание" + Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому вам не требуется применять функции с суффиксами `-Merge`/`-State`. + + `SimpleAggregateFunction` имеет лучшую производительность, чем `AggregateFunction` с той же агрегатной функцией. -**Parameters** -- Name of the aggregate function. -- Types of the aggregate function arguments. +**Параметры** -**Example** +- имя агрегатной функции. +- типы аргументов агрегатной функции. + +**Пример** ``` sql CREATE TABLE t @@ -36,4 +41,4 @@ CREATE TABLE t ) ENGINE = ... ``` -[Original article](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) +[Оригинальная статья](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) From 68119d78680b0e6dc181caf81eb8e7724ce8c535 Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Wed, 27 Jan 2021 12:50:49 +0300 Subject: [PATCH 006/149] DOCSUP-5266: fix PR and ticket comments --- docs/ru/sql-reference/data-types/simpleaggregatefunction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index 84e20877866..c1b3ac240f0 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -1,6 +1,6 @@ # SimpleAggregateFunction {#data-type-simpleaggregatefunction} -Тип данных `SimpleAggregateFunction(name, types_of_arguments…)` хранит только текущее значение агрегатной функции и не сохраняет ее полное состояние, как это делает [`AggregateFunction`] (../../sql-reference/data-types/aggregatefunction.md). Такая оптимизация может быть применена к функциям, для которых выполняется следующее свойство: результат выполнения функции `f` к набору строк `S1 UNION ALL S2` может быть получен путем выполнения `f` к отдельным частям набора строк, +Тип данных `SimpleAggregateFunction(name, types_of_arguments…)` хранит только текущее значение агрегатной функции и не сохраняет ее полное состояние, как это делает [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md). Такая оптимизация может быть применена к функциям, для которых выполняется следующее свойство: результат выполнения функции `f` к набору строк `S1 UNION ALL S2` может быть получен путем выполнения `f` к отдельным частям набора строк, а затем повторного выполнения `f` к результатам: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. Это свойство гарантирует, что результатов частичной агрегации достаточно для вычисления комбинированной, поэтому нам не нужно хранить и обрабатывать какие-либо дополнительные данные. 
Поддерживаются следующие агрегатные функции: From ddd828e7847da270d457b0c7e747b96c7a8ad81d Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Sat, 30 Jan 2021 18:46:26 +0300 Subject: [PATCH 007/149] Update docs/en/sql-reference/functions/date-time-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/en/sql-reference/functions/date-time-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index f11bec55697..624e04ca21c 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -602,7 +602,7 @@ This is necessary for searching for pageviews in the corresponding session. ## formatDateTime {#formatdatetime} -Function formats a Time according to the given Format string. N.B.: Format is a constant expression, e.g. you cannot have multiple formats for a single result column. +Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column. **Syntax** From 7a9863194a9310270c8b6f8ebd1d75195f7bae59 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Sat, 30 Jan 2021 18:46:36 +0300 Subject: [PATCH 008/149] Update docs/ru/operations/utilities/clickhouse-local.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/operations/utilities/clickhouse-local.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/utilities/clickhouse-local.md b/docs/ru/operations/utilities/clickhouse-local.md index e3c421ac75e..f439049401c 100644 --- a/docs/ru/operations/utilities/clickhouse-local.md +++ b/docs/ru/operations/utilities/clickhouse-local.md @@ -77,7 +77,7 @@ $ clickhouse-local --query " 1 2 ``` -А теперь давайте выведем на экран объём оперативной памяти, занимаемой пользователями (Unix): +Объём оперативной памяти, занимаемой пользователями (Unix): Запрос: From 9e0d5c4c9819914d682806f1a7e550bff4125d61 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Sat, 30 Jan 2021 18:46:53 +0300 Subject: [PATCH 009/149] Update docs/ru/sql-reference/data-types/simpleaggregatefunction.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/data-types/simpleaggregatefunction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index c1b3ac240f0..2ca949843b7 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -1,6 +1,6 @@ # SimpleAggregateFunction {#data-type-simpleaggregatefunction} -Тип данных `SimpleAggregateFunction(name, types_of_arguments…)` хранит только текущее значение агрегатной функции и не сохраняет ее полное состояние, как это делает [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md). Такая оптимизация может быть применена к функциям, для которых выполняется следующее свойство: результат выполнения функции `f` к набору строк `S1 UNION ALL S2` может быть получен путем выполнения `f` к отдельным частям набора строк, +Хранит только текущее значение агрегатной функции и не сохраняет ее полное состояние, как это делает [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md). 
Такая оптимизация может быть применена к функциям, которые обладают следующим свойством: результат выполнения функции `f` к набору строк `S1 UNION ALL S2` может быть получен путем выполнения `f` к отдельным частям набора строк, а затем повторного выполнения `f` к результатам: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. Это свойство гарантирует, что результатов частичной агрегации достаточно для вычисления комбинированной, поэтому нам не нужно хранить и обрабатывать какие-либо дополнительные данные. Поддерживаются следующие агрегатные функции: From 320e78dea614311bd8fcd7451906be1c90f71538 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Sat, 30 Jan 2021 18:47:15 +0300 Subject: [PATCH 010/149] Update docs/ru/sql-reference/data-types/simpleaggregatefunction.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/data-types/simpleaggregatefunction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index 2ca949843b7..cb6c4b8208d 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -21,7 +21,7 @@ - [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap) !!! note "Примечание" - Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому вам не требуется применять функции с суффиксами `-Merge`/`-State`. + Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому комбинаторы [-Merge](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators-merge) и [-State]((../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) не требуются. `SimpleAggregateFunction` имеет лучшую производительность, чем `AggregateFunction` с той же агрегатной функцией. From 7126ca376995fa58eb3f07a2c55ba4a5cd88a11f Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Sat, 30 Jan 2021 18:47:25 +0300 Subject: [PATCH 011/149] Update docs/ru/sql-reference/data-types/simpleaggregatefunction.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/data-types/simpleaggregatefunction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index cb6c4b8208d..b906a56516f 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -29,7 +29,7 @@ **Параметры** - имя агрегатной функции. -- типы аргументов агрегатной функции. +- `type` — типы аргументов агрегатной функции. 
**Пример** From ac0ec2753c9a3021b3efaee1b7dbc4898242942f Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Sat, 30 Jan 2021 18:47:33 +0300 Subject: [PATCH 012/149] Update docs/ru/sql-reference/data-types/simpleaggregatefunction.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/data-types/simpleaggregatefunction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index b906a56516f..bf866f7bc58 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -1,4 +1,4 @@ -# SimpleAggregateFunction {#data-type-simpleaggregatefunction} +# SimpleAggregateFunction(func, type) {#data-type-simpleaggregatefunction} Хранит только текущее значение агрегатной функции и не сохраняет ее полное состояние, как это делает [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md). Такая оптимизация может быть применена к функциям, которые обладают следующим свойством: результат выполнения функции `f` к набору строк `S1 UNION ALL S2` может быть получен путем выполнения `f` к отдельным частям набора строк, а затем повторного выполнения `f` к результатам: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. Это свойство гарантирует, что результатов частичной агрегации достаточно для вычисления комбинированной, поэтому нам не нужно хранить и обрабатывать какие-либо дополнительные данные. From 004b9dd09823c729a800310c8449f56ad28bb51a Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Sat, 30 Jan 2021 18:47:41 +0300 Subject: [PATCH 013/149] Update docs/ru/sql-reference/data-types/simpleaggregatefunction.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/data-types/simpleaggregatefunction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index bf866f7bc58..39f3ef99b1c 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -28,7 +28,7 @@ **Параметры** -- имя агрегатной функции. +- `func` — имя агрегатной функции. - `type` — типы аргументов агрегатной функции. **Пример** From ebcee0525d24464222534c002632589b9d1ad318 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Sat, 30 Jan 2021 18:47:50 +0300 Subject: [PATCH 014/149] Update docs/ru/sql-reference/data-types/simpleaggregatefunction.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/data-types/simpleaggregatefunction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index 39f3ef99b1c..10daad93cc6 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -1,7 +1,7 @@ # SimpleAggregateFunction(func, type) {#data-type-simpleaggregatefunction} Хранит только текущее значение агрегатной функции и не сохраняет ее полное состояние, как это делает [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md). 
Такая оптимизация может быть применена к функциям, которые обладают следующим свойством: результат выполнения функции `f` к набору строк `S1 UNION ALL S2` может быть получен путем выполнения `f` к отдельным частям набора строк, -а затем повторного выполнения `f` к результатам: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. Это свойство гарантирует, что результатов частичной агрегации достаточно для вычисления комбинированной, поэтому нам не нужно хранить и обрабатывать какие-либо дополнительные данные. +а затем повторного выполнения `f` к результатам: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. Это свойство гарантирует, что результатов частичной агрегации достаточно для вычисления комбинированной, поэтому хранить и обрабатывать какие-либо дополнительные данные не требуется. Поддерживаются следующие агрегатные функции: From a937bf26a137544e8c6bfcbce4077c999af0a0ef Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Mon, 1 Feb 2021 21:11:47 +0300 Subject: [PATCH 015/149] DOCSUP-5266: Fix ticket comments. --- .../data-types/simpleaggregatefunction.md | 2 +- .../functions/date-time-functions.md | 7 ++-- .../operations/utilities/clickhouse-local.md | 2 +- .../data-types/simpleaggregatefunction.md | 2 +- .../functions/date-time-functions.md | 35 ++++++++++++++----- 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 015972d7dbe..155a7e1f858 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -32,7 +32,7 @@ The following aggregate functions are supported: - Name of the aggregate function. - Types of the aggregate function arguments. -**Example** +**Syntax** ``` sql CREATE TABLE t diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 624e04ca21c..c995ce32cd4 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -663,10 +663,9 @@ Result: ## FROM\_UNIXTIME {#fromunixfime} -When there is only a single argument of integer type, it acts in the same way as `toDateTime` and return [DateTime](../../sql-reference/data-types/datetime.md). -type. +Function converts Unix timestamp to date. When there is only a single argument of integer type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type. -For example: +**Example:** Query: @@ -682,7 +681,7 @@ Result: └──────────────────────────┘ ``` -When there are two arguments: first is an integer or DateTime, second is a constant format string — it acts in the same way as `formatDateTime` and return `String` type. +When there are two arguments: first is an [Integer](../../sql-reference/data-types/int-uint.md/#int-ranges) or [DateTime](../../sql-reference/data-types/datetime.md), second is a constant format string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type. 
For example: diff --git a/docs/ru/operations/utilities/clickhouse-local.md b/docs/ru/operations/utilities/clickhouse-local.md index f439049401c..8ecbbfcce8c 100644 --- a/docs/ru/operations/utilities/clickhouse-local.md +++ b/docs/ru/operations/utilities/clickhouse-local.md @@ -88,7 +88,7 @@ $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \ FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty" ``` -Ответ: +Результат: ``` text Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec. diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index 10daad93cc6..9605706442e 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -31,7 +31,7 @@ - `func` — имя агрегатной функции. - `type` — типы аргументов агрегатной функции. -**Пример** +**Синтаксис** ``` sql CREATE TABLE t diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 4db244d2388..bc35589363f 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -305,7 +305,9 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d Переводит дату-с-временем или дату в число типа UInt16, содержащее номер ISO года. ISO год отличается от обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) ISO год начинается необязательно первого января. -Пример: +**Пример:** + +Запрос: ```sql SELECT @@ -313,6 +315,9 @@ SELECT toYear(date), toISOYear(date) ``` + +Результат: + ```text ┌───────date─┬─toYear(toDate('2017-01-01'))─┬─toISOYear(toDate('2017-01-01'))─┐ │ 2017-01-01 │ 2017 │ 2016 │ @@ -326,12 +331,18 @@ SELECT 1 Января 2017 г. - воскресение, т.е. первая ISO неделя 2017 года началась в понедельник 2 января, поэтому 1 января 2017 это последняя неделя 2016 года. 
+**Пример** + +Запрос: + ```sql SELECT toISOWeek(toDate('2017-01-01')) AS ISOWeek20170101, toISOWeek(toDate('2017-01-02')) AS ISOWeek20170102 ``` +Результат: + ```text ┌─ISOWeek20170101─┬─ISOWeek20170102─┐ │ 52 │ 1 │ @@ -368,10 +379,14 @@ SELECT **Пример** +Запрос: + ```sql SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS week1, toWeek(date,9) AS week9; ``` +Результат: + ```text ┌───────date─┬─week0─┬─week1─┬─week9─┐ │ 2016-12-27 │ 52 │ 52 │ 1 │ @@ -387,10 +402,14 @@ SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS we **Пример** +Запрос: + ```sql SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9; ``` +Результат: + ```text ┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┐ │ 2016-12-27 │ 201652 │ 201652 │ 201701 │ @@ -573,7 +592,7 @@ dateDiff('unit', startdate, enddate, [timezone]) SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); ``` -Ответ: +Результат: ``` text ┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ @@ -654,10 +673,10 @@ formatDateTime(Time, Format\[, Timezone\]) Запрос: ``` sql -SELECT formatDateTime(toDate('2010-01-04'), '%g') +SELECT formatDateTime(toDate('2010-01-04'), '%g'); ``` -Ответ: +Результат: ``` ┌─formatDateTime(toDate('2010-01-04'), '%g')─┐ @@ -667,7 +686,7 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g') ## FROM\_UNIXTIME {#fromunixtime} -Когда указан только один аргумент целочисленного типа, то функция действует так же, как `toDateTime`, и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md). +Функция преобразует метку времени Unix в дату. Если указан только один аргумент типа [Integer](../../sql-reference/data-types/int-uint.md/#int-ranges), то функция действует так же, как [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime), и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md). **Пример** @@ -677,7 +696,7 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g') SELECT FROM_UNIXTIME(423543535); ``` -Ответ: +Результат: ```text ┌─FROM_UNIXTIME(423543535)─┐ @@ -685,7 +704,7 @@ SELECT FROM_UNIXTIME(423543535); └──────────────────────────┘ ``` -В случае, когда есть два аргумента: первый типа `Integer` или `DateTime`, а второй является строкой постоянного формата — функция работает таким же образом, как `formatDateTime`, и возвращает значение типа `String`. +В случае, когда есть два аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md/#int-ranges) или [DateTime](../../sql-reference/data-types/datetime.md), а второй является строкой постоянного формата — функция работает таким же образом, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string). **Пример** @@ -695,7 +714,7 @@ SELECT FROM_UNIXTIME(423543535); SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime; ``` -Ответ: +Результат: ```text ┌─DateTime────────────┐ From f58ae0ffa15f53c0249ba9c349977475d79f8433 Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Mon, 1 Feb 2021 21:27:13 +0300 Subject: [PATCH 016/149] DOCSUP-5266: Fix ticket comments. 
--- docs/en/sql-reference/functions/date-time-functions.md | 2 +- docs/ru/sql-reference/functions/date-time-functions.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index c995ce32cd4..0ac1d325fbc 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -681,7 +681,7 @@ Result: └──────────────────────────┘ ``` -When there are two arguments: first is an [Integer](../../sql-reference/data-types/int-uint.md/#int-ranges) or [DateTime](../../sql-reference/data-types/datetime.md), second is a constant format string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type. +When there are two arguments: first is an [Integer](../../sql-reference/data-types/int-uint.md#int-ranges) or [DateTime](../../sql-reference/data-types/datetime.md), second is a constant format string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type. For example: diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index bc35589363f..a822c4f9778 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -686,7 +686,7 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g'); ## FROM\_UNIXTIME {#fromunixtime} -Функция преобразует метку времени Unix в дату. Если указан только один аргумент типа [Integer](../../sql-reference/data-types/int-uint.md/#int-ranges), то функция действует так же, как [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime), и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md). +Функция преобразует метку времени Unix в дату. Если указан только один аргумент типа [Integer](../../sql-reference/data-types/int-uint.md#int-ranges), то функция действует так же, как [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime), и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md). **Пример** @@ -704,7 +704,7 @@ SELECT FROM_UNIXTIME(423543535); └──────────────────────────┘ ``` -В случае, когда есть два аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md/#int-ranges) или [DateTime](../../sql-reference/data-types/datetime.md), а второй является строкой постоянного формата — функция работает таким же образом, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string). +В случае, когда есть два аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md#int-ranges) или [DateTime](../../sql-reference/data-types/datetime.md), а второй является строкой постоянного формата — функция работает таким же образом, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string). **Пример** From 23914860b07ea5d4ebfe7b639fff5999c78afd3c Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Mon, 1 Feb 2021 21:43:38 +0300 Subject: [PATCH 017/149] DOCSUP-5266: Fix ticket comments. 
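The paragraphs touched by these link fixes state that `FROM_UNIXTIME` also accepts a `DateTime` first argument together with a format string, a variant none of the committed examples demonstrate. A small illustrative sketch (the timestamp is arbitrary; both columns should render the same moment):

``` sql
SELECT
    FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS from_integer,
    FROM_UNIXTIME(toDateTime(1234334543), '%Y-%m-%d %R:%S') AS from_datetime;
```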
--- docs/en/sql-reference/functions/date-time-functions.md | 4 ++-- docs/ru/sql-reference/functions/date-time-functions.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 0ac1d325fbc..ce2092a7818 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -663,7 +663,7 @@ Result: ## FROM\_UNIXTIME {#fromunixfime} -Function converts Unix timestamp to date. When there is only a single argument of integer type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type. +Function converts Unix timestamp to date. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type. **Example:** @@ -681,7 +681,7 @@ Result: └──────────────────────────┘ ``` -When there are two arguments: first is an [Integer](../../sql-reference/data-types/int-uint.md#int-ranges) or [DateTime](../../sql-reference/data-types/datetime.md), second is a constant format string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type. +When there are two arguments: first is an [Integer](../../sql-reference/data-types/int-uint.md) or [DateTime](../../sql-reference/data-types/datetime.md), second is a constant format string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type. For example: diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index a822c4f9778..b23862ccce2 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -686,7 +686,7 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g'); ## FROM\_UNIXTIME {#fromunixtime} -Функция преобразует метку времени Unix в дату. Если указан только один аргумент типа [Integer](../../sql-reference/data-types/int-uint.md#int-ranges), то функция действует так же, как [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime), и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md). +Функция преобразует метку времени Unix в дату. Если указан только один аргумент типа [Integer](../../sql-reference/data-types/int-uint.md), то функция действует так же, как [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime), и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md). **Пример** @@ -704,7 +704,7 @@ SELECT FROM_UNIXTIME(423543535); └──────────────────────────┘ ``` -В случае, когда есть два аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md#int-ranges) или [DateTime](../../sql-reference/data-types/datetime.md), а второй является строкой постоянного формата — функция работает таким же образом, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string). 
+В случае, когда есть два аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md) или [DateTime](../../sql-reference/data-types/datetime.md), а второй является строкой постоянного формата — функция работает таким же образом, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string). **Пример** From 1bd80f6c521432c916d08c9f4d91bc3c45cd0589 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Thu, 4 Feb 2021 23:59:00 +0300 Subject: [PATCH 018/149] Update docs/ru/sql-reference/functions/date-time-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/functions/date-time-functions.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index b23862ccce2..aa03874d54f 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -688,7 +688,9 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g'); Функция преобразует метку времени Unix в дату. Если указан только один аргумент типа [Integer](../../sql-reference/data-types/int-uint.md), то функция действует так же, как [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime), и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md). -**Пример** +**Примеры** + +Если указан только один аргумент типа [Integer](../../sql-reference/data-types/int-uint.md), то функция действует так же, как [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime), и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md). Запрос: From c1328a963885058eec375f527500c40f5b121973 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Thu, 4 Feb 2021 23:59:18 +0300 Subject: [PATCH 019/149] Update docs/ru/sql-reference/functions/date-time-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/functions/date-time-functions.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index aa03874d54f..14c7ebc7ae9 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -708,7 +708,6 @@ SELECT FROM_UNIXTIME(423543535); В случае, когда есть два аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md) или [DateTime](../../sql-reference/data-types/datetime.md), а второй является строкой постоянного формата — функция работает таким же образом, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string). 
-**Пример** Запрос: From d7098e56782187e7740fadaca93304ca2eb6310e Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Fri, 5 Feb 2021 00:00:04 +0300 Subject: [PATCH 020/149] Update docs/ru/sql-reference/functions/date-time-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/functions/date-time-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 14c7ebc7ae9..0acb9e3cd39 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -686,7 +686,7 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g'); ## FROM\_UNIXTIME {#fromunixtime} -Функция преобразует метку времени Unix в дату. Если указан только один аргумент типа [Integer](../../sql-reference/data-types/int-uint.md), то функция действует так же, как [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime), и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md). +Функция преобразует метку времени Unix в дату. **Примеры** From 0fbb3473079e171d3d9903c06b326e5cc9d84627 Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Fri, 5 Feb 2021 00:39:14 +0300 Subject: [PATCH 021/149] DOCSUP-5266: Fix ticket comments. --- .../data-types/simpleaggregatefunction.md | 20 +++++++++++++----- .../data-types/simpleaggregatefunction.md | 21 +++++++++++++------ 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 155a7e1f858..9ea5a586981 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -32,14 +32,24 @@ The following aggregate functions are supported: - Name of the aggregate function. - Types of the aggregate function arguments. -**Syntax** +**Example** + +Query: ``` sql -CREATE TABLE t +CREATE TABLE simple (id UInt64,val SimpleAggregateFunction(sum,Double)) ENGINE=AggregatingMergeTree ORDER BY id; +``` + +Result: + +``` text +CREATE TABLE simple ( - column1 SimpleAggregateFunction(sum, UInt64), - column2 SimpleAggregateFunction(any, String) -) ENGINE = ... + `id` UInt64, + `val` SimpleAggregateFunction(sum, Double) +) +ENGINE = AggregatingMergeTree +ORDER BY id ``` [Original article](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index 9605706442e..7441ceae655 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -25,20 +25,29 @@ `SimpleAggregateFunction` имеет лучшую производительность, чем `AggregateFunction` с той же агрегатной функцией. - **Параметры** - `func` — имя агрегатной функции. - `type` — типы аргументов агрегатной функции. -**Синтаксис** +**Пример** + +Запрос: ``` sql -CREATE TABLE t +CREATE TABLE simple (id UInt64,val SimpleAggregateFunction(sum,Double)) ENGINE=AggregatingMergeTree ORDER BY id; +``` + +Ответ: + +``` text +CREATE TABLE simple ( - column1 SimpleAggregateFunction(sum, UInt64), - column2 SimpleAggregateFunction(any, String) -) ENGINE = ... 
+ `id` UInt64, + `val` SimpleAggregateFunction(sum, Double) +) +ENGINE = AggregatingMergeTree +ORDER BY id ``` [Оригинальная статья](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) From 610798aa487ee1b2ef6007b9185a1c1b27a11660 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sat, 6 Feb 2021 15:32:49 +0800 Subject: [PATCH 022/149] fix the toMinute bug which will cause toDateTime or toString printing wrong time --- base/common/DateLUTImpl.h | 10 +++- src/Functions/ya.make | 1 + .../0_stateless/01698_fix_toMinute.reference | 24 ++++++++ .../0_stateless/01698_fix_toMinute.sql | 16 +++++ .../01699_timezoneOffset.reference | 58 +++++-------------- .../0_stateless/01699_timezoneOffset.sql | 3 +- 6 files changed, 65 insertions(+), 47 deletions(-) create mode 100644 tests/queries/0_stateless/01698_fix_toMinute.reference create mode 100644 tests/queries/0_stateless/01698_fix_toMinute.sql diff --git a/base/common/DateLUTImpl.h b/base/common/DateLUTImpl.h index 23c78f6e7fc..8991f69d3f3 100644 --- a/base/common/DateLUTImpl.h +++ b/base/common/DateLUTImpl.h @@ -317,8 +317,14 @@ public: if (offset_is_whole_number_of_hours_everytime) return (t / 60) % 60; - UInt32 date = find(t).date; - return (UInt32(t) - date) / 60 % 60; + /// To consider the DST changing situation within this day. + /// also make the special timezones with no whole hour offset such as 'Australia/Lord_Howe' been taken into account + DayNum index = findIndex(t); + time_t res = t - lut[index].date; + if (lut[index].amount_of_offset_change != 0 && t >= lut[index].date + lut[index].time_at_offset_change) + res += lut[index].amount_of_offset_change; + + return res / 60 % 60; } inline time_t toStartOfMinute(time_t t) const { return t / 60 * 60; } diff --git a/src/Functions/ya.make b/src/Functions/ya.make index b97a4a187e9..647bbbb47cb 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -452,6 +452,7 @@ SRCS( timeSlot.cpp timeSlots.cpp timezone.cpp + timezoneOffset.cpp toColumnTypeName.cpp toCustomWeek.cpp toDayOfMonth.cpp diff --git a/tests/queries/0_stateless/01698_fix_toMinute.reference b/tests/queries/0_stateless/01698_fix_toMinute.reference new file mode 100644 index 00000000000..5df800c9fef --- /dev/null +++ b/tests/queries/0_stateless/01698_fix_toMinute.reference @@ -0,0 +1,24 @@ +Check the bug causing situation: the special Australia/Lord_Howe time zone. 
tooDateTime and toString functions are all tested at once +1554559200 2019-04-07 01:00:00 2019-04-07 01:00:00 +1554559800 2019-04-07 01:10:00 2019-04-07 01:10:00 +1554560400 2019-04-07 01:20:00 2019-04-07 01:20:00 +1554561000 2019-04-07 01:30:00 2019-04-07 01:30:00 +1554561600 2019-04-07 01:40:00 2019-04-07 01:40:00 +1554562200 2019-04-07 01:50:00 2019-04-07 01:50:00 +1554562800 2019-04-07 01:30:00 2019-04-07 01:30:00 +1554563400 2019-04-07 01:40:00 2019-04-07 01:40:00 +1554564000 2019-04-07 01:50:00 2019-04-07 01:50:00 +1554564600 2019-04-07 02:00:00 2019-04-07 02:00:00 +1554565200 2019-04-07 02:10:00 2019-04-07 02:10:00 +1554565800 2019-04-07 02:20:00 2019-04-07 02:20:00 +1554566400 2019-04-07 02:30:00 2019-04-07 02:30:00 +1554567000 2019-04-07 02:40:00 2019-04-07 02:40:00 +1554567600 2019-04-07 02:50:00 2019-04-07 02:50:00 +1554568200 2019-04-07 03:00:00 2019-04-07 03:00:00 +1554568800 2019-04-07 03:10:00 2019-04-07 03:10:00 +1554569400 2019-04-07 03:20:00 2019-04-07 03:20:00 +1554570000 2019-04-07 03:30:00 2019-04-07 03:30:00 +1554570600 2019-04-07 03:40:00 2019-04-07 03:40:00 +4 days test in batch comparing with manually computation result for Europe/Moscow whose timezone epoc is of whole hour: +4 days test in batch comparing with manually computation result for Asia/Tehran whose timezone epoc is of half hour: +4 days test in batch comparing with manually computation result for Australia/Lord_Howe whose timezone epoc is of half hour and also its DST offset is half hour: diff --git a/tests/queries/0_stateless/01698_fix_toMinute.sql b/tests/queries/0_stateless/01698_fix_toMinute.sql new file mode 100644 index 00000000000..293741b6957 --- /dev/null +++ b/tests/queries/0_stateless/01698_fix_toMinute.sql @@ -0,0 +1,16 @@ +/* toDateTime or toString or other functions which should call the toMinute() function will all meet this bug. tests below will verify the toDateTime and toString. */ +SELECT 'Check the bug causing situation: the special Australia/Lord_Howe time zone. tooDateTime and toString functions are all tested at once'; +SELECT toUnixTimestamp(x) as tt, (toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, toString(x) as xx FROM numbers(20); + +/* The Batch Part. 
The test period covers four whole days. */
+SELECT '4 days test in batch comparing with manually computation result for Europe/Moscow whose timezone epoc is of whole hour:';
+SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-04-01 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-09-30 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+
+SELECT '4 days test in batch comparing with manually computation result for Asia/Tehran whose timezone epoc is of half hour:';
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-03-21 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-09-20 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+
+SELECT '4 days test in batch comparing with manually computation result for Australia/Lord_Howe whose timezone epoc is of half hour and also its DST offset is half hour:';
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-10-04 01:40:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
diff --git a/tests/queries/0_stateless/01699_timezoneOffset.reference b/tests/queries/0_stateless/01699_timezoneOffset.reference
index e70c5fa62ee..45f30314f5a 100644
--- a/tests/queries/0_stateless/01699_timezoneOffset.reference
+++ b/tests/queries/0_stateless/01699_timezoneOffset.reference
@@ -50,57 +50,29 @@ DST boundary test for Australia/Lord_Howe. 
This is a special timezone with DST o DST boundary test for Australia/Lord_Howe: 0 2020-10-04 01:40:00 37800 1601737800 1 2020-10-04 01:50:00 37800 1601738400 -2 2020-10-04 02:00:00 39600 1601739000 -3 2020-10-04 02:10:00 39600 1601739600 +2 2020-10-04 02:30:00 39600 1601739000 +3 2020-10-04 02:40:00 39600 1601739600 0 2019-04-07 01:00:00 39600 1554559200 1 2019-04-07 01:10:00 39600 1554559800 2 2019-04-07 01:20:00 39600 1554560400 3 2019-04-07 01:30:00 39600 1554561000 4 2019-04-07 01:40:00 39600 1554561600 5 2019-04-07 01:50:00 39600 1554562200 -6 2019-04-07 01:00:00 37800 1554562800 -7 2019-04-07 01:10:00 37800 1554563400 -8 2019-04-07 01:20:00 37800 1554564000 -9 2019-04-07 02:30:00 37800 1554564600 -10 2019-04-07 02:40:00 37800 1554565200 -11 2019-04-07 02:50:00 37800 1554565800 -12 2019-04-07 02:00:00 37800 1554566400 -13 2019-04-07 02:10:00 37800 1554567000 -14 2019-04-07 02:20:00 37800 1554567600 -15 2019-04-07 03:30:00 37800 1554568200 -16 2019-04-07 03:40:00 37800 1554568800 -17 2019-04-07 03:50:00 37800 1554569400 +6 2019-04-07 01:30:00 37800 1554562800 +7 2019-04-07 01:40:00 37800 1554563400 +8 2019-04-07 01:50:00 37800 1554564000 +9 2019-04-07 02:00:00 37800 1554564600 +10 2019-04-07 02:10:00 37800 1554565200 +11 2019-04-07 02:20:00 37800 1554565800 +12 2019-04-07 02:30:00 37800 1554566400 +13 2019-04-07 02:40:00 37800 1554567000 +14 2019-04-07 02:50:00 37800 1554567600 +15 2019-04-07 03:00:00 37800 1554568200 +16 2019-04-07 03:10:00 37800 1554568800 +17 2019-04-07 03:20:00 37800 1554569400 4 days test in batch comparing with manually computation result for Europe/Moscow: 4 days test in batch comparing with manually computation result for Asia/Tehran: -The result maybe wrong for toDateTime processing Australia/Lord_Howe -1601739000 2020-10-04 02:00:00 39600 37800 -1601739600 2020-10-04 02:10:00 39600 37800 -1601740200 2020-10-04 02:20:00 39600 37800 -1601740800 2020-10-04 03:30:00 39600 41400 -1601741400 2020-10-04 03:40:00 39600 41400 -1601742000 2020-10-04 03:50:00 39600 41400 -1601742600 2020-10-04 03:00:00 39600 37800 -1601743200 2020-10-04 03:10:00 39600 37800 -1601743800 2020-10-04 03:20:00 39600 37800 -1601744400 2020-10-04 04:30:00 39600 41400 -1601745000 2020-10-04 04:40:00 39600 41400 -1601745600 2020-10-04 04:50:00 39600 41400 -1601746200 2020-10-04 04:00:00 39600 37800 -1601746800 2020-10-04 04:10:00 39600 37800 -1601747400 2020-10-04 04:20:00 39600 37800 -1601748000 2020-10-04 05:30:00 39600 41400 -1554562800 2019-04-07 01:00:00 37800 36000 -1554563400 2019-04-07 01:10:00 37800 36000 -1554564000 2019-04-07 01:20:00 37800 36000 -1554564600 2019-04-07 02:30:00 37800 39600 -1554565200 2019-04-07 02:40:00 37800 39600 -1554565800 2019-04-07 02:50:00 37800 39600 -1554566400 2019-04-07 02:00:00 37800 36000 -1554567000 2019-04-07 02:10:00 37800 36000 -1554567600 2019-04-07 02:20:00 37800 36000 -1554568200 2019-04-07 03:30:00 37800 39600 -1554568800 2019-04-07 03:40:00 37800 39600 -1554569400 2019-04-07 03:50:00 37800 39600 +4 days test in batch comparing with manually computation result for Australia/Lord_Howe Moscow DST Years: 11 1981-06-01 00:00:00 14400 12 1982-06-01 00:00:00 14400 diff --git a/tests/queries/0_stateless/01699_timezoneOffset.sql b/tests/queries/0_stateless/01699_timezoneOffset.sql index 1b3f05ecdd7..8cabb23c4de 100644 --- a/tests/queries/0_stateless/01699_timezoneOffset.sql +++ b/tests/queries/0_stateless/01699_timezoneOffset.sql @@ -26,8 +26,7 @@ SELECT '4 days test in batch comparing with manually computation result for Asia SELECT toUnixTimestamp(x) 
as tt, (toDateTime('2020-03-21 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-09-20 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; -/* During this test we got unexpected result comes from the toDateTime() function when process the special time zone of 'Australia/Lord_Howe', which may be some kind of bugs. */ -SELECT 'The result maybe wrong for toDateTime processing Australia/Lord_Howe'; +SELECT '4 days test in batch comparing with manually computation result for Australia/Lord_Howe'; SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-10-04 01:40:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(18) where res != calc; SELECT toUnixTimestamp(x) as tt, (toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(18) where res != calc; From 740c1c72e6eed901e56d7256f1067304e265dcf9 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sat, 6 Feb 2021 16:55:46 +0800 Subject: [PATCH 023/149] little fix --- tests/queries/0_stateless/01698_fix_toMinute.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01698_fix_toMinute.sql b/tests/queries/0_stateless/01698_fix_toMinute.sql index 293741b6957..f582806719d 100644 --- a/tests/queries/0_stateless/01698_fix_toMinute.sql +++ b/tests/queries/0_stateless/01698_fix_toMinute.sql @@ -1,5 +1,5 @@ /* toDateTime or toString or other functions which should call the toMinute() function will all meet this bug. tests below will verify the toDateTime and toString. */ -SELECT 'Check the bug causing situation: the special Australia/Lord_Howe time zone. tooDateTime and toString functions are all tested at once'; +SELECT 'Check the bug causing situation: the special Australia/Lord_Howe time zone. toDateTime and toString functions are all tested at once'; SELECT toUnixTimestamp(x) as tt, (toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, toString(x) as xx FROM numbers(20); /* The Batch Part. Test period is whole 4 days*/ From 34af94accfc03fb6335aae9b8ca27f6e6992d49d Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sat, 6 Feb 2021 16:59:01 +0800 Subject: [PATCH 024/149] little fix --- tests/queries/0_stateless/01698_fix_toMinute.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01698_fix_toMinute.reference b/tests/queries/0_stateless/01698_fix_toMinute.reference index 5df800c9fef..7675aad3a57 100644 --- a/tests/queries/0_stateless/01698_fix_toMinute.reference +++ b/tests/queries/0_stateless/01698_fix_toMinute.reference @@ -1,4 +1,4 @@ -Check the bug causing situation: the special Australia/Lord_Howe time zone. tooDateTime and toString functions are all tested at once +Check the bug causing situation: the special Australia/Lord_Howe time zone. 
toDateTime and toString functions are all tested at once 1554559200 2019-04-07 01:00:00 2019-04-07 01:00:00 1554559800 2019-04-07 01:10:00 2019-04-07 01:10:00 1554560400 2019-04-07 01:20:00 2019-04-07 01:20:00 From e252b138420cb9621dbc26aff3ef411d43177161 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Mon, 8 Feb 2021 23:54:28 +0300 Subject: [PATCH 025/149] Update simpleaggregatefunction.md Remove output of creating table example. --- .../data-types/simpleaggregatefunction.md | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index 7441ceae655..b80826803de 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -32,22 +32,8 @@ **Пример** -Запрос: - ``` sql CREATE TABLE simple (id UInt64,val SimpleAggregateFunction(sum,Double)) ENGINE=AggregatingMergeTree ORDER BY id; ``` -Ответ: - -``` text -CREATE TABLE simple -( - `id` UInt64, - `val` SimpleAggregateFunction(sum, Double) -) -ENGINE = AggregatingMergeTree -ORDER BY id -``` - [Оригинальная статья](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) From 436954dc26de1263b9071d530101b9468ac8c2eb Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Mon, 8 Feb 2021 23:54:52 +0300 Subject: [PATCH 026/149] Update simpleaggregatefunction.md --- .../data-types/simpleaggregatefunction.md | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 9ea5a586981..e25d4803613 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -34,22 +34,8 @@ The following aggregate functions are supported: **Example** -Query: - ``` sql CREATE TABLE simple (id UInt64,val SimpleAggregateFunction(sum,Double)) ENGINE=AggregatingMergeTree ORDER BY id; ``` -Result: - -``` text -CREATE TABLE simple -( - `id` UInt64, - `val` SimpleAggregateFunction(sum, Double) -) -ENGINE = AggregatingMergeTree -ORDER BY id -``` - [Original article](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) From 94a489ce97eef31f4036759b04d9651f4cd5512e Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Tue, 9 Feb 2021 17:25:04 +0300 Subject: [PATCH 027/149] Update docs/ru/sql-reference/functions/date-time-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/functions/date-time-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 0acb9e3cd39..d019c18a688 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -686,7 +686,7 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g'); ## FROM\_UNIXTIME {#fromunixtime} -Функция преобразует метку времени Unix в дату. +Функция преобразует Unix timestamp в календарную дату и время. 
**Примеры**

From 79a1a5741f723374b41325953c78f927fc4a92a4 Mon Sep 17 00:00:00 2001
From: Roman Bug
Date: Tue, 9 Feb 2021 17:25:38 +0300
Subject: [PATCH 028/149] Update
 docs/en/sql-reference/data-types/simpleaggregatefunction.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/en/sql-reference/data-types/simpleaggregatefunction.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md
index e25d4803613..244779c5ca8 100644
--- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md
+++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md
@@ -35,7 +35,7 @@ The following aggregate functions are supported:
 **Example**
 
 ``` sql
-CREATE TABLE simple (id UInt64,val SimpleAggregateFunction(sum,Double)) ENGINE=AggregatingMergeTree ORDER BY id;
+CREATE TABLE simple (id UInt64, val SimpleAggregateFunction(sum, Double)) ENGINE=AggregatingMergeTree ORDER BY id;
 ```
 
 [Original article](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/)

From 55727f511df2baa19584f32a7289d4e2ae298add Mon Sep 17 00:00:00 2001
From: Roman Bug
Date: Tue, 9 Feb 2021 17:27:39 +0300
Subject: [PATCH 029/149] Update
 docs/en/sql-reference/functions/date-time-functions.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/en/sql-reference/functions/date-time-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index ce2092a7818..ca62d2a61e5 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -663,7 +663,7 @@ Result:
 
 ## FROM\_UNIXTIME {#fromunixfime}
 
-Function converts Unix timestamp to date. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type.
+Function converts Unix timestamp to a calendar date and a time of a day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and returns [DateTime](../../sql-reference/data-types/datetime.md) type.
**Example:** From 44e857b5ea3ca2bbf49d3746af1c1941ac3a2f33 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Tue, 9 Feb 2021 17:30:16 +0300 Subject: [PATCH 030/149] Update simpleaggregatefunction.md --- docs/ru/sql-reference/data-types/simpleaggregatefunction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index b80826803de..7677b64e924 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -33,7 +33,7 @@ **Пример** ``` sql -CREATE TABLE simple (id UInt64,val SimpleAggregateFunction(sum,Double)) ENGINE=AggregatingMergeTree ORDER BY id; +CREATE TABLE simple (id UInt64, val SimpleAggregateFunction(sum, Double)) ENGINE=AggregatingMergeTree ORDER BY id; ``` [Оригинальная статья](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) From 178ada23f811354e47683677ab0c787c6170750e Mon Sep 17 00:00:00 2001 From: George Date: Wed, 10 Feb 2021 15:55:18 +0300 Subject: [PATCH 031/149] early draft --- .../functions/tuple-map-functions.md | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index a46c36395b8..50015cd996e 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -112,4 +112,34 @@ Result: └──────────────────────────────┴───────────────────────────────────┘ ``` +## mapContains {#mapcontains} + +Determines whether `map.keys` contains the `key` parameter. + +**Syntax** + +``` sql +mapContains(map, key) +``` + +**Parameters** + +- `map` — Map. [Type name](relative/path/to/type/dscr.md#type). +- `key` — Key. Type matches the type of `map.keys`. + +**Returned value** + +- `1` if `map.keys` contains `key`, `0` if not. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Example** + +Query: + + +## mapKeys {#mapKeys} + +## mapValues {#mapvalues} + [Original article](https://clickhouse.tech/docs/en/query_language/functions/tuple-map-functions/) From 48f6f7e490754880ad179c3568d2c118454d0db9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 10 Feb 2021 19:26:49 +0300 Subject: [PATCH 032/149] Split filter for predicate push down. 
--- src/Interpreters/ActionsDAG.cpp | 194 +++++++++++++++++++++++++++++++- src/Interpreters/ActionsDAG.h | 9 +- 2 files changed, 201 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 176745c707d..223b4341f46 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -338,7 +339,7 @@ void ActionsDAG::removeUnusedActions(const std::vector & required_nodes) removeUnusedActions(); } -void ActionsDAG::removeUnusedActions() +void ActionsDAG::removeUnusedActions(bool allow_remove_inputs) { std::unordered_set visited_nodes; std::stack stack; @@ -357,6 +358,9 @@ void ActionsDAG::removeUnusedActions() visited_nodes.insert(&node); stack.push(&node); } + + if (node.type == ActionType::INPUT && !allow_remove_inputs) + visited_nodes.insert(&node); } while (!stack.empty()) @@ -1153,4 +1157,192 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & co return split(split_nodes); } +ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, const Names & available_inputs) +{ + std::unordered_map> inputs_map; + for (const auto & input : inputs) + inputs_map[input->result_name].emplace_back(input); + + std::unordered_set allowed_nodes; + for (const auto & name : available_inputs) + { + auto & inputs_list = inputs_map[name]; + if (inputs_list.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find input {} in ActionsDAG. DAG:\n{}", name, dumpDAG()); + + allowed_nodes.emplace(inputs_list.front()); + inputs_list.pop_front(); + } + + auto it = index.begin(); + for (; it != index.end(); ++it) + if ((*it)->result_name == filter_name) + break; + + if (it == index.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Index for ActionsDAG does not contain filter column name {}. DAG:\n{}", + filter_name, dumpDAG()); + + std::unordered_set selected_predicates; + + { + struct Frame + { + const Node * node; + bool is_predicate = false; + size_t next_child_to_visit = 0; + size_t num_allowed_children = 0; + }; + + std::stack stack; + std::unordered_set visited_nodes; + + stack.push(Frame{.node = *it, .is_predicate = true}); + visited_nodes.insert(*it); + while (!stack.empty()) + { + auto & cur = stack.top(); + bool is_conjunction = cur.is_predicate + && cur.node->type == ActionType::FUNCTION + && cur.node->function_base->getName() == "and"; + + /// At first, visit all children. 
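+            /// (A node is marked as allowed only after all of its children are
+            /// classified; for a top-level AND the allowed conjuncts can still
+            /// be kept below even when the conjunction itself is not allowed.)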
+ while (cur.next_child_to_visit < cur.node->children.size()) + { + auto * child = cur.node->children[cur.next_child_to_visit]; + + if (visited_nodes.count(child) == 0) + { + visited_nodes.insert(child); + stack.push({.node = child, .is_predicate = is_conjunction}); + break; + } + + if (allowed_nodes.contains(child)) + ++cur.num_allowed_children; + ++cur.next_child_to_visit; + } + + if (cur.next_child_to_visit == cur.node->children.size()) + { + if (cur.num_allowed_children == cur.node->children.size()) + { + if (cur.node->type != ActionType::ARRAY_JOIN && cur.node->type != ActionType::INPUT) + allowed_nodes.emplace(cur.node); + } + else if (is_conjunction) + { + for (auto * child : cur.node->children) + if (allowed_nodes.count(child)) + selected_predicates.insert(child); + } + + stack.pop(); + } + } + } + + if (selected_predicates.empty()) + { + if (allowed_nodes.count(*it)) + selected_predicates.insert(*it); + else + return nullptr; + } + + auto actions = cloneEmpty(); + actions->settings.project_input = false; + + std::unordered_map nodes_mapping; + + { + struct Frame + { + const Node * node; + size_t next_child_to_visit = 0; + }; + + std::stack stack; + + for (const auto * predicate : selected_predicates) + { + if (nodes_mapping.count(predicate)) + continue; + + stack.push({.node = predicate}); + while (!stack.empty()) + { + auto & cur = stack.top(); + /// At first, visit all children. + while (cur.next_child_to_visit < cur.node->children.size()) + { + auto * child = cur.node->children[cur.next_child_to_visit]; + + if (nodes_mapping.count(child) == 0) + { + stack.push({.node = child}); + break; + } + + ++cur.next_child_to_visit; + } + + if (cur.next_child_to_visit == cur.node->children.size()) + { + auto & node = actions->nodes.emplace_back(*cur.node); + nodes_mapping[cur.node] = &node; + + for (auto & child : node.children) + child = nodes_mapping[child]; + + if (node.type == ActionType::INPUT) + { + actions->inputs.emplace_back(&node); + actions->index.insert(&node); + } + } + } + } + + Node * result_predicate = nodes_mapping[*selected_predicates.begin()]; + + if (selected_predicates.size() > 1) + { + FunctionOverloadResolverPtr func_builder_and = + std::make_shared( + std::make_unique( + std::make_shared())); + + std::vector args; + args.reserve(selected_predicates.size()); + for (const auto * predicate : selected_predicates) + args.emplace_back(nodes_mapping[predicate]); + + result_predicate = &actions->addFunction(func_builder_and, args, {}, true); + } + + actions->index.insert(result_predicate); + } + + + + /// Replace all predicates which are copied to constants. + /// Note: This also keeps valid const propagation. AND is constant only if all elements are. + /// But if all elements are constant, AND should is moved to split actions and replaced itself. + for (const auto & predicate : selected_predicates) + { + Node node; + node.type = ActionType::COLUMN; + node.result_name = std::move(predicate->result_name); + node.result_type = std::move(predicate->result_type); + node.column = node.result_type->createColumnConst(0, 1); + *predicate = std::move(node); + } + + removeUnusedActions(false); + + return actions; +} + } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index e13a9bd62b3..6fd4e14568a 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -274,6 +274,13 @@ public: /// Index of initial actions must contain column_name. 
SplitResult splitActionsForFilter(const std::string & column_name) const; + /// Create actions which may calculate part of filter using only available_inputs. + /// If nothing may be calculated, returns nullptr. + /// Otherwise, return actions which inputs are from available_inputs. + /// Returned actions add single column which may be used for filter. + /// Also, replace some nodes of current inputs to constant 1 in case they are filtered. + ActionsDAGPtr splitActionsForFilter(const std::string & filter_name, const Names & available_inputs); + private: Node & addNode(Node node, bool can_replace = false); Node & getNode(const std::string & name); @@ -297,7 +304,7 @@ private: } void removeUnusedActions(const std::vector & required_nodes); - void removeUnusedActions(); + void removeUnusedActions(bool allow_remove_inputs = true); void addAliases(const NamesWithAliases & aliases, std::vector & result_nodes); void compileFunctions(); From a83885392e8233a9b9faa462eea371c71df2c745 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 10 Feb 2021 20:47:48 +0300 Subject: [PATCH 033/149] Split filter for predicate push down. --- src/Interpreters/ActionsDAG.cpp | 117 ++++++++++++++++++++++++++------ src/Interpreters/ActionsDAG.h | 2 +- 2 files changed, 98 insertions(+), 21 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 223b4341f46..eb1ff9ad998 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1157,7 +1157,7 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & co return split(split_nodes); } -ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, const Names & available_inputs) +ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, bool can_remove_filter, const Names & available_inputs) { std::unordered_map> inputs_map; for (const auto & input : inputs) @@ -1185,6 +1185,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, filter_name, dumpDAG()); std::unordered_set selected_predicates; + std::unordered_set other_predicates; { struct Frame @@ -1234,8 +1235,12 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, else if (is_conjunction) { for (auto * child : cur.node->children) + { if (allowed_nodes.count(child)) selected_predicates.insert(child); + else + other_predicates.insert(child); + } } stack.pop(); @@ -1254,6 +1259,11 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, auto actions = cloneEmpty(); actions->settings.project_input = false; + FunctionOverloadResolverPtr func_builder_and = + std::make_shared( + std::make_unique( + std::make_shared())); + std::unordered_map nodes_mapping; { @@ -1309,11 +1319,6 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, if (selected_predicates.size() > 1) { - FunctionOverloadResolverPtr func_builder_and = - std::make_shared( - std::make_unique( - std::make_shared())); - std::vector args; args.reserve(selected_predicates.size()); for (const auto * predicate : selected_predicates) @@ -1325,22 +1330,94 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, actions->index.insert(result_predicate); } - - - /// Replace all predicates which are copied to constants. - /// Note: This also keeps valid const propagation. AND is constant only if all elements are. 
- /// But if all elements are constant, AND should is moved to split actions and replaced itself. - for (const auto & predicate : selected_predicates) + if (selected_predicates.count(*it)) { - Node node; - node.type = ActionType::COLUMN; - node.result_name = std::move(predicate->result_name); - node.result_type = std::move(predicate->result_type); - node.column = node.result_type->createColumnConst(0, 1); - *predicate = std::move(node); - } + /// The whole predicate was split. + if (can_remove_filter) + { + for (auto i = index.begin(); i != index.end(); ++i) + { + if (*i == *it) + { + index.remove(i); + break; + } + } + } + else + { + Node node; + node.type = ActionType::COLUMN; + node.result_name = std::move((*it)->result_name); + node.result_type = std::move((*it)->result_type); + node.column = node.result_type->createColumnConst(0, 1); + *(*it) = std::move(node); + } - removeUnusedActions(false); + removeUnusedActions(false); + } + else if ((*it)->type == ActionType::FUNCTION && (*it)->function_base->getName() == "and") + { + std::vector new_children(other_predicates.begin(), other_predicates.end()); + + if (new_children.size() == 1) + { + if (new_children.front()->result_type->equals(*((*it)->result_type))) + { + Node node; + node.type = ActionType::ALIAS; + node.result_name = (*it)->result_name; + node.result_type = (*it)->result_type; + node.children.swap(new_children); + *(*it) = std::move(node); + } + else + { + (*it)->children.swap(new_children); + ColumnsWithTypeAndName arguments; + arguments.reserve((*it)->children.size()); + + for (const auto * child : (*it)->children) + { + ColumnWithTypeAndName argument; + argument.column = child->column; + argument.type = child->result_type; + argument.name = child->result_name; + + arguments.emplace_back(std::move(argument)); + } + + FunctionOverloadResolverPtr func_builder_cast = + std::make_shared( + CastOverloadResolver::createImpl(false)); + + (*it)->function_builder = func_builder_cast; + (*it)->function_base = (*it)->function_builder->build(arguments); + (*it)->function = (*it)->function_base->prepare(arguments); + } + } + else + { + (*it)->children.swap(new_children); + ColumnsWithTypeAndName arguments; + arguments.reserve((*it)->children.size()); + + for (const auto * child : (*it)->children) + { + ColumnWithTypeAndName argument; + argument.column = child->column; + argument.type = child->result_type; + argument.name = child->result_name; + + arguments.emplace_back(std::move(argument)); + } + + (*it)->function_base = (*it)->function_builder->build(arguments); + (*it)->function = (*it)->function_base->prepare(arguments); + } + + removeUnusedActions(false); + } return actions; } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 6fd4e14568a..112c507e79f 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -279,7 +279,7 @@ public: /// Otherwise, return actions which inputs are from available_inputs. /// Returned actions add single column which may be used for filter. /// Also, replace some nodes of current inputs to constant 1 in case they are filtered. 
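    /// The split-off actions are intended to run before this DAG, closer to the source of data (e.g. below an aggregation).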
- ActionsDAGPtr splitActionsForFilter(const std::string & filter_name, const Names & available_inputs); + ActionsDAGPtr splitActionsForFilter(const std::string & filter_name, bool can_remove_filter, const Names & available_inputs); private: Node & addNode(Node node, bool can_replace = false); From b574d8331b2cd6c2cd8dfe7d36ad8257b392db83 Mon Sep 17 00:00:00 2001 From: George Date: Thu, 11 Feb 2021 11:46:31 +0300 Subject: [PATCH 034/149] Updated description --- .../functions/tuple-map-functions.md | 97 ++++++++++++++++++- 1 file changed, 96 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 50015cd996e..d3503937af2 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -137,9 +137,104 @@ Type: [UInt8](../../sql-reference/data-types/int-uint.md). Query: +```sql +CREATE TABLE test (a Map(String,String)) ENGINE = Memory; -## mapKeys {#mapKeys} +INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); + +SELECT mapContains(a, 'name') FROM test; + +``` + +Result: + +```text +┌─mapContains(a, 'name')─┐ +│ 1 │ +│ 0 │ +└────────────────────────┘ +``` + +## mapKeys {#mapkeys} + +Returns all the keys from `map` parameter. + +**Syntax** + +```sql +mapKeys(map) +``` + +**Parameters** + +- `map`- Map. + +**Returned value** + +- Array containing all the keys from `map`. + +Type: [Array](../../sql-reference/data-types/array.md). + +**Example** + +Query: + +```sql +CREATE TABLE test (a Map(String,String)) ENGINE = Memory; + +INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); + +SELECT mapKeys(a) FROM test; +``` + +Result: + +```text +┌─mapKeys(a)────────────┐ +│ ['name','age'] │ +│ ['number','position'] │ +└───────────────────────┘ +``` ## mapValues {#mapvalues} +Returns all the values from `map` parameter. + +**Syntax** + +```sql +mapKeys(map) +``` + +**Parameters** + +- `map`- Map. + +**Returned value** + +- Array containing all the values from `map`. + +Type: [Array](../../sql-reference/data-types/array.md). 
+ +**Example** + +Query: + +```sql +CREATE TABLE test (a Map(String,String)) ENGINE = Memory; + +INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); + +SELECT mapValues(a) FROM test; +``` + +Result: + +```text +┌─mapValues(a)─────┐ +│ ['eleven','11'] │ +│ ['twelve','6.0'] │ +└──────────────────┘ +``` + [Original article](https://clickhouse.tech/docs/en/query_language/functions/tuple-map-functions/) From 3a020d2dd5c4ffda10fb4dd79509f5e04f45e692 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Feb 2021 11:49:12 +0300 Subject: [PATCH 035/149] filter push down for Aggregating --- src/Processors/QueryPlan/AggregatingStep.h | 2 + .../QueryPlan/Optimizations/Optimizations.h | 7 +- .../Optimizations/filterPushDown.cpp | 77 +++++++++++++++++++ 3 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 src/Processors/QueryPlan/Optimizations/filterPushDown.cpp diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index 853173895b3..6be92394fab 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -32,6 +32,8 @@ public: void describeActions(FormatSettings &) const override; void describePipeline(FormatSettings & settings) const override; + const Aggregator::Params & getParams() const { return params; } + private: Aggregator::Params params; bool final; diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index 454eab9649a..be7f81e5db0 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -38,14 +38,19 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes); /// Replace chain `FilterStep -> ExpressionStep` to single FilterStep size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &); +/// Move FilterStep down if possible. +/// May split FilterStep and push down only part of it. 
+size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes); + inline const auto & getOptimizations() { - static const std::array optimizations = + static const std::array optimizations = {{ {tryLiftUpArrayJoin, "liftUpArrayJoin"}, {tryPushDownLimit, "pushDownLimit"}, {trySplitFilter, "splitFilter"}, {tryMergeExpressions, "mergeExpressions"}, + {tryPushDownLimit, "pushDownFilter"}, }}; return optimizations; diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp new file mode 100644 index 00000000000..82704bcbce9 --- /dev/null +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB::ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace DB::QueryPlanOptimizations +{ + +size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) +{ + if (parent_node->children.size() != 1) + return 0; + + QueryPlan::Node * child_node = parent_node->children.front(); + + auto & parent = parent_node->step; + auto & child = child_node->step; + auto * filter = typeid_cast(parent.get()); + + if (!filter) + return 0; + + const auto & expression = filter->getExpression(); + const auto & filter_column_name = filter->getFilterColumnName(); + bool removes_filter = filter->removesFilterColumn(); + + if (auto * aggregating = typeid_cast(child.get())) + { + const auto & params = aggregating->getParams(); + + Names keys; + keys.reserve(params.keys.size()); + for (auto pos : params.keys) + keys.push_back(params.src_header.getByPosition(pos).name); + + if (auto split_filter = expression->splitActionsForFilter(filter_column_name, removes_filter, keys)) + { + auto it = expression->getIndex().find(filter_column_name); + if (it == expression->getIndex().end()) + { + if (!removes_filter) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", + filter_column_name, expression->dumpDAG()); + + parent = std::make_unique(child->getOutputStream(), expression); + } + + /// Add new Filter step before Aggregating. + /// Expression/Filter -> Aggregating -> Something + auto & node = nodes.emplace_back(); + node.children.swap(child_node->children); + child_node->children.emplace_back(&node); + /// Expression/Filter -> Aggregating -> Filter -> Something + + /// New filter column is added to the end. + auto split_filter_column_name = (*split_filter->getIndex().rbegin())->result_name; + node.step = std::make_unique( + node.children.at(0)->step->getOutputStream(), + std::move(split_filter), std::move(split_filter_column_name), true); + + return 3; + } + } + + return 0; +} + +} From 8b4d9e421a1037f132f8c6511b92ee1a3a21580b Mon Sep 17 00:00:00 2001 From: George Date: Thu, 11 Feb 2021 12:21:59 +0300 Subject: [PATCH 036/149] Added translation --- .../functions/tuple-map-functions.md | 4 +- .../functions/tuple-map-functions.md | 127 +++++++++++++++++- 2 files changed, 128 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index d3503937af2..a08ca70e851 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -124,7 +124,7 @@ mapContains(map, key) **Parameters** -- `map` — Map. [Type name](relative/path/to/type/dscr.md#type). +- `map` — Map. - `key` — Key. 
Type matches the type of `map.keys`. **Returned value** @@ -237,4 +237,4 @@ Result: └──────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/query_language/functions/tuple-map-functions/) +[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-map-functions/) diff --git a/docs/ru/sql-reference/functions/tuple-map-functions.md b/docs/ru/sql-reference/functions/tuple-map-functions.md index a2b25e68fe5..6461412aec5 100644 --- a/docs/ru/sql-reference/functions/tuple-map-functions.md +++ b/docs/ru/sql-reference/functions/tuple-map-functions.md @@ -116,4 +116,129 @@ select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type └──────────────────────────────┴───────────────────────────────────┘ ``` -[Оригинальная статья](https://clickhouse.tech/docs/en/query_language/functions/tuple-map-functions/) +## mapContains {#mapcontains} + +Определяет, включает ли в себя `map.keys` параметр `key`. + +**Синтаксис** + +``` sql +mapContains(map, key) +``` + +**Параметры** + +- `map` — Map. +- `key` — ключ. Тип соответстует типу `map.keys`. + +**Возвращаемое значение** + +- `1` если `map.keys` включает `key`, иначе `0`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Пример** + +Запрос: + +```sql +CREATE TABLE test (a Map(String,String)) ENGINE = Memory; + +INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); + +SELECT mapContains(a, 'name') FROM test; + +``` + +Результат: + +```text +┌─mapContains(a, 'name')─┐ +│ 1 │ +│ 0 │ +└────────────────────────┘ +``` + +## mapKeys {#mapkeys} + +Возвращает все ключи контейнера `map`. + +**Синтаксис** + +```sql +mapKeys(map) +``` + +**Параметры** + +- `map`- map. + +**Возвращаемое значение** + +- Массив со всеми ключами контейнера `map`. + +Тип: [Array](../../sql-reference/data-types/array.md). + +**Пример** + +Запрос: + +```sql +CREATE TABLE test (a Map(String,String)) ENGINE = Memory; + +INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); + +SELECT mapKeys(a) FROM test; +``` + +Результат: + +```text +┌─mapKeys(a)────────────┐ +│ ['name','age'] │ +│ ['number','position'] │ +└───────────────────────┘ +``` + +## mapValues {#mapvalues} + +Возвращает все значения контейнера `map`. + +**Синтаксис** + +```sql +mapKeys(map) +``` + +**Параметры** + +- `map`- map. + +**Возвращаемое значение** + +- Массив со всеми значениями `map`. + +Тип: [Array](../../sql-reference/data-types/array.md). 
+ +**Примеры** + +Запрос: + +```sql +CREATE TABLE test (a Map(String,String)) ENGINE = Memory; + +INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); + +SELECT mapValues(a) FROM test; +``` + +Результат: + +```text +┌─mapValues(a)─────┐ +│ ['eleven','11'] │ +│ ['twelve','6.0'] │ +└──────────────────┘ +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/tuple-map-functions/) From 48b8685d6ef0e690ee7055f0ba1812fa8dfa50e1 Mon Sep 17 00:00:00 2001 From: George Date: Thu, 11 Feb 2021 12:46:14 +0300 Subject: [PATCH 037/149] minor fixes --- docs/en/sql-reference/functions/tuple-map-functions.md | 4 ++-- docs/ru/sql-reference/functions/tuple-map-functions.md | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index a08ca70e851..f8755f1e2a9 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -167,7 +167,7 @@ mapKeys(map) **Parameters** -- `map`- Map. +- `map` — Map. **Returned value** @@ -208,7 +208,7 @@ mapKeys(map) **Parameters** -- `map`- Map. +- `map` — Map. **Returned value** diff --git a/docs/ru/sql-reference/functions/tuple-map-functions.md b/docs/ru/sql-reference/functions/tuple-map-functions.md index 6461412aec5..22bf1e98369 100644 --- a/docs/ru/sql-reference/functions/tuple-map-functions.md +++ b/docs/ru/sql-reference/functions/tuple-map-functions.md @@ -128,7 +128,7 @@ mapContains(map, key) **Параметры** -- `map` — Map. +- `map` — контейнер map. - `key` — ключ. Тип соответстует типу `map.keys`. **Возвращаемое значение** @@ -171,7 +171,7 @@ mapKeys(map) **Параметры** -- `map`- map. +- `map` — контейнер map. **Возвращаемое значение** @@ -212,7 +212,7 @@ mapKeys(map) **Параметры** -- `map`- map. +- `map` — контейнер map. 
**Возвращаемое значение** From e24b8e8a13ecea65e9d35e53cbe1a7fa44917680 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Feb 2021 15:06:28 +0300 Subject: [PATCH 038/149] Fix ActionsDAG::splitActionsForFilter --- src/Interpreters/ActionsDAG.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index eb1ff9ad998..cd3a2853687 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1311,6 +1311,8 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, actions->inputs.emplace_back(&node); actions->index.insert(&node); } + + stack.pop(); } } } From 2deff0d9d09bab61a149b62acfc49a34e6d4011f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Feb 2021 18:44:10 +0300 Subject: [PATCH 039/149] Fix ActionsDAG::splitActionsForFilter --- src/Interpreters/ActionsDAG.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index cd3a2853687..78254e5139a 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1375,7 +1375,17 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, } else { - (*it)->children.swap(new_children); + Node node; + node.type = ActionType::COLUMN; + node.result_name = (*it)->result_type->getName(); + node.column = DataTypeString().createColumnConst(0, node.result_name); + node.result_type = std::make_shared(); + + auto * right_arg = &nodes.emplace_back(std::move(node)); + auto * left_arg = new_children.front(); + + + (*it)->children = {left_arg, right_arg}; ColumnsWithTypeAndName arguments; arguments.reserve((*it)->children.size()); From a26c8d9eee365d72d151e55416137377e2ea56bb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Feb 2021 19:08:54 +0300 Subject: [PATCH 040/149] Fix const filter resilt for filter push down. --- .../QueryPlan/Optimizations/filterPushDown.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 82704bcbce9..2a42b08af73 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB::ErrorCodes { @@ -41,8 +42,15 @@ size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) for (auto pos : params.keys) keys.push_back(params.src_header.getByPosition(pos).name); + std::cerr << "Filter: \n" << expression->dumpDAG() << std::endl; if (auto split_filter = expression->splitActionsForFilter(filter_column_name, removes_filter, keys)) { + std::cerr << "===============\n" << expression->dumpDAG() << std::endl; + std::cerr << "---------------\n" << split_filter->dumpDAG() << std::endl; + + //if (split_filter) + // throw Exception("!!!!", 0); + auto it = expression->getIndex().find(filter_column_name); if (it == expression->getIndex().end()) { @@ -53,6 +61,10 @@ size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) parent = std::make_unique(child->getOutputStream(), expression); } + else if ((*it)->column && isColumnConst(*(*it)->column)) + { + parent = std::make_unique(child->getOutputStream(), expression); + } /// Add new Filter step before Aggregating. 
/// Expression/Filter -> Aggregating -> Something From fc6587319c97c6cccb8b5dc1b108a7b56afaed9e Mon Sep 17 00:00:00 2001 From: George Date: Thu, 11 Feb 2021 19:29:46 +0300 Subject: [PATCH 041/149] Edit and translated encription-functions --- .../functions/encryption-functions.md | 14 +- .../functions/encryption-functions.md | 305 ++++++++---------- 2 files changed, 150 insertions(+), 169 deletions(-) diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index 9e360abfe26..f3e851db29b 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -55,7 +55,7 @@ CREATE TABLE encryption_test `comment` String, `secret` String ) -ENGINE = Memory +ENGINE = Memory; ``` Insert some data (please avoid storing the keys/ivs in the database as this undermines the whole concept of encryption), also storing 'hints' is unsafe too and used only for illustrative purposes: @@ -110,7 +110,7 @@ Result: Compatible with mysql encryption and resulting ciphertext can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function. -Will produce same ciphertext as `encrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_encrypt_mysql` will stick to what MySQL's `aes_encrypt` does: 'fold' `key` and ignore excess bits of `IV`. +Will produce the same ciphertext as `encrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_encrypt_mysql` will stick to what MySQL's `aes_encrypt` does: 'fold' `key` and ignore excess bits of `iv`. Supported encryption modes: @@ -138,7 +138,6 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv]) - Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string). - **Examples** Given equal input `encrypt` and `aes_encrypt_mysql` produce the same ciphertext: @@ -157,7 +156,6 @@ Result: └───────────────────┘ ``` - But `encrypt` fails when `key` or `iv` is longer than expected: Query: @@ -252,7 +250,7 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad]) **Examples** -Re-using table from [encrypt](./encryption-functions.md#encrypt). +Re-using table from [encrypt](#encrypt). Query: @@ -284,6 +282,7 @@ SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920 ``` Result: + ``` text ┌─comment─────────────────────────────┬─plaintext─┐ │ aes-256-cfb128 no IV │ Secret │ @@ -294,7 +293,7 @@ Result: └─────────────────────────────────────┴───────────┘ ``` -Notice how only portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption. +Notice how only a portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption. 
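For contrast, a minimal round-trip sketch (an illustrative query, assuming the same mode, key and IV on both sides, reusing the values from the examples above) decrypts cleanly:

``` sql
SELECT decrypt('aes-256-cfb128',
               encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv'),
               '12345678910121314151617181920212', 'iviviviviviviviv') AS plaintext;
```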
## aes_decrypt_mysql {#aes_decrypt_mysql}

@@ -331,6 +330,7 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
 **Examples**
 
 Let's decrypt data we've previously encrypted with MySQL:
+
 ``` sql
 mysql> SET block_encryption_mode='aes-256-cfb128';
 Query OK, 0 rows affected (0.00 sec)
 
 mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
 +------------------------+
 | ciphertext             |
 +------------------------+
 | 0x24E9E4966469         |
 +------------------------+
 1 row in set (0.00 sec)
 ```
 
 Query:
+
 ``` sql
 SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext
 ```
 
 Result:
+
 ``` text
 ┌─plaintext─┐
 │ Secret    │
 └───────────┘
 ```
diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md
index f1f6516d453..14ce97f5513 100644
--- a/docs/ru/sql-reference/functions/encryption-functions.md
+++ b/docs/ru/sql-reference/functions/encryption-functions.md
@@ -11,7 +11,7 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438 \u0434\u043b\u044f \u0448

Длина инициализирующего вектора всегда 16 байт (лишние байты игнорируются).

-Обратите внимание, что эти функции работают медленно.
+Обратите внимание, что до версии ClickHouse 21.1 эти функции работают медленно.

## encrypt {#encrypt}

@@ -41,7 +41,7 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad])

**Возвращаемое значение**

-- Зашифрованная строка. [String](../../sql-reference/data-types/string.md#string).
+- Двоичная зашифрованная строка. [String](../../sql-reference/data-types/string.md#string).

**Примеры**

Создадим такую таблицу:

Запрос:

``` sql
CREATE TABLE encryption_test
(
-    input String,
-    key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
-    iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
-    key32 String DEFAULT substring(key, 1, 32),
-    key24 String DEFAULT substring(key, 1, 24),
-    key16 String DEFAULT substring(key, 1, 16)
-) Engine = Memory;
+    `comment` String,
+    `secret` String
+)
+ENGINE = Memory;
```

-Вставим эти данные:
+Вставим некоторые данные (замечание: не храните ключи или инициализирующие векторы в базе данных, так как это компрометирует всю концепцию шифрования), также хранение "подсказок" небезопасно и используется только для наглядности:

Запрос:

``` sql
-INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
+INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\
+('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\
+('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\
+('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212'));
```

-Пример без `iv`:
-
Запрос:

``` sql
-SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
+SELECT comment, hex(secret) FROM encryption_test;
```

Результат:

``` text
-┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
-│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
-│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
-│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
-└─────────────┴──────────────────────────────────────────────────────────────────┘
-```
-
-Пример с `iv`:
-
-Запрос:
-
-``` sql
-SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
-```
-
-Результат:
-
-``` text
-┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
-│ aes-256-ctr │ │
-│ aes-256-ctr │ 7FB039F7 │
-│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
-└─────────────┴───────────────────────────────────────────────┘
+┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
+│ aes-256-cfb128 no IV │ B4972BDC4459 │
+│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
+│ aes-256-cfb128 with IV │ 5E6CB398F653 │
+│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
+└─────────────────────────────────────┴──────────────────────────────────┘
```

Пример в режиме `-gcm`:

Запрос:

``` sql
-SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
+INSERT INTO encryption_test VALUES('aes-256-gcm', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')), \
+('aes-256-gcm with AAD', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv', 'aad'));
+
+SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%';
```

Результат:

``` text
-┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
-│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
-│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
-│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
-└─────────────┴────────────────────────────────────────────────────────────────────────┘
-```
-
-Пример в режиме `-gcm` и с `aad`:
-
-Запрос:
-
-``` sql
-SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
-```
-
-Результат:
-
-``` text
-┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
-│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
-│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
-│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
-└─────────────┴────────────────────────────────────────────────────────────────────────┘
+┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
+│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
+│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
+└──────────────────────┴──────────────────────────────────────────────┘
```

## aes_encrypt_mysql {#aes_encrypt_mysql}

Совместима с шифрованием MySQL, результат может быть расшифрован функцией [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt).

+При одинаковых вводных зашифрованный текст будет совпадать с результатом `encrypt`. Однако, когда `key` или `iv` длиннее, чем должны быть, `aes_encrypt_mysql` будет работать аналогично MySQL `aes_encrypt`: свернет ключ и проигнорирует лишнюю часть `iv`.
+
Функция поддерживает шифрование данных следующими режимами:

- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb

**Синтаксис**

``` sql
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
```

**Параметры**

- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — текст, который будет зашифрован. [String](../../sql-reference/data-types/string.md#string). 
-- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string). -- `iv` — инициализирующий вектор. Необязателен. [String](../../sql-reference/data-types/string.md#string). +- `key` — ключ шифрования. Если ключ длиннее, чем требует режим шифрования, производится специфичная для MySQL свертка ключа. [String](../../sql-reference/data-types/string.md#string). +- `iv` — инициализирующий вектор. Необязателен, учитываются только первые 16 байтов. [String](../../sql-reference/data-types/string.md#string). **Возвращаемое значение** -- Зашифрованная строка. [String](../../sql-reference/data-types/string.md#string). +- Двоичная зашифрованная строка. [String](../../sql-reference/data-types/string.md#string). **Примеры** -Создадим такую таблицу: +При одинаковых вводных результаты шифрования `encrypt` и `aes_encrypt_mysql` будут совпадать. Запрос: ``` sql -CREATE TABLE encryption_test -( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; -``` - -Вставим эти данные: - -Запрос: - -``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); -``` - -Пример без `iv`: - -Запрос: - -``` sql -SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test; +SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal; ``` Результат: ``` text -┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐ -│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │ -│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │ -│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │ -└─────────────┴──────────────────────────────────────────────────────────────────┘ +┌─ciphertexts_equal─┐ +│ 1 │ +└───────────────────┘ ``` -Пример с `iv`: +Но `encrypt` генерирует исключение, когда `key` или `iv` длиннее, чем нужно: Запрос: ``` sql -SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test; +SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123'); ``` Результат: ``` text -┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐ -│ aes-256-cfb128 │ │ -│ aes-256-cfb128 │ 7FB039F7 │ -│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │ -└────────────────┴────────────────────────────────────────────────────────────┘ +Received exception from server (version 21.1.2): +Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123'). 
+``` + +Тогда как `aes_encrypt_mysql` возвращает совместимый с MySQL вывод: + +Запрос: + +``` sql +SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext; +``` + +Результат: + +```text +┌─ciphertext───┐ +│ 24E9E4966469 │ +└──────────────┘ +``` + +Если передать `iv` еще длиннее, результат останется таким же: + +Запрос: + +``` sql +SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext +``` + +Результат: + +``` text +┌─ciphertext───┐ +│ 24E9E4966469 │ +└──────────────┘ +``` + +Это совпадает с тем, что выводит MySQL с такими же вводными: + +``` sql +mysql> SET block_encryption_mode='aes-256-cfb128'; +Query OK, 0 rows affected (0.00 sec) + +mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext; ++------------------------+ +| ciphertext | ++------------------------+ +| 0x24E9E4966469 | ++------------------------+ +1 row in set (0.00 sec) ``` ## decrypt {#decrypt} -Функция поддерживает расшифровку данных следующими режимами: +Функция расшифровывает зашифрованный текст в обычный следующими режимами: - aes-128-ecb, aes-192-ecb, aes-256-ecb - aes-128-cbc, aes-192-cbc, aes-256-cbc @@ -247,7 +232,7 @@ SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) **Синтаксис** -```sql +``` sql decrypt('mode', 'ciphertext', 'key' [, iv, aad]) ``` @@ -265,51 +250,57 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad]) **Примеры** -Создадим такую таблицу: +Используется таблица из [encrypt](#encrypt). Запрос: ``` sql -CREATE TABLE encryption_test -( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; -``` - -Вставим эти данные: - -Запрос: - -``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); -``` - -Запрос: - -``` sql - -SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test; +SELECT comment, hex(secret) FROM encryption_test; ``` Результат: -```text -┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐ -│ aes-128-ecb │ │ -│ aes-128-ecb │ text │ -│ aes-128-ecb │ What Is ClickHouse? 
│ -└─────────────┴─────────────────────────────────────────────────────────────────────┘ +``` text +┌─comment──────────────┬─hex(secret)──────────────────────────────────┐ +│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │ +│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │ +└──────────────────────┴──────────────────────────────────────────────┘ +┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐ +│ aes-256-cfb128 no IV │ B4972BDC4459 │ +│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │ +│ aes-256-cfb128 with IV │ 5E6CB398F653 │ +│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │ +└─────────────────────────────────────┴──────────────────────────────────┘ ``` +Теперь попытаемся расшифровать эти данные: + +Запрос: + +``` sql +SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test +``` + +Результат: + +``` text +┌─comment─────────────────────────────┬─plaintext─┐ +│ aes-256-cfb128 no IV │ Secret │ +│ aes-256-cfb128 no IV, different key │ �4� + � │ +│ aes-256-cfb128 with IV │ ���6�~ │ + │aes-256-cbc no IV │ �2*4�h3c�4w��@ +└─────────────────────────────────────┴───────────┘ +``` + +Обратите внимание, что только часть данных была расшифрована, а остальное является бессмыслицей, как как `mode`, `key`, или `iv`были другими во время шифрования. + ## aes_decrypt_mysql {#aes_decrypt_mysql} Совместима с шифрованием myqsl и может расшифровать данные, зашифрованные функцией [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt). +При одинаковых вводных расшифрованный текст будет совпадать с результатом `decrypt`. Однако, когда `key` или `iv` длиннее, чем должны быть, `aes_decrypt_mysql` будет работать аналогично MySQL `aes_decrypt`: свернет ключ и проигнорирует лишнюю часть `iv`. + Функция поддерживает расшифровку данных следующими режимами: - aes-128-ecb, aes-192-ecb, aes-256-ecb @@ -321,7 +312,7 @@ SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) **Синтаксис** -```sql +``` sql aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) ``` @@ -332,51 +323,39 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) - `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string). - `iv` — инициализирующий вектор. Необязателен. [String](../../sql-reference/data-types/string.md#string). - **Возвращаемое значение** - Расшифрованная строка. [String](../../sql-reference/data-types/string.md#string). 
**Примеры** -Создадим такую таблицу: +Расшифруем данные, которые до этого зашифровали с помощью MySQL: -Запрос: ``` sql -CREATE TABLE encryption_test -( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; -``` +mysql> SET block_encryption_mode='aes-256-cfb128'; +Query OK, 0 rows affected (0.00 sec) -Вставим эти данные: - -Запрос: - -``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext; ++------------------------+ +| ciphertext | ++------------------------+ +| 0x24E9E4966469 | ++------------------------+ +1 row in set (0.00 sec) ``` Запрос: ``` sql -SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test; +SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext ``` Результат: ``` text -┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐ -│ aes-128-cbc │ │ -│ aes-128-cbc │ text │ -│ aes-128-cbc │ What Is ClickHouse? │ -└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘ +┌─plaintext─┐ +│ Secret │ +└───────────┘ ``` - [Original article](https://clickhouse.tech/docs/ru/sql-reference/functions/encryption_functions/) From 2a9a6cf4048969d1fa670fb7afac18d57b86649a Mon Sep 17 00:00:00 2001 From: George Date: Thu, 11 Feb 2021 19:46:23 +0300 Subject: [PATCH 042/149] Edited and translated parametric-functions --- .../sql-reference/aggregate-functions/parametric-functions.md | 2 +- .../sql-reference/aggregate-functions/parametric-functions.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 4b3bf12aa8c..2d2df3bd6cb 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -241,7 +241,7 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) **Parameters** -- `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`. +- `window` — Length of the sliding window. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`. - `mode` - It is an optional argument. - `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values. - `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, it’s value can’t exceed the Int64 maximum, which is 2^63 - 1). 
diff --git a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md index f20acaa45c3..2c367882714 100644 --- a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md @@ -239,7 +239,7 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) **Параметры** -- `window` — ширина скользящего окна по времени в секундах. [UInt](../../sql-reference/aggregate-functions/parametric-functions.md). +- `window` — ширина скользящего окна по времени. Единица измерения зависит от `timestamp` и может варьироваться. Определяется выражением `timestamp от cond2 <= timestamp от cond1 + window`. - `mode` - необязательный параметр. Если установлено значение `'strict'`, то функция `windowFunnel()` применяет условия только для уникальных значений. - `timestamp` — имя столбца, содержащего временные отметки. [Date](../../sql-reference/aggregate-functions/parametric-functions.md), [DateTime](../../sql-reference/aggregate-functions/parametric-functions.md#data_type-datetime) и другие параметры с типом `Integer`. В случае хранения меток времени в столбцах с типом `UInt64`, максимально допустимое значение соответствует ограничению для типа `Int64`, т.е. равно `2^63-1`. - `cond` — условия или данные, описывающие цепочку событий. [UInt8](../../sql-reference/aggregate-functions/parametric-functions.md). From cd11212bba784958174fdfbd334622a533686756 Mon Sep 17 00:00:00 2001 From: George Date: Thu, 11 Feb 2021 19:57:41 +0300 Subject: [PATCH 043/149] Edited and translated settings --- docs/en/operations/settings/settings.md | 4 ++-- docs/ru/operations/settings/settings.md | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c7ee48c11bf..70809885a99 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1956,8 +1956,8 @@ Default value: 16. **See Also** -- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine -- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine +- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine. +- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine. ## validate_polygons {#validate_polygons} diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 1352fe850df..fed10d21920 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1939,6 +1939,21 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; Значение по умолчанию: 16. +## background_message_broker_schedule_pool_size {#background_message_broker_schedule_pool_size} + +Задает количество потоков для вывода потокового вывода сообщений. Настройка применяется при запуске сервера ClickHouse и не может быть изменена в пользовательском сеансе. + +Допустимые значения: + +- Положительное целое число. + +Значение по умолчанию: 16. + +**Смотрите также** + +- Движок [Kafka](../../engines/table-engines/integrations/kafka.md#kafka). +- Движок [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine). 
+ ## format_avro_schema_registry_url {#format_avro_schema_registry_url} Задает URL реестра схем [Confluent](https://docs.confluent.io/current/schema-registry/index.html) для использования с форматом [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent). From 93ea1e5e82da3a3eb07dbe9daa355d3ab31accf5 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Feb 2021 20:13:59 +0300 Subject: [PATCH 044/149] Comment output --- .../QueryPlan/Optimizations/filterPushDown.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 2a42b08af73..a5f1d37e2f2 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -42,14 +42,11 @@ size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) for (auto pos : params.keys) keys.push_back(params.src_header.getByPosition(pos).name); - std::cerr << "Filter: \n" << expression->dumpDAG() << std::endl; + // std::cerr << "Filter: \n" << expression->dumpDAG() << std::endl; if (auto split_filter = expression->splitActionsForFilter(filter_column_name, removes_filter, keys)) { - std::cerr << "===============\n" << expression->dumpDAG() << std::endl; - std::cerr << "---------------\n" << split_filter->dumpDAG() << std::endl; - - //if (split_filter) - // throw Exception("!!!!", 0); + // std::cerr << "===============\n" << expression->dumpDAG() << std::endl; + // std::cerr << "---------------\n" << split_filter->dumpDAG() << std::endl; auto it = expression->getIndex().find(filter_column_name); if (it == expression->getIndex().end()) From 838dab756491d5bdcd6151fb5075756d0807b807 Mon Sep 17 00:00:00 2001 From: George Date: Thu, 11 Feb 2021 21:07:38 +0300 Subject: [PATCH 045/149] Edit and translated Kafka --- .../table-engines/integrations/kafka.md | 22 +++++++++---------- .../table-engines/integrations/kafka.md | 19 +++++++++------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index c519d6bb136..fb1df62bb15 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -38,20 +38,20 @@ SETTINGS Required parameters: -- `kafka_broker_list` – A comma-separated list of brokers (for example, `localhost:9092`). -- `kafka_topic_list` – A list of Kafka topics. -- `kafka_group_name` – A group of Kafka consumers. Reading margins are tracked for each group separately. If you don’t want messages to be duplicated in the cluster, use the same group name everywhere. -- `kafka_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. +- `kafka_broker_list` — A comma-separated list of brokers (for example, `localhost:9092`). +- `kafka_topic_list` — A list of Kafka topics. +- `kafka_group_name` — A group of Kafka consumers. Reading margins are tracked for each group separately. If you don’t want messages to be duplicated in the cluster, use the same group name everywhere. +- `kafka_format` — Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. 
Optional parameters: -- `kafka_row_delimiter` – Delimiter character, which ends the message. -- `kafka_schema` – Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. -- `kafka_num_consumers` – The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition. -- `kafka_max_block_size` - The maximum batch size (in messages) for poll (default: `max_block_size`). -- `kafka_skip_broken_messages` – Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). -- `kafka_commit_every_batch` - Commit every consumed and handled batch instead of a single commit after writing a whole block (default: `0`). -- `kafka_thread_per_consumer` - Provide independent thread for each consumer (default: `0`). When enabled, every consumer flush the data independently, in parallel (otherwise - rows from several consumers squashed to form one block). +- `kafka_row_delimiter` — Delimiter character, which ends the message. +- `kafka_schema` — Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. +- `kafka_num_consumers` — The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition. +- `kafka_max_block_size` — The maximum batch size (in messages) for poll (default: `max_block_size`). +- `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). +- `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block (default: `0`). +- `kafka_thread_per_consumer` — Provide independent thread for each consumer (default: `0`). When enabled, every consumer flush the data independently, in parallel (otherwise — rows from several consumers squashed to form one block). Examples: diff --git a/docs/ru/engines/table-engines/integrations/kafka.md b/docs/ru/engines/table-engines/integrations/kafka.md index 940fee2452b..2b9dfcd49da 100644 --- a/docs/ru/engines/table-engines/integrations/kafka.md +++ b/docs/ru/engines/table-engines/integrations/kafka.md @@ -31,21 +31,24 @@ SETTINGS [kafka_schema = '',] [kafka_num_consumers = N,] [kafka_skip_broken_messages = N] + [kafka_commit_every_batch = 0,] + [kafka_thread_per_consumer = 0] ``` Обязательные параметры: -- `kafka_broker_list` – перечень брокеров, разделенный запятыми (`localhost:9092`). -- `kafka_topic_list` – перечень необходимых топиков Kafka. -- `kafka_group_name` – группа потребителя Kafka. Отступы для чтения отслеживаются для каждой группы отдельно. Если необходимо, чтобы сообщения не повторялись на кластере, используйте везде одно имя группы. 
-- `kafka_format` – формат сообщений. Названия форматов должны быть теми же, что можно использовать в секции `FORMAT`, например, `JSONEachRow`. Подробнее читайте в разделе [Форматы](../../../interfaces/formats.md). +- `kafka_broker_list` — перечень брокеров, разделенный запятыми (`localhost:9092`). +- `kafka_topic_list` — перечень необходимых топиков Kafka. +- `kafka_group_name` — группа потребителя Kafka. Отступы для чтения отслеживаются для каждой группы отдельно. Если необходимо, чтобы сообщения не повторялись на кластере, используйте везде одно имя группы. +- `kafka_format` — формат сообщений. Названия форматов должны быть теми же, что можно использовать в секции `FORMAT`, например, `JSONEachRow`. Подробнее читайте в разделе [Форматы](../../../interfaces/formats.md). Опциональные параметры: -- `kafka_row_delimiter` – символ-разделитель записей (строк), которым завершается сообщение. -- `kafka_schema` – опциональный параметр, необходимый, если используется формат, требующий определения схемы. Например, [Cap’n Proto](https://capnproto.org/) требует путь к файлу со схемой и название корневого объекта `schema.capnp:Message`. -- `kafka_num_consumers` – количество потребителей (consumer) на таблицу. По умолчанию: `1`. Укажите больше потребителей, если пропускная способность одного потребителя недостаточна. Общее число потребителей не должно превышать количество партиций в топике, так как на одну партицию может быть назначено не более одного потребителя. -- `kafka_skip_broken_messages` – максимальное количество некорректных сообщений в блоке. Если `kafka_skip_broken_messages = N`, то движок отбрасывает `N` сообщений Кафки, которые не получилось обработать. Одно сообщение в точности соответствует одной записи (строке). Значение по умолчанию – 0. +- `kafka_row_delimiter` — символ-разделитель записей (строк), которым завершается сообщение. +- `kafka_schema` — опциональный параметр, необходимый, если используется формат, требующий определения схемы. Например, [Cap’n Proto](https://capnproto.org/) требует путь к файлу со схемой и название корневого объекта `schema.capnp:Message`. +- `kafka_num_consumers` — количество потребителей (consumer) на таблицу. По умолчанию: `1`. Укажите больше потребителей, если пропускная способность одного потребителя недостаточна. Общее число потребителей не должно превышать количество партиций в топике, так как на одну партицию может быть назначено не более одного потребителя. +- `kafka_skip_broken_messages` — максимальное количество некорректных сообщений в блоке. Если `kafka_skip_broken_messages = N`, то движок отбрасывает `N` сообщений Кафки, которые не получилось обработать. Одно сообщение в точности соответствует одной записи (строке). Значение по умолчанию – 0. +- `kafka_thread_per_consumer` — снабжает каждого потребителя независимым потоком (по умолчанию `0`). При включенном состоянии каждый потребитель сбрасывает данные независимо и параллельно (иначе — строки от нескольких потребителей склеиваются в один блок). 
Примеры From 4c8632bd9ab32322af29abb04cf70c39c6cd3c79 Mon Sep 17 00:00:00 2001 From: George Date: Fri, 12 Feb 2021 00:22:55 +0300 Subject: [PATCH 046/149] Minor fixes --- docs/ru/operations/settings/settings.md | 2 +- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index fed10d21920..a7754cfc421 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1941,7 +1941,7 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; ## background_message_broker_schedule_pool_size {#background_message_broker_schedule_pool_size} -Задает количество потоков для вывода потокового вывода сообщений. Настройка применяется при запуске сервера ClickHouse и не может быть изменена в пользовательском сеансе. +Задает количество потоков для фонового потокового вывода сообщений. Настройка применяется при запуске сервера ClickHouse и не может быть изменена в пользовательском сеансе. Допустимые значения: diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index 14ce97f5513..91b26a2415d 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -293,7 +293,7 @@ SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920 └─────────────────────────────────────┴───────────┘ ``` -Обратите внимание, что только часть данных была расшифрована, а остальное является бессмыслицей, как как `mode`, `key`, или `iv`были другими во время шифрования. +Обратите внимание, что только часть данных была расшифрована, а остальное является бессмыслицей, как как `mode`, `key`, или `iv` были другими во время шифрования. ## aes_decrypt_mysql {#aes_decrypt_mysql} From 7e75965af887d7a7d68699b7bac5e0401cbf02c7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Feb 2021 12:35:26 +0300 Subject: [PATCH 047/149] Fix ActionsDAG::splitActionsForFilter --- src/Interpreters/ActionsDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 78254e5139a..6a7dbc47230 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1168,7 +1168,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, { auto & inputs_list = inputs_map[name]; if (inputs_list.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find input {} in ActionsDAG. DAG:\n{}", name, dumpDAG()); + continue; allowed_nodes.emplace(inputs_list.front()); inputs_list.pop_front(); From 443a3e7e6fd2452bf3efa8e4ab2a349feaf3b29f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Feb 2021 13:12:31 +0300 Subject: [PATCH 048/149] Fix limit push down. 
--- src/Processors/QueryPlan/Optimizations/Optimizations.h | 4 ++-- src/Processors/QueryPlan/Optimizations/filterPushDown.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index be7f81e5db0..a5c3af488a9 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -40,7 +40,7 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &); /// Move FilterStep down if possible. /// May split FilterStep and push down only part of it. -size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes); +size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes); inline const auto & getOptimizations() { @@ -50,7 +50,7 @@ inline const auto & getOptimizations() {tryPushDownLimit, "pushDownLimit"}, {trySplitFilter, "splitFilter"}, {tryMergeExpressions, "mergeExpressions"}, - {tryPushDownLimit, "pushDownFilter"}, + {tryPushDownFilter, "pushDownFilter"}, }}; return optimizations; diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index a5f1d37e2f2..ac95d69d237 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -42,11 +42,11 @@ size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) for (auto pos : params.keys) keys.push_back(params.src_header.getByPosition(pos).name); - // std::cerr << "Filter: \n" << expression->dumpDAG() << std::endl; + std::cerr << "Filter: \n" << expression->dumpDAG() << std::endl; if (auto split_filter = expression->splitActionsForFilter(filter_column_name, removes_filter, keys)) { - // std::cerr << "===============\n" << expression->dumpDAG() << std::endl; - // std::cerr << "---------------\n" << split_filter->dumpDAG() << std::endl; + std::cerr << "===============\n" << expression->dumpDAG() << std::endl; + std::cerr << "---------------\n" << split_filter->dumpDAG() << std::endl; auto it = expression->getIndex().find(filter_column_name); if (it == expression->getIndex().end()) From 93e1428f2119ecc5b3979ff5bff0d0304327579c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Feb 2021 13:51:16 +0300 Subject: [PATCH 049/149] Fix limit push down. 
--- src/Processors/QueryPlan/Optimizations/filterPushDown.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index ac95d69d237..ec005e59729 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -15,7 +15,7 @@ namespace DB::ErrorCodes namespace DB::QueryPlanOptimizations { -size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) +size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) { if (parent_node->children.size() != 1) return 0; @@ -42,11 +42,11 @@ size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) for (auto pos : params.keys) keys.push_back(params.src_header.getByPosition(pos).name); - std::cerr << "Filter: \n" << expression->dumpDAG() << std::endl; + // std::cerr << "Filter: \n" << expression->dumpDAG() << std::endl; if (auto split_filter = expression->splitActionsForFilter(filter_column_name, removes_filter, keys)) { - std::cerr << "===============\n" << expression->dumpDAG() << std::endl; - std::cerr << "---------------\n" << split_filter->dumpDAG() << std::endl; + // std::cerr << "===============\n" << expression->dumpDAG() << std::endl; + // std::cerr << "---------------\n" << split_filter->dumpDAG() << std::endl; auto it = expression->getIndex().find(filter_column_name); if (it == expression->getIndex().end()) From 683d793cc289ec12b8885efe1405b79a22350a36 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Feb 2021 14:31:14 +0300 Subject: [PATCH 050/149] Update test. --- .../01655_plan_optimizations.reference | 33 +++++++++++- .../0_stateless/01655_plan_optimizations.sh | 51 ++++++++++++++++++- 2 files changed, 80 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index fda40305f9d..510224146ed 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -1,7 +1,7 @@ -sipHash should be calculated after filtration +> sipHash should be calculated after filtration FUNCTION sipHash64 Filter column: equals -sorting steps should know about limit +> sorting steps should know about limit Limit 10 MergingSorted Limit 10 @@ -9,3 +9,32 @@ MergeSorting Limit 10 PartialSorting Limit 10 +-- filter push down -- +> filter should be pushed down after aggregating +Aggregating +Filter +> filter should be pushed down after aggregating, column after aggregation is const +COLUMN Const(UInt8) -> notEquals(y, 0) +Aggregating +Filter +Filter +> one condition of filter should be pushed down after aggregating, other condition is aliased +Filter column +ALIAS notEquals(s, 4) :: 1 -> and(notEquals(y, 0), notEquals(s, 4)) +Aggregating +Filter column: notEquals(y, 0) +> one condition of filter should be pushed down after aggregating, other condition is casted +Filter column +FUNCTION CAST(minus(s, 4) :: 1, UInt8 :: 3) -> and(notEquals(y, 0), minus(s, 4)) +Aggregating +Filter column: notEquals(y, 0) +> one condition of filter should be pushed down after aggregating, other two conditions are ANDed +Filter column +FUNCTION and(minus(s, 4) :: 2, minus(s, 8) :: 1) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4)) +Aggregating +Filter column: notEquals(y, 0) +> two conditions of filter should be pushed 
down after aggregating and ANDed, one condition is aliased +Filter column +ALIAS notEquals(s, 8) :: 1 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4)) +Aggregating +Filter column: and(minus(y, 4), notEquals(y, 0)) diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index 4f3541f9dde..ea76d15c648 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -4,7 +4,54 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -echo "sipHash should be calculated after filtration" +echo "> sipHash should be calculated after filtration" $CLICKHOUSE_CLIENT -q "explain actions = 1 select sum(x), sum(y) from (select sipHash64(number) as x, bitAnd(number, 1024) as y from numbers_mt(1000000000) limit 1000000000) where y = 0" | grep -o "FUNCTION sipHash64\|Filter column: equals" -echo "sorting steps should know about limit" +echo "> sorting steps should know about limit" $CLICKHOUSE_CLIENT -q "explain actions = 1 select number from (select number from numbers(500000000) order by -number) limit 10" | grep -o "MergingSorted\|MergeSorting\|PartialSorting\|Limit 10" + +echo "-- filter push down --" +echo "> filter should be pushed down after aggregating" +$CLICKHOUSE_CLIENT -q " + explain select * from (select sum(x), y from ( + select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 + settings enable_optimize_predicate_expression=0" | grep -o "Aggregating\|Filter" + +echo "> filter should be pushed down after aggregating, column after aggregation is const" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select *, y != 0 from (select sum(x), y from ( + select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 + settings enable_optimize_predicate_expression=0" | grep -o "Aggregating\|Filter\|COLUMN Const(UInt8) -> notEquals(y, 0)" + +echo "> one condition of filter should be pushed down after aggregating, other condition is aliased" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select * from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s != 4 + settings enable_optimize_predicate_expression=0" | + grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|ALIAS notEquals(s, 4) :: 1 -> and(notEquals(y, 0), notEquals(s, 4))" + +echo "> one condition of filter should be pushed down after aggregating, other condition is casted" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select * from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s - 4 + settings enable_optimize_predicate_expression=0" | + grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION CAST(minus(s, 4) :: 1, UInt8 :: 3) -> and(notEquals(y, 0), minus(s, 4))" + +echo "> one condition of filter should be pushed down after aggregating, other two conditions are ANDed" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select * from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s - 8 and s - 4 + settings enable_optimize_predicate_expression=0" | + grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 4) :: 2, minus(s, 8) :: 1) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4))" + +echo "> two conditions of filter should 
be pushed down after aggregating and ANDed, one condition is aliased" +$CLICKHOUSE_CLIENT -q " + explain optimize = 1, actions = 1 select * from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s != 8 and y - 4 + settings enable_optimize_predicate_expression=0" | + grep -o "Aggregating\|Filter column\|Filter column: and(minus(y, 4), notEquals(y, 0))\|ALIAS notEquals(s, 8) :: 1 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4))" From bbed905461d9e08adaa1303f71c228d2f62fff8c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Feb 2021 18:20:54 +0300 Subject: [PATCH 051/149] Fix ActionsDAG::removeUnusedResult --- src/Interpreters/ActionsDAG.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 6a7dbc47230..255c774bbf9 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -490,6 +490,11 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name) if (col == child) return false; + /// Do not remove input if it was mentioned in index several times. + for (const auto * node : index) + if (col == node) + return false; + /// Remove from nodes and inputs. for (auto jt = nodes.begin(); jt != nodes.end(); ++jt) { From 90c7cf5a5293a32654e97cc8b4f8cb1d2090d3be Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Feb 2021 18:24:31 +0300 Subject: [PATCH 052/149] Push down for ArrayJoin --- .../Optimizations/filterPushDown.cpp | 116 ++++++++++++------ 1 file changed, 80 insertions(+), 36 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index ec005e59729..98e923249f3 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -3,7 +3,9 @@ #include #include #include +#include #include +#include #include #include @@ -15,6 +17,68 @@ namespace DB::ErrorCodes namespace DB::QueryPlanOptimizations { +static size_t tryAddNewFilterStep( + QueryPlan::Node * parent_node, + QueryPlan::Nodes & nodes, + const Names & allowed_inputs) +{ + QueryPlan::Node * child_node = parent_node->children.front(); + + auto & parent = parent_node->step; + auto & child = child_node->step; + + auto * filter = static_cast(parent.get()); + const auto & expression = filter->getExpression(); + const auto & filter_column_name = filter->getFilterColumnName(); + bool removes_filter = filter->removesFilterColumn(); + + // std::cerr << "Filter: \n" << expression->dumpDAG() << std::endl; + + auto split_filter = expression->splitActionsForFilter(filter_column_name, removes_filter, allowed_inputs); + if (!split_filter) + return 0; + + // std::cerr << "===============\n" << expression->dumpDAG() << std::endl; + // std::cerr << "---------------\n" << split_filter->dumpDAG() << std::endl; + + const auto & index = expression->getIndex(); + auto it = index.begin(); + for (; it != index.end(); ++it) + if ((*it)->result_name == filter_column_name) + break; + + if (it == expression->getIndex().end()) + { + if (!removes_filter) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Filter column {} was removed from ActionsDAG but it is needed in result. 
DAG:\n{}", + filter_column_name, expression->dumpDAG()); + + std::cerr << "replacing to expr because filter " << filter_column_name << " was removed\n"; + parent = std::make_unique(child->getOutputStream(), expression); + } + else if ((*it)->column && isColumnConst(*(*it)->column)) + { + std::cerr << "replacing to expr because filter is const\n"; + parent = std::make_unique(child->getOutputStream(), expression); + } + + /// Add new Filter step before Aggregating. + /// Expression/Filter -> Aggregating -> Something + auto & node = nodes.emplace_back(); + node.children.swap(child_node->children); + child_node->children.emplace_back(&node); + /// Expression/Filter -> Aggregating -> Filter -> Something + + /// New filter column is added to the end. + auto split_filter_column_name = (*split_filter->getIndex().rbegin())->result_name; + node.step = std::make_unique( + node.children.at(0)->step->getOutputStream(), + std::move(split_filter), std::move(split_filter_column_name), true); + + return 3; +} + size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) { if (parent_node->children.size() != 1) @@ -29,10 +93,6 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (!filter) return 0; - const auto & expression = filter->getExpression(); - const auto & filter_column_name = filter->getFilterColumnName(); - bool removes_filter = filter->removesFilterColumn(); - if (auto * aggregating = typeid_cast(child.get())) { const auto & params = aggregating->getParams(); @@ -42,42 +102,26 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes for (auto pos : params.keys) keys.push_back(params.src_header.getByPosition(pos).name); - // std::cerr << "Filter: \n" << expression->dumpDAG() << std::endl; - if (auto split_filter = expression->splitActionsForFilter(filter_column_name, removes_filter, keys)) - { - // std::cerr << "===============\n" << expression->dumpDAG() << std::endl; - // std::cerr << "---------------\n" << split_filter->dumpDAG() << std::endl; + if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, keys)) + return updated_steps; + } - auto it = expression->getIndex().find(filter_column_name); - if (it == expression->getIndex().end()) - { - if (!removes_filter) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", - filter_column_name, expression->dumpDAG()); + if (auto * array_join = typeid_cast(child.get())) + { + const auto & array_join_actions = array_join->arrayJoin(); + const auto & keys = array_join_actions->columns; + const auto & array_join_header = array_join->getInputStreams().front().header; - parent = std::make_unique(child->getOutputStream(), expression); - } - else if ((*it)->column && isColumnConst(*(*it)->column)) - { - parent = std::make_unique(child->getOutputStream(), expression); - } + Names allowed_inputs; + for (const auto & column : array_join_header) + if (keys.count(column.name) == 0) + allowed_inputs.push_back(column.name); - /// Add new Filter step before Aggregating. - /// Expression/Filter -> Aggregating -> Something - auto & node = nodes.emplace_back(); - node.children.swap(child_node->children); - child_node->children.emplace_back(&node); - /// Expression/Filter -> Aggregating -> Filter -> Something + for (const auto & name : allowed_inputs) + std::cerr << name << std::endl; - /// New filter column is added to the end. 
- auto split_filter_column_name = (*split_filter->getIndex().rbegin())->result_name; - node.step = std::make_unique( - node.children.at(0)->step->getOutputStream(), - std::move(split_filter), std::move(split_filter_column_name), true); - - return 3; - } + if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs)) + return updated_steps; } return 0; From 5fd80555aa6241e01737c9a9083f663a8d7ed0eb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Feb 2021 19:06:18 +0300 Subject: [PATCH 053/149] Update test. --- .../queries/0_stateless/01655_plan_optimizations.reference | 4 ++++ tests/queries/0_stateless/01655_plan_optimizations.sh | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index 510224146ed..1e638829c74 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -38,3 +38,7 @@ Filter column ALIAS notEquals(s, 8) :: 1 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4)) Aggregating Filter column: and(minus(y, 4), notEquals(y, 0)) +> filter is split, one part is filtered before ARRAY JOIN +Filter column: and(notEquals(y, 2), notEquals(x, 0)) +ARRAY JOIN x +Filter column: notEquals(y, 2) diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index ea76d15c648..ccd331df45e 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -55,3 +55,10 @@ $CLICKHOUSE_CLIENT -q " ) where y != 0 and s != 8 and y - 4 settings enable_optimize_predicate_expression=0" | grep -o "Aggregating\|Filter column\|Filter column: and(minus(y, 4), notEquals(y, 0))\|ALIAS notEquals(s, 8) :: 1 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4))" + +echo "> filter is split, one part is filtered before ARRAY JOIN" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select x, y from ( + select range(number) as x, number + 1 as y from numbers(3) + ) array join x where y != 2 and x != 0" | + grep -o "Filter column: and(notEquals(y, 2), notEquals(x, 0))\|ARRAY JOIN x\|Filter column: notEquals(y, 2)" \ No newline at end of file From e34d6b0f37da637e2fa68fc05945c6a3e4e57e5a Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Mon, 15 Feb 2021 21:25:10 +0300 Subject: [PATCH 054/149] Update docs/ru/sql-reference/functions/date-time-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/functions/date-time-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index d019c18a688..bb4c49e898e 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -706,7 +706,7 @@ SELECT FROM_UNIXTIME(423543535); └──────────────────────────┘ ``` -В случае, когда есть два аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md) или [DateTime](../../sql-reference/data-types/datetime.md), а второй является строкой постоянного формата — функция работает таким же образом, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string). 
+В случае, когда есть два аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md) или [DateTime](../../sql-reference/data-types/datetime.md), а второй является строкой постоянного формата — функция работает также, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string). Запрос: From e8889463a6351316c1d0ae1cc0b99c8424c767d5 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Mon, 15 Feb 2021 21:25:31 +0300 Subject: [PATCH 055/149] Update docs/ru/operations/utilities/clickhouse-local.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/operations/utilities/clickhouse-local.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/utilities/clickhouse-local.md b/docs/ru/operations/utilities/clickhouse-local.md index 8ecbbfcce8c..15d069c9acf 100644 --- a/docs/ru/operations/utilities/clickhouse-local.md +++ b/docs/ru/operations/utilities/clickhouse-local.md @@ -77,7 +77,7 @@ $ clickhouse-local --query " 1 2 ``` -Объём оперативной памяти, занимаемой пользователями (Unix): +Объём оперативной памяти, занимаемой процессами, которые запустил пользователь (Unix): Запрос: From a72ef6f026eb955fe43ba9c2d07e3ad6e6646983 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 16 Feb 2021 11:26:24 +0300 Subject: [PATCH 056/149] Fix number of threads for scalar subqueries and subqueries for index. --- .../ExecuteScalarSubqueriesVisitor.cpp | 16 ++++++++++++---- src/Interpreters/ExpressionAnalyzer.cpp | 7 +++++-- .../Executors/PullingAsyncPipelineExecutor.cpp | 7 ++++++- src/Processors/Formats/LazyOutputFormat.cpp | 9 +++++++-- 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index e6061aabe94..7ee7bb1f301 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -21,7 +21,7 @@ #include -#include +#include namespace DB { @@ -122,8 +122,10 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr try { - PullingPipelineExecutor executor(io.pipeline); - if (!executor.pull(block)) + PullingAsyncPipelineExecutor executor(io.pipeline); + while (block.rows() == 0 && executor.pull(block)); + + if (block.rows() == 0) { /// Interpret subquery with empty result as Null literal auto ast_new = std::make_unique(Null()); @@ -132,7 +134,13 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr return; } - if (block.rows() != 1 || executor.pull(block)) + if (block.rows() != 1) + throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); + + Block tmp_block; + while (tmp_block.rows() == 0 && executor.pull(tmp_block)); + + if (tmp_block.rows() != 0) throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); } catch (const Exception & e) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 3f65a6f3f58..cea056d6a21 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -54,7 +54,7 @@ #include #include -#include +#include #include namespace DB @@ -321,7 +321,7 @@ void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr auto interpreter_subquery = interpretSubquery(subquery_or_table_name, context, {}, query_options); auto io = 
interpreter_subquery->execute(); - PullingPipelineExecutor executor(io.pipeline); + PullingAsyncPipelineExecutor executor(io.pipeline); SetPtr set = std::make_shared(settings.size_limits_for_set, true, context.getSettingsRef().transform_null_in); set->setHeader(executor.getHeader()); @@ -329,6 +329,9 @@ void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr Block block; while (executor.pull(block)) { + if (block.rows() == 0) + continue; + /// If the limits have been exceeded, give up and let the default subquery processing actions take place. if (!set->insertFromBlock(block)) return; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index e4bcf6dc0ab..21741d30dfa 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -133,7 +133,12 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) } chunk.clear(); - data->finish_event.tryWait(milliseconds); + + if (milliseconds) + data->finish_event.tryWait(milliseconds); + else + data->finish_event.wait(); + return true; } diff --git a/src/Processors/Formats/LazyOutputFormat.cpp b/src/Processors/Formats/LazyOutputFormat.cpp index 46287d1cce9..0663ff28f84 100644 --- a/src/Processors/Formats/LazyOutputFormat.cpp +++ b/src/Processors/Formats/LazyOutputFormat.cpp @@ -16,8 +16,13 @@ Chunk LazyOutputFormat::getChunk(UInt64 milliseconds) } Chunk chunk; - if (!queue.tryPop(chunk, milliseconds)) - return {}; + if (milliseconds) + { + if (!queue.tryPop(chunk, milliseconds)) + return {}; + } + else + queue.pop(chunk); if (chunk) info.update(chunk.getNumRows(), chunk.allocatedBytes()); From 10f1432c5cb1dc77c0c31cd960a275480fa380dd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 16 Feb 2021 11:31:17 +0300 Subject: [PATCH 057/149] Added perftest. 
--- tests/performance/subqueries.xml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 tests/performance/subqueries.xml diff --git a/tests/performance/subqueries.xml b/tests/performance/subqueries.xml new file mode 100644 index 00000000000..f1481a78c7e --- /dev/null +++ b/tests/performance/subqueries.xml @@ -0,0 +1,7 @@ + + create table tab (a UInt32, b UInt32) engine = MergeTree order by (a, b) + insert into tab values (1, 1) + select a, b from tab where (a, b) in (select toUInt32(number) as x, toUInt32(sleep(0.1) + 1) from numbers_mt(16)) settings max_threads = 2, max_block_size = 4 + select a, b from tab where (1, 1) = (select min(toUInt32(number + 1)) as x, min(toUInt32(sleep(0.1) + 1)) from numbers_mt(16)) settings max_threads = 2, max_block_size = 4 + DROP TABLE tab + \ No newline at end of file From a1cd07b9a00ff0ea4bc4e98d03af9b5046e6854f Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:24:49 +0300 Subject: [PATCH 058/149] Update docs/ru/sql-reference/aggregate-functions/parametric-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- .../sql-reference/aggregate-functions/parametric-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md index 2c367882714..d96f7a13bcc 100644 --- a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md @@ -239,7 +239,7 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) **Параметры** -- `window` — ширина скользящего окна по времени. Единица измерения зависит от `timestamp` и может варьироваться. Определяется выражением `timestamp от cond2 <= timestamp от cond1 + window`. +- `window` — ширина скользящего окна по времени. Единица измерения зависит от `timestamp` и может варьироваться. Должно соблюдаться условие `timestamp события cond2 <= timestamp события cond1 + window`. - `mode` - необязательный параметр. Если установлено значение `'strict'`, то функция `windowFunnel()` применяет условия только для уникальных значений. - `timestamp` — имя столбца, содержащего временные отметки. [Date](../../sql-reference/aggregate-functions/parametric-functions.md), [DateTime](../../sql-reference/aggregate-functions/parametric-functions.md#data_type-datetime) и другие параметры с типом `Integer`. В случае хранения меток времени в столбцах с типом `UInt64`, максимально допустимое значение соответствует ограничению для типа `Int64`, т.е. равно `2^63-1`. - `cond` — условия или данные, описывающие цепочку событий. [UInt8](../../sql-reference/aggregate-functions/parametric-functions.md). 
From b8be90cdf9c8505714cfaeb94ac6ffa296a0778d Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:32:59 +0300 Subject: [PATCH 059/149] Update docs/ru/sql-reference/functions/encryption-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index 91b26a2415d..adf084a6b21 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -110,7 +110,7 @@ SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%'; Совместима с шифрованием myqsl, результат может быть расшифрован функцией [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt). -При одинаковых вводных зашифрованный текст будет совпадать с результатом `encrypt`. Однако, когда `key` или `iv` длиннее, чем должны быть, `aes_encrypt_mysql` будет работать аналогично MySQL `aes_encrypt`: свернет ключ и проигнорирует лишнюю часть `iv`. +При одинаковых входящих значениях зашифрованный текст будет совпадать с результатом, возвращаемым функцией `encrypt`. Однако если `key` или `iv` длиннее, чем должны быть, `aes_encrypt_mysql` будет работать аналогично функции `aes_encrypt` в MySQL: свернет ключ и проигнорирует лишнюю часть `iv`. Функция поддерживает шифрофание данных следующими режимами: From a642dbce46f1734b1893f6528ad591641edbdc70 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:33:19 +0300 Subject: [PATCH 060/149] Update docs/ru/sql-reference/functions/encryption-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index adf084a6b21..0e8e7d2a33a 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -140,7 +140,7 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv]) **Примеры** -При одинаковых вводных результаты шифрования `encrypt` и `aes_encrypt_mysql` будут совпадать. +При одинаковых входящих значениях результаты шифрования у функций `encrypt` и `aes_encrypt_mysql` совпадают. 
Запрос: From 22ab639287ea47b9a2dba80982170e15c9edd3a0 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:33:32 +0300 Subject: [PATCH 061/149] Update docs/ru/sql-reference/functions/encryption-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index 0e8e7d2a33a..a72866121c4 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -156,7 +156,7 @@ SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', ' └───────────────────┘ ``` -Но `encrypt` генерирует исключение, когда `key` или `iv` длиннее, чем нужно: +Функция `encrypt` генерирует исключение, если `key` или `iv` длиннее чем нужно: Запрос: From d213039fe58fa8efe4340fdd4e3b14564139c71f Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:33:57 +0300 Subject: [PATCH 062/149] Update docs/ru/sql-reference/functions/encryption-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index a72866121c4..90aa3268922 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -171,7 +171,7 @@ Received exception from server (version 21.1.2): Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123'). 
``` -Тогда как `aes_encrypt_mysql` возвращает совместимый с MySQL вывод: +Однако функция `aes_encrypt_mysql` в аналогичном случае возвращает результат, который может быть обработан MySQL: Запрос: From 66d6b7a3a088be7e72cab7ced29b1c7fa5c4f418 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:34:33 +0300 Subject: [PATCH 063/149] Update docs/ru/sql-reference/functions/encryption-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index 90aa3268922..f75e7bcc1a3 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -220,7 +220,7 @@ mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviv ## decrypt {#decrypt} -Функция расшифровывает зашифрованный текст в обычный следующими режимами: +Функция расшифровывает зашифрованный текст и может работать в следующих режимах: - aes-128-ecb, aes-192-ecb, aes-256-ecb - aes-128-cbc, aes-192-cbc, aes-256-cbc From 5edba428658e60f9ee0be3681e17b638e8f2d254 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:34:43 +0300 Subject: [PATCH 064/149] Update docs/ru/sql-reference/functions/encryption-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index f75e7bcc1a3..c4e0968d6f9 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -203,7 +203,7 @@ SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '1234567891012131415161 └──────────────┘ ``` -Это совпадает с тем, что выводит MySQL с такими же вводными: +Это совпадает с результатом, возвращаемым MySQL при таких же входящих значениях: ``` sql mysql> SET block_encryption_mode='aes-256-cfb128'; From a26f2b77cb84e5d5629a706f42bd5a0c8214c694 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:35:07 +0300 Subject: [PATCH 065/149] Update docs/ru/sql-reference/functions/encryption-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index c4e0968d6f9..92e8d62faca 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -250,7 +250,7 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad]) **Примеры** -Используется таблица из [encrypt](#encrypt). +Рассмотрим таблицу из примера для функции [encrypt](#encrypt). 
Запрос: From 7a910d38a10c92f1aae4d13e5de34a73e10e978e Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:35:12 +0300 Subject: [PATCH 066/149] Update docs/ru/sql-reference/functions/encryption-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index 92e8d62faca..faddf314fe7 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -278,7 +278,7 @@ SELECT comment, hex(secret) FROM encryption_test; Запрос: ``` sql -SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test +SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test; ``` Результат: From 07795335cecc9352b7d4164bbd6c63599d19bda1 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:35:23 +0300 Subject: [PATCH 067/149] Update docs/ru/sql-reference/functions/encryption-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index faddf314fe7..0f46f3c1fd5 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -293,7 +293,7 @@ SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920 └─────────────────────────────────────┴───────────┘ ``` -Обратите внимание, что только часть данных была расшифрована, а остальное является бессмыслицей, как как `mode`, `key`, или `iv` были другими во время шифрования. +Обратите внимание, что только часть данных была расшифрована верно. Оставшаяся часть расшифрована некорректно, так как при шифровании использовались другие значения `mode`, `key`, или `iv`. ## aes_decrypt_mysql {#aes_decrypt_mysql} From 579f8a95bcaa804b4264e8047d68474af5ef3ec6 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:35:43 +0300 Subject: [PATCH 068/149] Update docs/ru/sql-reference/functions/encryption-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index 0f46f3c1fd5..6cf5b520f23 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -299,7 +299,7 @@ SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920 Совместима с шифрованием myqsl и может расшифровать данные, зашифрованные функцией [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt). -При одинаковых вводных расшифрованный текст будет совпадать с результатом `decrypt`. 
Однако, когда `key` или `iv` длиннее, чем должны быть, `aes_decrypt_mysql` будет работать аналогично MySQL `aes_decrypt`: свернет ключ и проигнорирует лишнюю часть `iv`. +При одинаковых входящих значениях расшифрованный текст будет совпадать с результатом, возвращаемым функцией `decrypt`. Однако если `key` или `iv` длиннее, чем должны быть, `aes_decrypt_mysql` будет работать аналогично функции `aes_decrypt` в MySQL: свернет ключ и проигнорирует лишнюю часть `iv`. Функция поддерживает расшифровку данных следующими режимами: From b82e564076203733a292d53ebcf843ad0289ace9 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:35:48 +0300 Subject: [PATCH 069/149] Update docs/ru/sql-reference/functions/encryption-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index 6cf5b520f23..04a74fe8107 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -301,7 +301,7 @@ SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920 При одинаковых входящих значениях расшифрованный текст будет совпадать с результатом, возвращаемым функцией `decrypt`. Однако если `key` или `iv` длиннее, чем должны быть, `aes_decrypt_mysql` будет работать аналогично функции `aes_decrypt` в MySQL: свернет ключ и проигнорирует лишнюю часть `iv`. -Функция поддерживает расшифровку данных следующими режимами: +Функция поддерживает расшифровку данных в следующих режимах: - aes-128-ecb, aes-192-ecb, aes-256-ecb - aes-128-cbc, aes-192-cbc, aes-256-cbc From c10485d21a29ab7e1ec405ef19fad35ca306185a Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:35:55 +0300 Subject: [PATCH 070/149] Update docs/ru/sql-reference/functions/encryption-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index 04a74fe8107..3c2f9e3e682 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -348,7 +348,7 @@ mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviv Запрос: ``` sql -SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext +SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext; ``` Результат: From 236b9cfeff06a9ac5115736041586a9ae119d761 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:36:01 +0300 Subject: [PATCH 071/149] Update docs/ru/sql-reference/functions/encryption-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md 
b/docs/ru/sql-reference/functions/encryption-functions.md index 3c2f9e3e682..5406112624f 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -329,7 +329,7 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) **Примеры** -Расшифруем данные, которые до этого зашифровали с помощью MySQL: +Расшифруем данные, которые до этого были зашифрованы в MySQL: ``` sql From f2c7c38c18b817bf101769d4d69e1ab78075778e Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 22:38:20 +0300 Subject: [PATCH 072/149] Update docs/ru/sql-reference/functions/encryption-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index 5406112624f..e2c5560e4f6 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -11,7 +11,7 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438 \u0434\u043b\u044f \u0448 Длина инициализирующего вектора всегда 16 байт (лишнии байты игнорируются). -Обратите внимание, что до версии Clickhouse 21.1 эти функции работают медленно. +Обратите внимание, что до версии Clickhouse 21.1 эти функции работали медленно. ## encrypt {#encrypt} From 2858151d09b70b018a9626a2c4efda6d1535ec8b Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Wed, 17 Feb 2021 00:25:34 +0300 Subject: [PATCH 073/149] Update kafka.md --- docs/ru/engines/table-engines/integrations/kafka.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/ru/engines/table-engines/integrations/kafka.md b/docs/ru/engines/table-engines/integrations/kafka.md index 2b9dfcd49da..a1528edfd1d 100644 --- a/docs/ru/engines/table-engines/integrations/kafka.md +++ b/docs/ru/engines/table-engines/integrations/kafka.md @@ -47,7 +47,9 @@ SETTINGS - `kafka_row_delimiter` — символ-разделитель записей (строк), которым завершается сообщение. - `kafka_schema` — опциональный параметр, необходимый, если используется формат, требующий определения схемы. Например, [Cap’n Proto](https://capnproto.org/) требует путь к файлу со схемой и название корневого объекта `schema.capnp:Message`. - `kafka_num_consumers` — количество потребителей (consumer) на таблицу. По умолчанию: `1`. Укажите больше потребителей, если пропускная способность одного потребителя недостаточна. Общее число потребителей не должно превышать количество партиций в топике, так как на одну партицию может быть назначено не более одного потребителя. +- `kafka_max_block_size` — максимальный размер пачек (в сообщениях) для poll (по умолчанию `max_block_size`). - `kafka_skip_broken_messages` — максимальное количество некорректных сообщений в блоке. Если `kafka_skip_broken_messages = N`, то движок отбрасывает `N` сообщений Кафки, которые не получилось обработать. Одно сообщение в точности соответствует одной записи (строке). Значение по умолчанию – 0. +- `kafka_commit_every_batch` — фиксирует каждый обработанный и потребленный пакет вместо отдельной фиксации после записи целого блока (по умолчанию `0`). - `kafka_thread_per_consumer` — снабжает каждого потребителя независимым потоком (по умолчанию `0`). 
При включенном состоянии каждый потребитель сбрасывает данные независимо и параллельно (иначе — строки от нескольких потребителей склеиваются в один блок). Примеры From 23754e46e8a8c54ff00537546908fa629f8ece71 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Wed, 17 Feb 2021 01:41:47 +0300 Subject: [PATCH 074/149] Update docs/ru/engines/table-engines/integrations/kafka.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/kafka.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/engines/table-engines/integrations/kafka.md b/docs/ru/engines/table-engines/integrations/kafka.md index a1528edfd1d..5a6971b1ae6 100644 --- a/docs/ru/engines/table-engines/integrations/kafka.md +++ b/docs/ru/engines/table-engines/integrations/kafka.md @@ -49,8 +49,8 @@ SETTINGS - `kafka_num_consumers` — количество потребителей (consumer) на таблицу. По умолчанию: `1`. Укажите больше потребителей, если пропускная способность одного потребителя недостаточна. Общее число потребителей не должно превышать количество партиций в топике, так как на одну партицию может быть назначено не более одного потребителя. - `kafka_max_block_size` — максимальный размер пачек (в сообщениях) для poll (по умолчанию `max_block_size`). - `kafka_skip_broken_messages` — максимальное количество некорректных сообщений в блоке. Если `kafka_skip_broken_messages = N`, то движок отбрасывает `N` сообщений Кафки, которые не получилось обработать. Одно сообщение в точности соответствует одной записи (строке). Значение по умолчанию – 0. -- `kafka_commit_every_batch` — фиксирует каждый обработанный и потребленный пакет вместо отдельной фиксации после записи целого блока (по умолчанию `0`). -- `kafka_thread_per_consumer` — снабжает каждого потребителя независимым потоком (по умолчанию `0`). При включенном состоянии каждый потребитель сбрасывает данные независимо и параллельно (иначе — строки от нескольких потребителей склеиваются в один блок). +- `kafka_commit_every_batch` — включает или отключает режим записи каждой принятой и обработанной пачки по отдельности вместо единой записи целого блока (по умолчанию `0`). +- `kafka_thread_per_consumer` — включает или отключает предоставление отдельного потока каждому потребителю (по умолчанию `0`). При включенном режиме каждый потребитель сбрасывает данные независимо и параллельно, при отключённом — строки с данными от нескольких потребителей собираются в один блок. 
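+Минимальный набросок, показывающий, как перечисленные настройки сочетаются в определении таблицы (адрес брокера, топик и имя группы ниже — условные значения, а не значения из этого документа):
+
+``` sql
+CREATE TABLE kafka_queue
+(
+    `key` UInt64,
+    `value` String
+)
+ENGINE = Kafka
+SETTINGS kafka_broker_list = 'localhost:9092',
+         kafka_topic_list = 'events',
+         kafka_group_name = 'group1',
+         kafka_format = 'JSONEachRow',
+         kafka_num_consumers = 2,
+         kafka_max_block_size = 65536,
+         kafka_commit_every_batch = 0,
+         kafka_thread_per_consumer = 1;
+```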
Примеры From e5cef576e589f4307f35074cf45e8dbb08801c65 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 17 Feb 2021 12:39:40 +0300 Subject: [PATCH 075/149] Update subqueries.xml --- tests/performance/subqueries.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/performance/subqueries.xml b/tests/performance/subqueries.xml index f1481a78c7e..0d41099841b 100644 --- a/tests/performance/subqueries.xml +++ b/tests/performance/subqueries.xml @@ -1,7 +1,7 @@ - create table tab (a UInt32, b UInt32) engine = MergeTree order by (a, b) + create table tab (a UInt32, b UInt32) engine = MergeTree order by (a, b) insert into tab values (1, 1) select a, b from tab where (a, b) in (select toUInt32(number) as x, toUInt32(sleep(0.1) + 1) from numbers_mt(16)) settings max_threads = 2, max_block_size = 4 select a, b from tab where (1, 1) = (select min(toUInt32(number + 1)) as x, min(toUInt32(sleep(0.1) + 1)) from numbers_mt(16)) settings max_threads = 2, max_block_size = 4 DROP TABLE tab - \ No newline at end of file + From a8647096ed96fb348aea73edf54b5e7bedea4284 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 17 Feb 2021 13:27:47 +0300 Subject: [PATCH 076/149] Try fix tests. --- src/Interpreters/ActionsDAG.cpp | 20 +++++++++++++------ .../Optimizations/filterPushDown.cpp | 4 ++-- .../QueryPlan/Optimizations/optimizeTree.cpp | 8 ++++++++ 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index e9e9d1628a8..691905bed27 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1245,14 +1245,14 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, { struct Frame { - const Node * node; + Node * node; bool is_predicate = false; size_t next_child_to_visit = 0; size_t num_allowed_children = 0; }; std::stack stack; - std::unordered_set visited_nodes; + std::unordered_set visited_nodes; stack.push(Frame{.node = *it, .is_predicate = true}); visited_nodes.insert(*it); @@ -1290,12 +1290,12 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, else if (is_conjunction) { for (auto * child : cur.node->children) - { if (allowed_nodes.count(child)) selected_predicates.insert(child); - else - other_predicates.insert(child); - } + } + else if (cur.is_predicate) + { + other_predicates.insert(cur.node); } stack.pop(); @@ -1311,6 +1311,14 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, return nullptr; } + // std::cerr << "************* Selectecd predicates\n"; + // for (const auto * p : selected_predicates) + // std::cerr << p->result_name << std::endl; + + // std::cerr << "............. 
Other predicates\n"; + // for (const auto * p : other_predicates) + // std::cerr << p->result_name << std::endl; + auto actions = cloneEmpty(); actions->settings.project_input = false; diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 98e923249f3..39f24a32b45 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -117,8 +117,8 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (keys.count(column.name) == 0) allowed_inputs.push_back(column.name); - for (const auto & name : allowed_inputs) - std::cerr << name << std::endl; + // for (const auto & name : allowed_inputs) + // std::cerr << name << std::endl; if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs)) return updated_steps; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index e5ccc173ed8..cc81a7f39fc 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -23,6 +23,9 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes) std::stack stack; stack.push(Frame{.node = &root}); + size_t max_optimizations_to_apply = 0; + size_t total_applied_optimizations = 0; + while (!stack.empty()) { auto & frame = stack.top(); @@ -54,8 +57,13 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes) if (!optimization.apply) continue; + if (max_optimizations_to_apply && max_optimizations_to_apply < total_applied_optimizations) + continue; + /// Try to apply optimization. auto update_depth = optimization.apply(frame.node, nodes); + if (update_depth) + ++total_applied_optimizations; max_update_depth = std::max(max_update_depth, update_depth); } From 6522bfc402260b2b4edfd4c2f0ab55a662296e63 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 17 Feb 2021 19:54:11 +0300 Subject: [PATCH 077/149] Support for DIstinct, sorting steps. 
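A sketch of the plan shape this enables, using a query from the tests added in the next commit (expected markers are taken from its reference file):

```sql
EXPLAIN actions = 1
SELECT x, y FROM (
    SELECT number % 2 AS x, number % 3 AS y FROM numbers(6) ORDER BY y DESC
) WHERE x != 0 AND y != 0
SETTINGS enable_optimize_predicate_expression = 0;
-- The plan should list MergingSorted, MergeSorting, PartialSorting and,
-- below them, "Filter column: and(notEquals(x, 0), notEquals(y, 0))".
```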
--- src/Interpreters/ActionsDAG.cpp | 2 +- src/Processors/QueryPlan/CreatingSetsStep.h | 2 +- src/Processors/QueryPlan/CubeStep.cpp | 5 ++ src/Processors/QueryPlan/CubeStep.h | 2 + src/Processors/QueryPlan/FillingStep.h | 2 + .../Optimizations/filterPushDown.cpp | 68 +++++++++++++++++-- 6 files changed, 74 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 691905bed27..8b6013a4365 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1389,7 +1389,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, for (const auto * predicate : selected_predicates) args.emplace_back(nodes_mapping[predicate]); - result_predicate = &actions->addFunction(func_builder_and, args, {}, true); + result_predicate = &actions->addFunction(func_builder_and, args, {}, true, false); } actions->index.insert(result_predicate); diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h index ec13ab2052e..97821cb63d3 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.h +++ b/src/Processors/QueryPlan/CreatingSetsStep.h @@ -34,7 +34,7 @@ private: class CreatingSetsStep : public IQueryPlanStep { public: - CreatingSetsStep(DataStreams input_streams_); + explicit CreatingSetsStep(DataStreams input_streams_); String getName() const override { return "CreatingSets"; } diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index de8bb2b3d43..6a0ec33402b 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -43,4 +43,9 @@ void CubeStep::transformPipeline(QueryPipeline & pipeline) }); } +const Aggregator::Params & CubeStep::getParams() const +{ + return params->params; +} + } diff --git a/src/Processors/QueryPlan/CubeStep.h b/src/Processors/QueryPlan/CubeStep.h index 707f62ce7d6..f67a03dc7e2 100644 --- a/src/Processors/QueryPlan/CubeStep.h +++ b/src/Processors/QueryPlan/CubeStep.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include namespace DB { @@ -18,6 +19,7 @@ public: void transformPipeline(QueryPipeline & pipeline) override; + const Aggregator::Params & getParams() const; private: AggregatingTransformParamsPtr params; }; diff --git a/src/Processors/QueryPlan/FillingStep.h b/src/Processors/QueryPlan/FillingStep.h index 85736464a6c..c8d1f74c6ca 100644 --- a/src/Processors/QueryPlan/FillingStep.h +++ b/src/Processors/QueryPlan/FillingStep.h @@ -17,6 +17,8 @@ public: void describeActions(FormatSettings & settings) const override; + const SortDescription & getSortDescription() const { return sort_description; } + private: SortDescription sort_description; }; diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 39f24a32b45..74c4fa6f329 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -4,9 +4,15 @@ #include #include #include +#include #include #include #include +#include "Processors/QueryPlan/FinishSortingStep.h" +#include "Processors/QueryPlan/MergeSortingStep.h" +#include "Processors/QueryPlan/MergingSortedStep.h" +#include "Processors/QueryPlan/PartialSortingStep.h" +#include #include namespace DB::ErrorCodes @@ -79,6 +85,30 @@ static size_t tryAddNewFilterStep( return 3; } +static Names getAggregatinKeys(const Aggregator::Params & params) +{ + Names keys; + keys.reserve(params.keys.size()); + for (auto 
pos : params.keys) + keys.push_back(params.src_header.getByPosition(pos).name); + + return keys; +} + +// static NameSet getColumnNamesFromSortDescription(const SortDescription & sort_desc, const Block & header) +// { +// NameSet names; +// for (const auto & column : sort_desc) +// { +// if (!column.column_name.empty()) +// names.insert(column.column_name); +// else +// names.insert(header.safeGetByPosition(column.column_number).name); +// } + +// return names; +// } + size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) { if (parent_node->children.size() != 1) @@ -96,11 +126,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (auto * aggregating = typeid_cast(child.get())) { const auto & params = aggregating->getParams(); - - Names keys; - keys.reserve(params.keys.size()); - for (auto pos : params.keys) - keys.push_back(params.src_header.getByPosition(pos).name); + Names keys = getAggregatinKeys(params); if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, keys)) return updated_steps; @@ -124,6 +150,38 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes return updated_steps; } + if (auto * distinct = typeid_cast(child.get())) + { + Names allowed_inputs = distinct->getOutputStream().header.getNames(); + if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs)) + return updated_steps; + } + + /// TODO. + /// We can filter earlier if expression does not depend on WITH FILL columns. + /// But we cannot just push down condition, because other column may be filled with defaults. + /// + /// It is possible to filter columns before and after WITH FILL, but such change is not idempotent. + /// So, appliying this to pair (Filter -> Filling) several times will create several similar filters. + // if (auto * filling = typeid_cast(child.get())) + // { + // } + + /// Same reason for Cube + // if (auto * cube = typeid_cast(child.get())) + // { + // } + + if (typeid_cast(child.get()) + || typeid_cast(child.get()) + || typeid_cast(child.get()) + || typeid_cast(child.get())) + { + Names allowed_inputs = child->getOutputStream().header.getNames(); + if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs)) + return updated_steps; + } + return 0; } From e5b9c42860cce08b0b94f7863dbeb6f38b066d83 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 17 Feb 2021 19:54:37 +0300 Subject: [PATCH 078/149] Update test. 
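One of the new cases, condensed (the query and expected markers are taken verbatim from the updated test and reference files below):

```sql
EXPLAIN actions = 1
SELECT x, y FROM (
    SELECT DISTINCT x, y FROM (SELECT number % 2 AS x, number % 3 AS y FROM numbers(10))
) WHERE y != 2
SETTINGS enable_optimize_predicate_expression = 0;
-- Expected to contain two Distinct steps with
-- "Filter column: notEquals(y, 2)" pushed below both.
```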
--- .../01655_plan_optimizations.reference | 70 +++++++++++++++ .../0_stateless/01655_plan_optimizations.sh | 85 +++++++++++++++++-- 2 files changed, 149 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index 1e638829c74..7bc75dc0bf6 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -13,32 +13,102 @@ Limit 10 > filter should be pushed down after aggregating Aggregating Filter +0 1 +1 2 +2 3 +3 4 +4 5 +5 6 +6 7 +7 8 +8 9 +9 10 > filter should be pushed down after aggregating, column after aggregation is const COLUMN Const(UInt8) -> notEquals(y, 0) Aggregating Filter Filter +0 1 1 +1 2 1 +2 3 1 +3 4 1 +4 5 1 +5 6 1 +6 7 1 +7 8 1 +8 9 1 +9 10 1 > one condition of filter should be pushed down after aggregating, other condition is aliased Filter column ALIAS notEquals(s, 4) :: 1 -> and(notEquals(y, 0), notEquals(s, 4)) Aggregating Filter column: notEquals(y, 0) +0 1 +1 2 +2 3 +3 4 +5 6 +6 7 +7 8 +8 9 +9 10 > one condition of filter should be pushed down after aggregating, other condition is casted Filter column FUNCTION CAST(minus(s, 4) :: 1, UInt8 :: 3) -> and(notEquals(y, 0), minus(s, 4)) Aggregating Filter column: notEquals(y, 0) +0 1 +1 2 +2 3 +3 4 +5 6 +6 7 +7 8 +8 9 +9 10 > one condition of filter should be pushed down after aggregating, other two conditions are ANDed Filter column FUNCTION and(minus(s, 4) :: 2, minus(s, 8) :: 1) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4)) Aggregating Filter column: notEquals(y, 0) +0 1 +1 2 +2 3 +3 4 +5 6 +6 7 +7 8 +9 10 > two conditions of filter should be pushed down after aggregating and ANDed, one condition is aliased Filter column ALIAS notEquals(s, 8) :: 1 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4)) Aggregating Filter column: and(minus(y, 4), notEquals(y, 0)) +0 1 +1 2 +2 3 +4 5 +5 6 +6 7 +7 8 +9 10 > filter is split, one part is filtered before ARRAY JOIN Filter column: and(notEquals(y, 2), notEquals(x, 0)) ARRAY JOIN x Filter column: notEquals(y, 2) +1 3 +> filter is pushed down before Distinct +Distinct +Distinct +Filter column: notEquals(y, 2) +0 0 +0 1 +1 0 +1 1 +> filter is pushed down before sorting steps +MergingSorted +MergeSorting +PartialSorting +Filter column: and(notEquals(x, 0), notEquals(y, 0)) +1 2 +1 1 diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index ccd331df45e..f770643fc41 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -16,49 +16,122 @@ $CLICKHOUSE_CLIENT -q " select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 settings enable_optimize_predicate_expression=0" | grep -o "Aggregating\|Filter" +$CLICKHOUSE_CLIENT -q " + select s, y from (select sum(x) as s, y from ( + select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 order by s, y + settings enable_optimize_predicate_expression=0" echo "> filter should be pushed down after aggregating, column after aggregation is const" $CLICKHOUSE_CLIENT -q " - explain actions = 1 select *, y != 0 from (select sum(x), y from ( + explain actions = 1 select s, y, y != 0 from (select sum(x) as s, y from ( select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 settings enable_optimize_predicate_expression=0" | grep -o 
"Aggregating\|Filter\|COLUMN Const(UInt8) -> notEquals(y, 0)" +$CLICKHOUSE_CLIENT -q " + select s, y, y != 0 from (select sum(x) as s, y from ( + select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 order by s, y, y != 0 + settings enable_optimize_predicate_expression=0" echo "> one condition of filter should be pushed down after aggregating, other condition is aliased" $CLICKHOUSE_CLIENT -q " - explain actions = 1 select * from ( + explain actions = 1 select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s != 4 settings enable_optimize_predicate_expression=0" | grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|ALIAS notEquals(s, 4) :: 1 -> and(notEquals(y, 0), notEquals(s, 4))" +$CLICKHOUSE_CLIENT -q " + select s, y from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s != 4 order by s, y + settings enable_optimize_predicate_expression=0" echo "> one condition of filter should be pushed down after aggregating, other condition is casted" $CLICKHOUSE_CLIENT -q " - explain actions = 1 select * from ( + explain actions = 1 select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 4 settings enable_optimize_predicate_expression=0" | grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION CAST(minus(s, 4) :: 1, UInt8 :: 3) -> and(notEquals(y, 0), minus(s, 4))" +$CLICKHOUSE_CLIENT -q " + select s, y from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s - 4 order by s, y + settings enable_optimize_predicate_expression=0" echo "> one condition of filter should be pushed down after aggregating, other two conditions are ANDed" $CLICKHOUSE_CLIENT -q " - explain actions = 1 select * from ( + explain actions = 1 select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 8 and s - 4 settings enable_optimize_predicate_expression=0" | grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 4) :: 2, minus(s, 8) :: 1) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4))" +$CLICKHOUSE_CLIENT -q " + select s, y from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s - 8 and s - 4 order by s, y + settings enable_optimize_predicate_expression=0" echo "> two conditions of filter should be pushed down after aggregating and ANDed, one condition is aliased" $CLICKHOUSE_CLIENT -q " - explain optimize = 1, actions = 1 select * from ( + explain actions = 1 select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s != 8 and y - 4 settings enable_optimize_predicate_expression=0" | grep -o "Aggregating\|Filter column\|Filter column: and(minus(y, 4), notEquals(y, 0))\|ALIAS notEquals(s, 8) :: 1 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4))" +$CLICKHOUSE_CLIENT -q " + select s, y from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s != 8 and y - 4 order by s, y + settings enable_optimize_predicate_expression=0" echo "> filter is split, one part is filtered before ARRAY JOIN" $CLICKHOUSE_CLIENT -q " explain actions = 1 select x, y from ( 
select range(number) as x, number + 1 as y from numbers(3) ) array join x where y != 2 and x != 0" | - grep -o "Filter column: and(notEquals(y, 2), notEquals(x, 0))\|ARRAY JOIN x\|Filter column: notEquals(y, 2)" \ No newline at end of file + grep -o "Filter column: and(notEquals(y, 2), notEquals(x, 0))\|ARRAY JOIN x\|Filter column: notEquals(y, 2)" +$CLICKHOUSE_CLIENT -q " + select x, y from ( + select range(number) as x, number + 1 as y from numbers(3) + ) array join x where y != 2 and x != 0 order by x, y" + +# echo "> filter is split, one part is filtered before Aggregating and Cube" +# $CLICKHOUSE_CLIENT -q " +# explain actions = 1 select * from ( +# select sum(x) as s, x, y from (select number as x, number + 1 as y from numbers(10)) group by x, y with cube +# ) where y != 0 and s != 4 +# settings enable_optimize_predicate_expression=0" | +# grep -o "Cube\|Aggregating\|Filter column: notEquals(y, 0)" +# $CLICKHOUSE_CLIENT -q " +# select s, x, y from ( +# select sum(x) as s, x, y from (select number as x, number + 1 as y from numbers(10)) group by x, y with cube +# ) where y != 0 and s != 4 order by s, x, y +# settings enable_optimize_predicate_expression=0" + +echo "> filter is pushed down before Distinct" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select x, y from ( + select distinct x, y from (select number % 2 as x, number % 3 as y from numbers(10)) + ) where y != 2 + settings enable_optimize_predicate_expression=0" | + grep -o "Distinct\|Filter column: notEquals(y, 2)" +$CLICKHOUSE_CLIENT -q " + select x, y from ( + select distinct x, y from (select number % 2 as x, number % 3 as y from numbers(10)) + ) where y != 2 order by x, y + settings enable_optimize_predicate_expression=0" + +echo "> filter is pushed down before sorting steps" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select x, y from ( + select number % 2 as x, number % 3 as y from numbers(6) order by y desc + ) where x != 0 and y != 0 + settings enable_optimize_predicate_expression = 0" | + grep -o "MergingSorted\|MergeSorting\|PartialSorting\|Filter column: and(notEquals(x, 0), notEquals(y, 0))" +$CLICKHOUSE_CLIENT -q " + select x, y from ( + select number % 2 as x, number % 3 as y from numbers(6) order by y desc + ) where x != 0 and y != 0 + settings enable_optimize_predicate_expression = 0" From f6278ed429dc2231d68aa5179e63b3bb635d081a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 17 Feb 2021 19:56:17 +0300 Subject: [PATCH 079/149] Support for DIstinct, sorting steps. --- src/Processors/QueryPlan/Optimizations/filterPushDown.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 74c4fa6f329..02e1914504d 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -60,12 +60,12 @@ static size_t tryAddNewFilterStep( "Filter column {} was removed from ActionsDAG but it is needed in result. 
DAG:\n{}", filter_column_name, expression->dumpDAG()); - std::cerr << "replacing to expr because filter " << filter_column_name << " was removed\n"; + // std::cerr << "replacing to expr because filter " << filter_column_name << " was removed\n"; parent = std::make_unique(child->getOutputStream(), expression); } else if ((*it)->column && isColumnConst(*(*it)->column)) { - std::cerr << "replacing to expr because filter is const\n"; + // std::cerr << "replacing to expr because filter is const\n"; parent = std::make_unique(child->getOutputStream(), expression); } From 56a5d1dafaa7cb08719277886000349490c47eda Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 17 Feb 2021 21:48:26 +0300 Subject: [PATCH 080/149] Skip stateful functions --- src/Processors/QueryPlan/Optimizations/filterPushDown.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 02e1914504d..456faeb72c2 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -123,6 +123,9 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (!filter) return 0; + if (filter->getExpression()->hasStatefulFunctions()) + return 0; + if (auto * aggregating = typeid_cast(child.get())) { const auto & params = aggregating->getParams(); From ec4dafaa5f914e99acc8cede5b60e85458eab134 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 17 Feb 2021 22:19:39 +0300 Subject: [PATCH 081/149] Fix build. --- src/CMakeLists.txt | 4 ++-- src/Processors/ya.make | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 86db7742c97..7a7f160dd81 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -100,8 +100,8 @@ endif() list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD}) list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON}) -list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/replicate.cpp) -list (APPEND dbms_headers Functions/IFunctionImpl.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/replicate.h) +list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/replicate.cpp Functions/FunctionsLogical.cpp) +list (APPEND dbms_headers Functions/IFunctionImpl.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/replicate.h Functions/FunctionsLogical.h) list (APPEND dbms_sources AggregateFunctions/AggregateFunctionFactory.cpp diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 34ff61d03c5..71ddd07f6a2 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -114,6 +114,7 @@ SRCS( QueryPlan/MergingFinal.cpp QueryPlan/MergingSortedStep.cpp QueryPlan/OffsetStep.cpp + QueryPlan/Optimizations/filterPushDown.cpp QueryPlan/Optimizations/liftUpArrayJoin.cpp QueryPlan/Optimizations/limitPushDown.cpp QueryPlan/Optimizations/mergeExpressions.cpp From 7231a97085b34d0ee6fa14a23a085a0bd60cc01f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 18 Feb 2021 14:15:16 +0300 Subject: [PATCH 082/149] Remove MaterializingStep --- 
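The step is replaced by an ExpressionStep over an ActionsDAG with materializing output actions (see the StorageView.cpp hunk below). A hedged sketch of the case this guards, reading a constant through a view (illustrative names, not from this patch):

```sql
CREATE VIEW v AS SELECT 1 AS x;
-- Columns read from storage are expected to be non-constant; without
-- materializing after the VIEW subquery, x would stay a Const column.
SELECT x FROM v;
```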
.../QueryPlan/MaterializingStep.cpp | 39 ------------------- src/Processors/QueryPlan/MaterializingStep.h | 18 --------- src/Processors/ya.make | 1 - src/Storages/StorageView.cpp | 6 ++- 4 files changed, 4 insertions(+), 60 deletions(-) delete mode 100644 src/Processors/QueryPlan/MaterializingStep.cpp delete mode 100644 src/Processors/QueryPlan/MaterializingStep.h diff --git a/src/Processors/QueryPlan/MaterializingStep.cpp b/src/Processors/QueryPlan/MaterializingStep.cpp deleted file mode 100644 index f5313369020..00000000000 --- a/src/Processors/QueryPlan/MaterializingStep.cpp +++ /dev/null @@ -1,39 +0,0 @@ -#include -#include -#include - -#include - -namespace DB -{ - -static ITransformingStep::Traits getTraits() -{ - return ITransformingStep::Traits - { - { - .preserves_distinct_columns = true, - .returns_single_stream = false, - .preserves_number_of_streams = true, - .preserves_sorting = true, - }, - { - .preserves_number_of_rows = true, - } - }; -} - -MaterializingStep::MaterializingStep(const DataStream & input_stream_) - : ITransformingStep(input_stream_, materializeBlock(input_stream_.header), getTraits()) -{ -} - -void MaterializingStep::transformPipeline(QueryPipeline & pipeline) -{ - pipeline.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header); - }); -} - -} diff --git a/src/Processors/QueryPlan/MaterializingStep.h b/src/Processors/QueryPlan/MaterializingStep.h deleted file mode 100644 index 72b3133dfe4..00000000000 --- a/src/Processors/QueryPlan/MaterializingStep.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -/// Materialize constants. See MaterializingTransform. -class MaterializingStep : public ITransformingStep -{ -public: - explicit MaterializingStep(const DataStream & input_stream_); - - String getName() const override { return "Materializing"; } - - void transformPipeline(QueryPipeline & pipeline) override; -}; - -} diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 71ddd07f6a2..a44272cf9c0 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -108,7 +108,6 @@ SRCS( QueryPlan/ITransformingStep.cpp QueryPlan/LimitByStep.cpp QueryPlan/LimitStep.cpp - QueryPlan/MaterializingStep.cpp QueryPlan/MergeSortingStep.cpp QueryPlan/MergingAggregatedStep.cpp QueryPlan/MergingFinal.cpp diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 38349ef8df9..1ee5ab3d0ca 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -15,7 +15,6 @@ #include #include -#include #include #include @@ -87,7 +86,10 @@ void StorageView::read( /// It's expected that the columns read from storage are not constant. /// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery. - auto materializing = std::make_unique(query_plan.getCurrentDataStream()); + auto materializing_actions = std::make_shared(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + materializing_actions->addMaterializingOutputActions(); + + auto materializing = std::make_unique(query_plan.getCurrentDataStream(), std::move(materializing_actions)); materializing->setStepDescription("Materialize constants after VIEW subquery"); query_plan.addStep(std::move(materializing)); From 0449546bca7319132a99693b6634ca8684aa41f3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 18 Feb 2021 16:13:09 +0300 Subject: [PATCH 083/149] Support TotalsHaving. Update test. 
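Note the semantic caveat spelled out in a code comment below: pushing a filter past TotalsHaving changes the TOTALS row. The example from that comment:

```sql
SELECT * FROM (
    SELECT y, sum(x) FROM (SELECT number AS x, number % 4 AS y FROM numbers(10))
    GROUP BY y WITH TOTALS
) WHERE y != 2;
-- Without the push-down the totals row for sum(x) is 45; with it, 37.
-- This matches what enable_optimize_predicate_expression = 1 already does.
```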
---
 .../Optimizations/filterPushDown.cpp          | 38 ++++++++++++++++---
 src/Processors/QueryPlan/TotalsHavingStep.h   |  2 +
 .../01655_plan_optimizations.reference        |  9 +++++
 .../0_stateless/01655_plan_optimizations.sh   | 12 ++++++
 4 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
index 456faeb72c2..4d01235e2fc 100644
--- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
+++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
@@ -5,14 +5,17 @@
 #include
 #include
 #include
+#include
+#include
+#include
+#include
+#include
+#include
 #include
 #include
 #include
-#include "Processors/QueryPlan/FinishSortingStep.h"
-#include "Processors/QueryPlan/MergeSortingStep.h"
-#include "Processors/QueryPlan/MergingSortedStep.h"
-#include "Processors/QueryPlan/PartialSortingStep.h"
-#include
+#include

 #include

 namespace DB::ErrorCodes
@@ -135,6 +138,31 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
         return updated_steps;
     }

+    if (auto * totals_having = typeid_cast<TotalsHavingStep *>(child.get()))
+    {
+        /// If totals step has HAVING expression, skip it for now.
+        /// TODO:
+        /// We can merge HAVING expression with the current filter.
+        /// Alos, we can push down part of HAVING which depends only on aggregation keys.
+        if (totals_having->getActions())
+            return 0;
+
+        Names keys;
+        const auto & header = totals_having->getInputStreams().front().header;
+        for (const auto & column : header)
+            if (typeid_cast<const DataTypeAggregateFunction *>(column.type.get()) == nullptr)
+                keys.push_back(column.name);
+
+        /// NOTE: this optimization changes TOTALS value. Example:
+        ///   `select * from (select y, sum(x) from (
+        ///        select number as x, number % 4 as y from numbers(10)
+        ///    ) group by y with totals) where y != 2`
+        /// Optimization will replace totals row `y, sum(x)` from `(0, 45)` to `(0, 37)`.
+        /// It is expected to be OK, because AST optimization `enable_optimize_predicate_expression = 1` also breaks it.
+ if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, keys)) + return updated_steps; + } + if (auto * array_join = typeid_cast(child.get())) { const auto & array_join_actions = array_join->arrayJoin(); diff --git a/src/Processors/QueryPlan/TotalsHavingStep.h b/src/Processors/QueryPlan/TotalsHavingStep.h index 7c1638013e5..57d5cf7aad5 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.h +++ b/src/Processors/QueryPlan/TotalsHavingStep.h @@ -28,6 +28,8 @@ public: void describeActions(FormatSettings & settings) const override; + const ActionsDAGPtr & getActions() const { return actions_dag; } + private: bool overflow_row; ActionsDAGPtr actions_dag; diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index 7bc75dc0bf6..fa83c098412 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -112,3 +112,12 @@ PartialSorting Filter column: and(notEquals(x, 0), notEquals(y, 0)) 1 2 1 1 +> filter is pushed down before TOTALS HAVING and aggregating +TotalsHaving +Aggregating +Filter column: notEquals(y, 2) +0 12 +1 15 +3 10 + +0 37 diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index f770643fc41..e47b03661e4 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -135,3 +135,15 @@ $CLICKHOUSE_CLIENT -q " select number % 2 as x, number % 3 as y from numbers(6) order by y desc ) where x != 0 and y != 0 settings enable_optimize_predicate_expression = 0" + +echo "> filter is pushed down before TOTALS HAVING and aggregating" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select * from ( + select y, sum(x) from (select number as x, number % 4 as y from numbers(10)) group by y with totals + ) where y != 2 + settings enable_optimize_predicate_expression=0" | + grep -o "TotalsHaving\|Aggregating\|Filter column: notEquals(y, 2)" +$CLICKHOUSE_CLIENT -q " + select * from ( + select y, sum(x) from (select number as x, number % 4 as y from numbers(10)) group by y with totals + ) where y != 2" \ No newline at end of file From f4b0b1110cb77c6901243cc1120615d9735a2da3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 18 Feb 2021 23:53:40 +0300 Subject: [PATCH 084/149] Fix test. 
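The reference changes because totals are now computed after the outer filter is pushed down (see the previous commit). A hypothetical, runnable illustration of the affected pattern, not the test's actual query:

```sql
SELECT * FROM (
    SELECT y, count() AS c
    FROM (SELECT number % 2 AS y FROM numbers(4))
    GROUP BY y WITH TOTALS
) WHERE y != 0;
-- Before: the totals row counts all 4 source rows (c = 4).
-- After the push-down: only rows with y = 1 are aggregated, so totals c = 2.
```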
---
 tests/queries/0_stateless/01272_totals_and_filter_bug.reference | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01272_totals_and_filter_bug.reference b/tests/queries/0_stateless/01272_totals_and_filter_bug.reference
index 0db840561fd..5b407738cb8 100644
--- a/tests/queries/0_stateless/01272_totals_and_filter_bug.reference
+++ b/tests/queries/0_stateless/01272_totals_and_filter_bug.reference
@@ -1,6 +1,6 @@
 1
 1
-0 2
+0 1
 -
 test1
 10 0

From 6e9bf682179229b4ae3d7f97ec3ab5c83229704b Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 18 Feb 2021 23:54:42 +0300
Subject: [PATCH 085/149] Fix typo

---
 src/Processors/QueryPlan/Optimizations/filterPushDown.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
index 4d01235e2fc..1b84fee4857 100644
--- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
+++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
@@ -143,7 +143,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
         /// If totals step has HAVING expression, skip it for now.
         /// TODO:
         /// We can merge HAVING expression with the current filter.
-        /// Alos, we can push down part of HAVING which depends only on aggregation keys.
+        /// Also, we can push down part of HAVING which depends only on aggregation keys.
         if (totals_having->getActions())
             return 0;

From 21ee685ef0f1910d42e0e5a47b010b2eb4cc9a71 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Fri, 19 Feb 2021 19:38:46 +0000
Subject: [PATCH 086/149] Fix brotli

---
 src/IO/BrotliWriteBuffer.cpp                  |  2 +-
 ...7_http_compression_prefer_brotli.reference | 23 +++++++++++++++++++
 .../01057_http_compression_prefer_brotli.sh   |  2 ++
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/IO/BrotliWriteBuffer.cpp b/src/IO/BrotliWriteBuffer.cpp
index d14c94ca43d..e562cc70e61 100644
--- a/src/IO/BrotliWriteBuffer.cpp
+++ b/src/IO/BrotliWriteBuffer.cpp
@@ -86,7 +86,7 @@ void BrotliWriteBuffer::nextImpl()
             throw Exception("brotli compress failed", ErrorCodes::BROTLI_WRITE_FAILED);
         }
     }
-    while (in_available > 0 || out_capacity == 0);
+    while (in_available > 0);
 }

 void BrotliWriteBuffer::finish()
diff --git a/tests/queries/0_stateless/01057_http_compression_prefer_brotli.reference b/tests/queries/0_stateless/01057_http_compression_prefer_brotli.reference
index 5dd396a38c9..c28cbee8485 100644
--- a/tests/queries/0_stateless/01057_http_compression_prefer_brotli.reference
+++ b/tests/queries/0_stateless/01057_http_compression_prefer_brotli.reference
@@ -9,3 +9,26 @@
 999997
 999998
 999999
+        },
+        {
+                "datetime": "2020-12-12",
+                "pipeline": "test-pipeline",
+                "host": "clickhouse-test-host-001.clickhouse.com",
+                "home": "clickhouse",
+                "detail": "clickhouse",
+                "row_number": "999998"
+        },
+        {
+                "datetime": "2020-12-12",
+                "pipeline": "test-pipeline",
+                "host": "clickhouse-test-host-001.clickhouse.com",
+                "home": "clickhouse",
+                "detail": "clickhouse",
+                "row_number": "999999"
+        }
+],
+
+"rows": 1000000,
+
+"rows_before_limit_at_least": 1048080,
+
diff --git a/tests/queries/0_stateless/01057_http_compression_prefer_brotli.sh b/tests/queries/0_stateless/01057_http_compression_prefer_brotli.sh
index e5f4d12ee18..f93062d43a7 100755
--- a/tests/queries/0_stateless/01057_http_compression_prefer_brotli.sh
+++ b/tests/queries/0_stateless/01057_http_compression_prefer_brotli.sh
@@ -11,3 +11,5 @@ ${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: 
gzip,deflate,br' "${CLICKHOUSE_URL}& ${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip,deflate' "${CLICKHOUSE_URL}&enable_http_compression=1" -d 'SELECT 1' | gzip -d ${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip' "${CLICKHOUSE_URL}&enable_http_compression=1" -d 'SELECT number FROM numbers(1000000)' | gzip -d | tail -n3 ${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1" -d 'SELECT number FROM numbers(1000000)' | brotli -d | tail -n3 + +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | brotli -d | tail -n30 | head -n23 From e1868d1392d9834d84e4d9f1f0230429e7df2e3c Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 20 Feb 2021 05:13:31 +0000 Subject: [PATCH 087/149] Move test into separate file --- .../01057_http_compression_prefer_brotli.sh | 1 - ...tli_http_compression_json_format.reference | 23 +++++++++++++++++++ ...ong_brotli_http_compression_json_format.sh | 7 ++++++ 3 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.reference create mode 100755 tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh diff --git a/tests/queries/0_stateless/01057_http_compression_prefer_brotli.sh b/tests/queries/0_stateless/01057_http_compression_prefer_brotli.sh index f93062d43a7..22ab745d7c0 100755 --- a/tests/queries/0_stateless/01057_http_compression_prefer_brotli.sh +++ b/tests/queries/0_stateless/01057_http_compression_prefer_brotli.sh @@ -12,4 +12,3 @@ ${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip,deflate' "${CLICKHOUSE_URL}& ${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip' "${CLICKHOUSE_URL}&enable_http_compression=1" -d 'SELECT number FROM numbers(1000000)' | gzip -d | tail -n3 ${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1" -d 'SELECT number FROM numbers(1000000)' | brotli -d | tail -n3 -${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | brotli -d | tail -n30 | head -n23 diff --git a/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.reference b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.reference new file mode 100644 index 00000000000..7c089a2fd05 --- /dev/null +++ b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.reference @@ -0,0 +1,23 @@ + }, + { + "datetime": "2020-12-12", + "pipeline": "test-pipeline", + "host": "clickhouse-test-host-001.clickhouse.com", + "home": "clickhouse", + "detail": "clickhouse", + "row_number": "999998" + }, + { + "datetime": "2020-12-12", + "pipeline": "test-pipeline", + "host": "clickhouse-test-host-001.clickhouse.com", + "home": "clickhouse", + "detail": "clickhouse", + "row_number": "999999" + } + ], + + "rows": 1000000, + + "rows_before_limit_at_least": 1048080, + diff --git a/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh new file mode 100755 
index 00000000000..a187d778fdb --- /dev/null +++ b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | brotli -d | tail -n30 | head -n23 From 0d88366b2775bdcb60ae3eb18bc9fcb2ce7eef01 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 20 Feb 2021 07:07:50 +0000 Subject: [PATCH 088/149] Add forgotten .reference file update --- ...7_http_compression_prefer_brotli.reference | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/tests/queries/0_stateless/01057_http_compression_prefer_brotli.reference b/tests/queries/0_stateless/01057_http_compression_prefer_brotli.reference index c28cbee8485..5dd396a38c9 100644 --- a/tests/queries/0_stateless/01057_http_compression_prefer_brotli.reference +++ b/tests/queries/0_stateless/01057_http_compression_prefer_brotli.reference @@ -9,26 +9,3 @@ 999997 999998 999999 - }, - { - "datetime": "2020-12-12", - "pipeline": "test-pipeline", - "host": "clickhouse-test-host-001.clickhouse.com", - "home": "clickhouse", - "detail": "clickhouse", - "row_number": "999998" - }, - { - "datetime": "2020-12-12", - "pipeline": "test-pipeline", - "host": "clickhouse-test-host-001.clickhouse.com", - "home": "clickhouse", - "detail": "clickhouse", - "row_number": "999999" - } - ], - - "rows": 1000000, - - "rows_before_limit_at_least": 1048080, - From f0396661b3cf74b98ea2b562d96edb18949e9df8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 20 Feb 2021 19:13:36 +0300 Subject: [PATCH 089/149] Refactor ActionsDAG::splitActionsForFilter --- src/Interpreters/ActionsDAG.cpp | 411 ++++++++++++++++++-------------- src/Interpreters/ActionsDAG.h | 2 + 2 files changed, 228 insertions(+), 185 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 8b6013a4365..b3f86313a1c 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1212,112 +1212,120 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & co return split(split_nodes); } -ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, bool can_remove_filter, const Names & available_inputs) +namespace { - std::unordered_map> inputs_map; - for (const auto & input : inputs) - inputs_map[input->result_name].emplace_back(input); - std::unordered_set allowed_nodes; - for (const auto & name : available_inputs) +struct ConjinctionNodes +{ + std::unordered_set allowed; + std::unordered_set rejected; +}; + +/// Take a node which result is predicate. +/// Assuming predicate is a conjunction (probably, trivial). +/// Find separate conjunctions nodes. Split nodes into allowed and rejected sets. +/// Allowed predicate is a predicate which can be calculated using only nodes from allowed_nodes set. 
+ConjinctionNodes getConjinctionNodes(ActionsDAG::Node * predicate, std::unordered_set allowed_nodes) +{ + ConjinctionNodes conjunction; + + struct Frame { - auto & inputs_list = inputs_map[name]; - if (inputs_list.empty()) - continue; + ActionsDAG::Node * node; + bool is_predicate = false; + size_t next_child_to_visit = 0; + size_t num_allowed_children = 0; + }; - allowed_nodes.emplace(inputs_list.front()); - inputs_list.pop_front(); - } - - auto it = index.begin(); - for (; it != index.end(); ++it) - if ((*it)->result_name == filter_name) - break; - - if (it == index.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Index for ActionsDAG does not contain filter column name {}. DAG:\n{}", - filter_name, dumpDAG()); - - std::unordered_set selected_predicates; - std::unordered_set other_predicates; + std::stack stack; + std::unordered_set visited_nodes; + stack.push(Frame{.node = predicate, .is_predicate = true}); + visited_nodes.insert(predicate); + while (!stack.empty()) { - struct Frame + auto & cur = stack.top(); + bool is_conjunction = cur.is_predicate + && cur.node->type == ActionsDAG::ActionType::FUNCTION + && cur.node->function_base->getName() == "and"; + + /// At first, visit all children. + while (cur.next_child_to_visit < cur.node->children.size()) { - Node * node; - bool is_predicate = false; - size_t next_child_to_visit = 0; - size_t num_allowed_children = 0; - }; + auto * child = cur.node->children[cur.next_child_to_visit]; - std::stack stack; - std::unordered_set visited_nodes; - - stack.push(Frame{.node = *it, .is_predicate = true}); - visited_nodes.insert(*it); - while (!stack.empty()) - { - auto & cur = stack.top(); - bool is_conjunction = cur.is_predicate - && cur.node->type == ActionType::FUNCTION - && cur.node->function_base->getName() == "and"; - - /// At first, visit all children. 
- while (cur.next_child_to_visit < cur.node->children.size()) + if (visited_nodes.count(child) == 0) { - auto * child = cur.node->children[cur.next_child_to_visit]; - - if (visited_nodes.count(child) == 0) - { - visited_nodes.insert(child); - stack.push({.node = child, .is_predicate = is_conjunction}); - break; - } - - if (allowed_nodes.contains(child)) - ++cur.num_allowed_children; - ++cur.next_child_to_visit; + visited_nodes.insert(child); + stack.push({.node = child, .is_predicate = is_conjunction}); + break; } - if (cur.next_child_to_visit == cur.node->children.size()) - { - if (cur.num_allowed_children == cur.node->children.size()) - { - if (cur.node->type != ActionType::ARRAY_JOIN && cur.node->type != ActionType::INPUT) - allowed_nodes.emplace(cur.node); - } - else if (is_conjunction) - { - for (auto * child : cur.node->children) - if (allowed_nodes.count(child)) - selected_predicates.insert(child); - } - else if (cur.is_predicate) - { - other_predicates.insert(cur.node); - } + if (allowed_nodes.contains(child)) + ++cur.num_allowed_children; + ++cur.next_child_to_visit; + } - stack.pop(); + if (cur.next_child_to_visit == cur.node->children.size()) + { + if (cur.num_allowed_children == cur.node->children.size()) + { + if (cur.node->type != ActionsDAG::ActionType::ARRAY_JOIN && cur.node->type != ActionsDAG::ActionType::INPUT) + allowed_nodes.emplace(cur.node); } + else if (is_conjunction) + { + for (auto * child : cur.node->children) + if (allowed_nodes.count(child)) + conjunction.allowed.insert(child); + } + else if (cur.is_predicate) + { + conjunction.rejected.insert(cur.node); + } + + stack.pop(); } } - if (selected_predicates.empty()) + if (conjunction.allowed.empty()) { - if (allowed_nodes.count(*it)) - selected_predicates.insert(*it); - else - return nullptr; + if (allowed_nodes.count(predicate)) + conjunction.allowed.insert(predicate); } - // std::cerr << "************* Selectecd predicates\n"; - // for (const auto * p : selected_predicates) - // std::cerr << p->result_name << std::endl; + return conjunction; +} - // std::cerr << "............. Other predicates\n"; - // for (const auto * p : other_predicates) - // std::cerr << p->result_name << std::endl; +ColumnsWithTypeAndName prepareFunctionArguments(const std::vector nodes) +{ + ColumnsWithTypeAndName arguments; + arguments.reserve(nodes.size()); + + for (const auto * child : nodes) + { + ColumnWithTypeAndName argument; + argument.column = child->column; + argument.type = child->result_type; + argument.name = child->result_name; + + arguments.emplace_back(std::move(argument)); + } + + return arguments; +} + +} + +/// Create actions which calculate conjunction of selected nodes. +/// Assume conjunction nodes are predicates (and may be used as arguments of function AND). +/// +/// Result actions add single column with conjunction result (it is always last in index). +/// No other columns are added or removed. +ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::unordered_set conjunction) +{ + if (conjunction.empty()) + return nullptr; auto actions = cloneEmpty(); actions->settings.project_input = false; @@ -1327,82 +1335,128 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, std::make_unique( std::make_shared())); - std::unordered_map nodes_mapping; + std::unordered_map nodes_mapping; + struct Frame { - struct Frame + const ActionsDAG::Node * node; + size_t next_child_to_visit = 0; + }; + + std::stack stack; + + /// DFS. Clone actions. 
+ for (const auto * predicate : conjunction) + { + if (nodes_mapping.count(predicate)) + continue; + + stack.push({.node = predicate}); + while (!stack.empty()) { - const Node * node; - size_t next_child_to_visit = 0; - }; - - std::stack stack; - - for (const auto * predicate : selected_predicates) - { - if (nodes_mapping.count(predicate)) - continue; - - stack.push({.node = predicate}); - while (!stack.empty()) + auto & cur = stack.top(); + /// At first, visit all children. + while (cur.next_child_to_visit < cur.node->children.size()) { - auto & cur = stack.top(); - /// At first, visit all children. - while (cur.next_child_to_visit < cur.node->children.size()) + auto * child = cur.node->children[cur.next_child_to_visit]; + + if (nodes_mapping.count(child) == 0) { - auto * child = cur.node->children[cur.next_child_to_visit]; - - if (nodes_mapping.count(child) == 0) - { - stack.push({.node = child}); - break; - } - - ++cur.next_child_to_visit; + stack.push({.node = child}); + break; } - if (cur.next_child_to_visit == cur.node->children.size()) + ++cur.next_child_to_visit; + } + + if (cur.next_child_to_visit == cur.node->children.size()) + { + auto & node = actions->nodes.emplace_back(*cur.node); + nodes_mapping[cur.node] = &node; + + for (auto & child : node.children) + child = nodes_mapping[child]; + + if (node.type == ActionType::INPUT) { - auto & node = actions->nodes.emplace_back(*cur.node); - nodes_mapping[cur.node] = &node; - - for (auto & child : node.children) - child = nodes_mapping[child]; - - if (node.type == ActionType::INPUT) - { - actions->inputs.emplace_back(&node); - actions->index.insert(&node); - } - - stack.pop(); + actions->inputs.emplace_back(&node); + actions->index.insert(&node); } + + stack.pop(); } } - - Node * result_predicate = nodes_mapping[*selected_predicates.begin()]; - - if (selected_predicates.size() > 1) - { - std::vector args; - args.reserve(selected_predicates.size()); - for (const auto * predicate : selected_predicates) - args.emplace_back(nodes_mapping[predicate]); - - result_predicate = &actions->addFunction(func_builder_and, args, {}, true, false); - } - - actions->index.insert(result_predicate); } - if (selected_predicates.count(*it)) + Node * result_predicate = nodes_mapping[*conjunction.begin()]; + + if (conjunction.size() > 1) + { + std::vector args; + args.reserve(conjunction.size()); + for (const auto * predicate : conjunction) + args.emplace_back(nodes_mapping[predicate]); + + result_predicate = &actions->addFunction(func_builder_and, args, {}, true, false); + } + + actions->index.insert(result_predicate); + return actions; +} + +ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, bool can_remove_filter, const Names & available_inputs) +{ + Node * predicate; + + { + auto it = index.begin(); + for (; it != index.end(); ++it) + if ((*it)->result_name == filter_name) + break; + + if (it == index.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Index for ActionsDAG does not contain filter column name {}. DAG:\n{}", + filter_name, dumpDAG()); + + predicate = *it; + } + + std::unordered_set allowed_nodes; + + /// Get input nodes from available_inputs names. 
+ { + std::unordered_map> inputs_map; + for (const auto & input : inputs) + inputs_map[input->result_name].emplace_back(input); + + for (const auto & name : available_inputs) + { + auto & inputs_list = inputs_map[name]; + if (inputs_list.empty()) + continue; + + allowed_nodes.emplace(inputs_list.front()); + inputs_list.pop_front(); + } + } + + auto conjunction = getConjinctionNodes(predicate, allowed_nodes); + auto actions = cloneActionsForConjunction(conjunction.allowed); + if (!actions) + return nullptr; + + /// Now, when actions are created, update current DAG. + + if (conjunction.allowed.count(predicate)) { /// The whole predicate was split. if (can_remove_filter) { + /// If filter column is not needed, remove it from index. for (auto i = index.begin(); i != index.end(); ++i) { - if (*i == *it) + if (*i == predicate) { index.remove(i); break; @@ -1411,84 +1465,71 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, } else { + /// Replace predicate result to constant 1. Node node; node.type = ActionType::COLUMN; - node.result_name = std::move((*it)->result_name); - node.result_type = std::move((*it)->result_type); + node.result_name = std::move(predicate->result_name); + node.result_type = std::move(predicate->result_type); node.column = node.result_type->createColumnConst(0, 1); - *(*it) = std::move(node); + *predicate = std::move(node); } removeUnusedActions(false); } - else if ((*it)->type == ActionType::FUNCTION && (*it)->function_base->getName() == "and") + else { - std::vector new_children(other_predicates.begin(), other_predicates.end()); + /// Predicate is conjunction, where both allowed and rejected sets are not empty. + /// Replace this node to conjunction of rejected predicates. + + std::vector new_children(conjunction.rejected.begin(), conjunction.rejected.end()); if (new_children.size() == 1) { - if (new_children.front()->result_type->equals(*((*it)->result_type))) + /// Rejected set has only one predicate. + if (new_children.front()->result_type->equals(*predicate->result_type)) { + /// If it's type is same, just add alias. Node node; node.type = ActionType::ALIAS; - node.result_name = (*it)->result_name; - node.result_type = (*it)->result_type; + node.result_name = predicate->result_name; + node.result_type = predicate->result_type; node.children.swap(new_children); - *(*it) = std::move(node); + *predicate = std::move(node); } else { + /// If type is different, cast column. + /// This case is possible, cause AND can use any numeric type as argument. 
Node node; node.type = ActionType::COLUMN; - node.result_name = (*it)->result_type->getName(); + node.result_name = predicate->result_type->getName(); node.column = DataTypeString().createColumnConst(0, node.result_name); node.result_type = std::make_shared(); auto * right_arg = &nodes.emplace_back(std::move(node)); auto * left_arg = new_children.front(); - - (*it)->children = {left_arg, right_arg}; - ColumnsWithTypeAndName arguments; - arguments.reserve((*it)->children.size()); - - for (const auto * child : (*it)->children) - { - ColumnWithTypeAndName argument; - argument.column = child->column; - argument.type = child->result_type; - argument.name = child->result_name; - - arguments.emplace_back(std::move(argument)); - } + predicate->children = {left_arg, right_arg}; + auto arguments = prepareFunctionArguments(predicate->children); FunctionOverloadResolverPtr func_builder_cast = std::make_shared( CastOverloadResolver::createImpl(false)); - (*it)->function_builder = func_builder_cast; - (*it)->function_base = (*it)->function_builder->build(arguments); - (*it)->function = (*it)->function_base->prepare(arguments); + predicate->function_builder = func_builder_cast; + predicate->function_base = predicate->function_builder->build(arguments); + predicate->function = predicate->function_base->prepare(arguments); } } else { - (*it)->children.swap(new_children); - ColumnsWithTypeAndName arguments; - arguments.reserve((*it)->children.size()); + /// Predicate is function AND, which still have more then one argument. + /// Just update children and rebuild it. + predicate->children.swap(new_children); + auto arguments = prepareFunctionArguments(predicate->children); - for (const auto * child : (*it)->children) - { - ColumnWithTypeAndName argument; - argument.column = child->column; - argument.type = child->result_type; - argument.name = child->result_name; - - arguments.emplace_back(std::move(argument)); - } - - (*it)->function_base = (*it)->function_builder->build(arguments); - (*it)->function = (*it)->function_base->prepare(arguments); + predicate->function_base = predicate->function_builder->build(arguments); + predicate->function = predicate->function_base->prepare(arguments); } removeUnusedActions(false); diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index bd1dcd347df..87cf03f6edd 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -313,6 +313,8 @@ private: void addAliases(const NamesWithAliases & aliases, std::vector & result_nodes); void compileFunctions(); + + ActionsDAGPtr cloneActionsForConjunction(std::unordered_set conjunction); }; From 2ae0b47edbf1b01d45461e64c1c8df59ed2a7361 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 20 Feb 2021 19:25:47 +0300 Subject: [PATCH 090/149] Refactor tryPushDownFilter optimization. 
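
This reworks how tryAddNewFilterStep updates the parent step after
splitActionsForFilter: if the filter column is no longer present in the
index of the split expression (the whole predicate was pushed down), or
if it was replaced by a constant, the parent FilterStep is replaced with
a plain ExpressionStep instead of keeping a no-op filter.

A minimal sketch of the query shape this path targets, borrowed from the
01655_plan_optimizations test updated later in this series (the exact
EXPLAIN output depends on the server version, so treat it as
illustrative):

```sql
-- `y != 0` depends only on the GROUP BY key, so it can be filtered
-- below the aggregation; `s != 4` refers to the aggregate and must
-- stay above it.
EXPLAIN actions = 1
SELECT s, y
FROM
(
    SELECT sum(x) AS s, y
    FROM (SELECT number AS x, number + 1 AS y FROM numbers(10))
    GROUP BY y
)
WHERE y != 0 AND s != 4
SETTINGS enable_optimize_predicate_expression = 0;
```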
--- .../Optimizations/filterPushDown.cpp | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 1b84fee4857..01e38e81092 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -56,19 +56,30 @@ static size_t tryAddNewFilterStep( if ((*it)->result_name == filter_column_name) break; + const bool found_filter_column = it != expression->getIndex().end(); + + if (!found_filter_column && removes_filter) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", + filter_column_name, expression->dumpDAG()); + + const bool filter_is_constant = found_filter_column && (*it)->column && isColumnConst(*(*it)->column); + + if (!found_filter_column || filter_is_constant) + /// This means that all predicates of filter were pused down. + /// Replace current actions to expression, as we don't need to filter anything. + parent = std::make_unique(child->getOutputStream(), expression); + if (it == expression->getIndex().end()) { - if (!removes_filter) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", - filter_column_name, expression->dumpDAG()); + /// Filter was removed after split. + + - // std::cerr << "replacing to expr because filter " << filter_column_name << " was removed\n"; - parent = std::make_unique(child->getOutputStream(), expression); } else if ((*it)->column && isColumnConst(*(*it)->column)) { - // std::cerr << "replacing to expr because filter is const\n"; + /// Filter column was replaced to constant. parent = std::make_unique(child->getOutputStream(), expression); } From 00e0dbc3e5d39bb8bd0ff79b5001d69866c3a9cf Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 20 Feb 2021 20:42:06 +0300 Subject: [PATCH 091/149] Fix test. --- src/Interpreters/ActionsDAG.cpp | 23 +++++++++----- src/Interpreters/ActionsDAG.h | 2 +- .../Optimizations/filterPushDown.cpp | 30 ++----------------- .../01655_plan_optimizations.reference | 4 +-- .../0_stateless/01655_plan_optimizations.sh | 4 +-- 5 files changed, 23 insertions(+), 40 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index b3f86313a1c..1406eecc5c0 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1217,8 +1217,8 @@ namespace struct ConjinctionNodes { - std::unordered_set allowed; - std::unordered_set rejected; + std::vector allowed; + std::vector rejected; }; /// Take a node which result is predicate. 
@@ -1228,6 +1228,8 @@ struct ConjinctionNodes ConjinctionNodes getConjinctionNodes(ActionsDAG::Node * predicate, std::unordered_set allowed_nodes) { ConjinctionNodes conjunction; + std::unordered_set allowed; + std::unordered_set rejected; struct Frame { @@ -1276,12 +1278,19 @@ ConjinctionNodes getConjinctionNodes(ActionsDAG::Node * predicate, std::unordere else if (is_conjunction) { for (auto * child : cur.node->children) + { if (allowed_nodes.count(child)) - conjunction.allowed.insert(child); + { + if (allowed.insert(child).second) + conjunction.allowed.push_back(child); + + } + } } else if (cur.is_predicate) { - conjunction.rejected.insert(cur.node); + if (rejected.insert(cur.node).second) + conjunction.rejected.push_back(cur.node); } stack.pop(); @@ -1291,7 +1300,7 @@ ConjinctionNodes getConjinctionNodes(ActionsDAG::Node * predicate, std::unordere if (conjunction.allowed.empty()) { if (allowed_nodes.count(predicate)) - conjunction.allowed.insert(predicate); + conjunction.allowed.push_back(predicate); } return conjunction; @@ -1322,7 +1331,7 @@ ColumnsWithTypeAndName prepareFunctionArguments(const std::vector conjunction) +ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector conjunction) { if (conjunction.empty()) return nullptr; @@ -1448,7 +1457,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, /// Now, when actions are created, update current DAG. - if (conjunction.allowed.count(predicate)) + if (conjunction.rejected.empty()) { /// The whole predicate was split. if (can_remove_filter) diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 87cf03f6edd..2e3baa181fd 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -314,7 +314,7 @@ private: void compileFunctions(); - ActionsDAGPtr cloneActionsForConjunction(std::unordered_set conjunction); + ActionsDAGPtr cloneActionsForConjunction(std::vector conjunction); }; diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 01e38e81092..d64f082b7ee 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -58,11 +58,12 @@ static size_t tryAddNewFilterStep( const bool found_filter_column = it != expression->getIndex().end(); - if (!found_filter_column && removes_filter) + if (!found_filter_column && !removes_filter) throw Exception(ErrorCodes::LOGICAL_ERROR, "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", filter_column_name, expression->dumpDAG()); + /// Filter column was replaced to constant. const bool filter_is_constant = found_filter_column && (*it)->column && isColumnConst(*(*it)->column); if (!found_filter_column || filter_is_constant) @@ -70,19 +71,6 @@ static size_t tryAddNewFilterStep( /// Replace current actions to expression, as we don't need to filter anything. parent = std::make_unique(child->getOutputStream(), expression); - if (it == expression->getIndex().end()) - { - /// Filter was removed after split. - - - - } - else if ((*it)->column && isColumnConst(*(*it)->column)) - { - /// Filter column was replaced to constant. - parent = std::make_unique(child->getOutputStream(), expression); - } - /// Add new Filter step before Aggregating. 
/// Expression/Filter -> Aggregating -> Something auto & node = nodes.emplace_back(); @@ -109,20 +97,6 @@ static Names getAggregatinKeys(const Aggregator::Params & params) return keys; } -// static NameSet getColumnNamesFromSortDescription(const SortDescription & sort_desc, const Block & header) -// { -// NameSet names; -// for (const auto & column : sort_desc) -// { -// if (!column.column_name.empty()) -// names.insert(column.column_name); -// else -// names.insert(header.safeGetByPosition(column.column_number).name); -// } - -// return names; -// } - size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) { if (parent_node->children.size() != 1) diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index fa83c098412..f261e134494 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -68,7 +68,7 @@ Filter column: notEquals(y, 0) 9 10 > one condition of filter should be pushed down after aggregating, other two conditions are ANDed Filter column -FUNCTION and(minus(s, 4) :: 2, minus(s, 8) :: 1) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4)) +FUNCTION and(minus(s, 8) :: 1, minus(s, 4) :: 2) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4)) Aggregating Filter column: notEquals(y, 0) 0 1 @@ -83,7 +83,7 @@ Filter column: notEquals(y, 0) Filter column ALIAS notEquals(s, 8) :: 1 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4)) Aggregating -Filter column: and(minus(y, 4), notEquals(y, 0)) +Filter column: and(notEquals(y, 0), minus(y, 4)) 0 1 1 2 2 3 diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index e47b03661e4..84452fe651f 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -66,7 +66,7 @@ $CLICKHOUSE_CLIENT -q " select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 8 and s - 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 4) :: 2, minus(s, 8) :: 1) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4))" + grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 8) :: 1, minus(s, 4) :: 2) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4))" $CLICKHOUSE_CLIENT -q " select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y @@ -79,7 +79,7 @@ $CLICKHOUSE_CLIENT -q " select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s != 8 and y - 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter column\|Filter column: and(minus(y, 4), notEquals(y, 0))\|ALIAS notEquals(s, 8) :: 1 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4))" + grep -o "Aggregating\|Filter column\|Filter column: and(notEquals(y, 0), minus(y, 4))\|ALIAS notEquals(s, 8) :: 1 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4))" $CLICKHOUSE_CLIENT -q " select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y From cfb0dbb9e5f069342fe096542d888cbb2a01bc02 Mon Sep 17 00:00:00 2001 From: George Date: Sun, 21 Feb 2021 15:01:10 +0300 Subject: [PATCH 092/149] updated and added links --- 
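Notes: the functions touched below (mapContains, mapKeys, mapValues)
operate on the Map data type. A minimal usage sketch, assuming a
throwaway Memory table with illustrative names (on versions where Map
is still experimental, the setting below is required):

```sql
SET allow_experimental_map_type = 1;

CREATE TABLE map_demo (m Map(String, UInt64)) ENGINE = Memory;
INSERT INTO map_demo VALUES (map('eggs', 10, 'bread', 4));

SELECT
    mapContains(m, 'eggs') AS has_eggs, -- 1 if the key exists, 0 otherwise
    mapKeys(m) AS keys,                 -- ['eggs', 'bread']
    mapValues(m) AS vals                -- [10, 4]
FROM map_demo;
```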
.../functions/tuple-map-functions.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index f8755f1e2a9..ac8d92750fd 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -114,7 +114,7 @@ Result: ## mapContains {#mapcontains} -Determines whether `map.keys` contains the `key` parameter. +Determines whether the `map` contains the `key` parameter. **Syntax** @@ -124,12 +124,12 @@ mapContains(map, key) **Parameters** -- `map` — Map. -- `key` — Key. Type matches the type of `map.keys`. +- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `key` — Key. Type matches the type of `key` parameter. **Returned value** -- `1` if `map.keys` contains `key`, `0` if not. +- `1` if `map` contains `key`, `0` if not. Type: [UInt8](../../sql-reference/data-types/int-uint.md). @@ -157,7 +157,7 @@ Result: ## mapKeys {#mapkeys} -Returns all the keys from `map` parameter. +Returns all keys from the `map` parameter. **Syntax** @@ -167,11 +167,11 @@ mapKeys(map) **Parameters** -- `map` — Map. +- `map` — Map. [Map](../../sql-reference/data-types/map.md). **Returned value** -- Array containing all the keys from `map`. +- Array containing all keys from the `map`. Type: [Array](../../sql-reference/data-types/array.md). @@ -198,7 +198,7 @@ Result: ## mapValues {#mapvalues} -Returns all the values from `map` parameter. +Returns all values from the `map` parameter. **Syntax** @@ -208,7 +208,7 @@ mapKeys(map) **Parameters** -- `map` — Map. +- `map` — Map. [Map](../../sql-reference/data-types/map.md). **Returned value** From bff08e32bfb00e1e73be8ab23c8f1263dc8ab455 Mon Sep 17 00:00:00 2001 From: George Date: Sun, 21 Feb 2021 15:22:48 +0300 Subject: [PATCH 093/149] updates --- .../sql-reference/functions/tuple-map-functions.md | 2 +- .../sql-reference/functions/tuple-map-functions.md | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 8bc36d19ed7..1d4839cbbf9 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -187,7 +187,7 @@ mapContains(map, key) **Parameters** - `map` — Map. [Map](../../sql-reference/data-types/map.md). -- `key` — Key. Type matches the type of `key` parameter. +- `key` — Key. Type matches the type of keys of `map` parameter. **Returned value** diff --git a/docs/ru/sql-reference/functions/tuple-map-functions.md b/docs/ru/sql-reference/functions/tuple-map-functions.md index 7c69f2ca4ef..9516deb243a 100644 --- a/docs/ru/sql-reference/functions/tuple-map-functions.md +++ b/docs/ru/sql-reference/functions/tuple-map-functions.md @@ -178,7 +178,7 @@ select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type ## mapContains {#mapcontains} -Определяет, включает ли в себя `map.keys` параметр `key`. +Определяет, включает ли в себя `map` параметр `key`. **Синтаксис** @@ -188,12 +188,12 @@ mapContains(map, key) **Параметры** -- `map` — контейнер map. -- `key` — ключ. Тип соответстует типу `map.keys`. +- `map` — контейнер Map. [Map](../../sql-reference/data-types/map.md). +- `key` — ключ. Тип соответстует типу ключей параметра `map`. **Возвращаемое значение** -- `1` если `map.keys` включает `key`, иначе `0`. 
+- `1` если `map` включает `key`, иначе `0`. Тип: [UInt8](../../sql-reference/data-types/int-uint.md). @@ -231,7 +231,7 @@ mapKeys(map) **Параметры** -- `map` — контейнер map. +- `map` — контейнер Map. [Map](../../sql-reference/data-types/map.md). **Возвращаемое значение** @@ -272,7 +272,7 @@ mapKeys(map) **Параметры** -- `map` — контейнер map. +- `map` — контейнер Map. [Map](../../sql-reference/data-types/map.md). **Возвращаемое значение** From f2b8f81e40eeadf3b85a1dae873828f4da40c8e7 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Mon, 22 Feb 2021 21:05:25 -0400 Subject: [PATCH 094/149] test for window functions --- .../01592_window_functions.reference | 81 +++++++++++++ .../0_stateless/01592_window_functions.sql | 107 ++++++++++++++++++ .../01592_window_functions1.reference | 4 + .../0_stateless/01592_window_functions1.sql | 35 ++++++ 4 files changed, 227 insertions(+) create mode 100644 tests/queries/0_stateless/01592_window_functions.reference create mode 100644 tests/queries/0_stateless/01592_window_functions.sql create mode 100644 tests/queries/0_stateless/01592_window_functions1.reference create mode 100644 tests/queries/0_stateless/01592_window_functions1.sql diff --git a/tests/queries/0_stateless/01592_window_functions.reference b/tests/queries/0_stateless/01592_window_functions.reference new file mode 100644 index 00000000000..8ca5086821b --- /dev/null +++ b/tests/queries/0_stateless/01592_window_functions.reference @@ -0,0 +1,81 @@ +---- Q1 ---- +Dell Vostro 800.00 Laptop 850 +HP Elite 1200.00 Laptop 850 +Lenovo Thinkpad 700.00 Laptop 850 +Sony VAIO 700.00 Laptop 850 +HTC One 400.00 Smartphone 500 +Microsoft Lumia 200.00 Smartphone 500 +Nexus 500.00 Smartphone 500 +iPhone 900.00 Smartphone 500 +Kindle Fire 150.00 Tablet 350 +Samsung Galaxy Tab 200.00 Tablet 350 +iPad 700.00 Tablet 350 +---- Q2 ---- +Lenovo Thinkpad Laptop 700.00 1 +Sony VAIO Laptop 700.00 1 +Dell Vostro Laptop 800.00 3 +HP Elite Laptop 1200.00 4 +Microsoft Lumia Smartphone 200.00 1 +HTC One Smartphone 400.00 2 +Nexus Smartphone 500.00 3 +iPhone Smartphone 900.00 4 +Kindle Fire Tablet 150.00 1 +Samsung Galaxy Tab Tablet 200.00 2 +iPad Tablet 700.00 3 +---- Q3 ---- +HP Elite Laptop 1200.00 1 +Dell Vostro Laptop 800.00 2 +Lenovo Thinkpad Laptop 700.00 3 +Sony VAIO Laptop 700.00 4 +iPhone Smartphone 900.00 1 +Nexus Smartphone 500.00 2 +HTC One Smartphone 400.00 3 +Microsoft Lumia Smartphone 200.00 4 +iPad Tablet 700.00 1 +Samsung Galaxy Tab Tablet 200.00 2 +Kindle Fire Tablet 150.00 3 +---- Q4 ---- +Lenovo Thinkpad Laptop 700.00 700.00 1 +Sony VAIO Laptop 700.00 700.00 1 +Dell Vostro Laptop 800.00 700.00 2 +HP Elite Laptop 1200.00 700.00 3 +Microsoft Lumia Smartphone 200.00 200.00 1 +HTC One Smartphone 400.00 200.00 2 +Nexus Smartphone 500.00 200.00 3 +iPhone Smartphone 900.00 200.00 4 +---- Q5 ---- +Sony VAIO Laptop 700.00 700.00 +Lenovo Thinkpad Laptop 700.00 700.00 +HP Elite Laptop 1200.00 700.00 +Dell Vostro Laptop 800.00 700.00 +iPhone Smartphone 900.00 900.00 +Nexus Smartphone 500.00 900.00 +Microsoft Lumia Smartphone 200.00 900.00 +HTC One Smartphone 400.00 900.00 +iPad Tablet 700.00 700.00 +Samsung Galaxy Tab Tablet 200.00 700.00 +Kindle Fire Tablet 150.00 700.00 +---- Q6 ---- +Dell Vostro Laptop 800.00 1200.00 +HP Elite Laptop 1200.00 1200.00 +Lenovo Thinkpad Laptop 700.00 1200.00 +Sony VAIO Laptop 700.00 1200.00 +HTC One Smartphone 400.00 900.00 +Microsoft Lumia Smartphone 200.00 900.00 +Nexus Smartphone 500.00 900.00 +iPhone Smartphone 900.00 900.00 +Kindle Fire Tablet 150.00 700.00 +Samsung Galaxy 
Tab Tablet 200.00 700.00 +iPad Tablet 700.00 700.00 +---- Q7 ---- +Dell Vostro 800.00 Laptop 733 850 +HP Elite 1200.00 Laptop 850 850 +Lenovo Thinkpad 700.00 Laptop 700 850 +Sony VAIO 700.00 Laptop 700 850 +HTC One 400.00 Smartphone 300 500 +Microsoft Lumia 200.00 Smartphone 200 500 +Nexus 500.00 Smartphone 367 500 +iPhone 900.00 Smartphone 500 500 +Kindle Fire 150.00 Tablet 150 350 +Samsung Galaxy Tab 200.00 Tablet 175 350 +iPad 700.00 Tablet 350 350 diff --git a/tests/queries/0_stateless/01592_window_functions.sql b/tests/queries/0_stateless/01592_window_functions.sql new file mode 100644 index 00000000000..8d5033fc821 --- /dev/null +++ b/tests/queries/0_stateless/01592_window_functions.sql @@ -0,0 +1,107 @@ +set allow_experimental_window_functions = 1; + +drop table if exists product_groups; +drop table if exists products; + +CREATE TABLE product_groups ( + group_id Int64, + group_name String +) Engine = Memory; + + +CREATE TABLE products ( + product_id Int64, + product_name String, + price DECIMAL(11, 2), + group_id Int64 +) Engine = Memory; + +INSERT INTO product_groups VALUES (1, 'Smartphone'),(2, 'Laptop'),(3, 'Tablet'); + +INSERT INTO products (product_id,product_name, group_id,price) VALUES (1, 'Microsoft Lumia', 1, 200), (2, 'HTC One', 1, 400), (3, 'Nexus', 1, 500), (4, 'iPhone', 1, 900),(5, 'HP Elite', 2, 1200),(6, 'Lenovo Thinkpad', 2, 700),(7, 'Sony VAIO', 2, 700),(8, 'Dell Vostro', 2, 800),(9, 'iPad', 3, 700),(10, 'Kindle Fire', 3, 150),(11, 'Samsung Galaxy Tab', 3, 200); + +select '---- Q1 ----'; + +SELECT + product_name, + price, + group_name, + AVG(price) OVER (PARTITION BY group_name) +FROM products INNER JOIN product_groups USING (group_id) +order by group_name, product_name, price; + +select '---- Q2 ----'; + +SELECT + product_name, + group_name, + price, + rank() OVER (PARTITION BY group_name ORDER BY price) rank +FROM products INNER JOIN product_groups USING (group_id) +order by group_name, rank, price; + +select '---- Q3 ----'; +SELECT + product_name, + group_name, + price, + row_number() OVER (PARTITION BY group_name ORDER BY price desc) rn +FROM products INNER JOIN product_groups USING (group_id) +ORDER BY group_name, rn; + +select '---- Q4 ----'; +SELECT * +FROM +( + SELECT + product_name, + group_name, + price, + min(price) OVER (PARTITION BY group_name) AS min_price, + dense_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS r + FROM products + INNER JOIN product_groups USING (group_id) +) AS t +WHERE min_price > 160 +ORDER BY + group_name ASC, + r ASC, + product_name ASC; + +select '---- Q5 ----'; +SELECT + product_name, + group_name, + price, + FIRST_VALUE (price) OVER (PARTITION BY group_name ORDER BY product_name desc) AS price_per_group_per_alphab +FROM products INNER JOIN product_groups USING (group_id) +order by group_name, product_name desc; + +select '---- Q6 ----'; +SELECT + product_name, + group_name, + price, + LAST_VALUE (price) OVER (PARTITION BY group_name ORDER BY + price RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS highest_price_per_group +FROM + products +INNER JOIN product_groups USING (group_id) +order by group_name, product_name; + +select '---- Q7 ----'; +select product_name, price, group_name, round(avg0), round(avg1) +from ( +SELECT + product_name, + price, + group_name, + avg(price) OVER (PARTITION BY group_name ORDER BY price) avg0, + avg(price) OVER (PARTITION BY group_name ORDER BY + price RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) avg1 +FROM products INNER JOIN product_groups USING 
(group_id)) t +order by group_name, product_name, price; + +drop table product_groups; +drop table products; diff --git a/tests/queries/0_stateless/01592_window_functions1.reference b/tests/queries/0_stateless/01592_window_functions1.reference new file mode 100644 index 00000000000..5160cca9c3e --- /dev/null +++ b/tests/queries/0_stateless/01592_window_functions1.reference @@ -0,0 +1,4 @@ +---- arrays ---- +6360452672161319041 +---- window f ---- +6360452672161319041 diff --git a/tests/queries/0_stateless/01592_window_functions1.sql b/tests/queries/0_stateless/01592_window_functions1.sql new file mode 100644 index 00000000000..c7751ab9f33 --- /dev/null +++ b/tests/queries/0_stateless/01592_window_functions1.sql @@ -0,0 +1,35 @@ +drop table if exists stack; + +set allow_experimental_window_functions = 1; + +create table stack(item_id Int64, brand_id Int64, rack_id Int64, dt DateTime, expiration_dt DateTime, quantity UInt64) +Engine = MergeTree +partition by toYYYYMM(dt) +order by (brand_id, toStartOfHour(dt)); + +insert into stack +select number%99991, number%11, number%1111, toDateTime('2020-01-01 00:00:00')+number/100, + toDateTime('2020-02-01 00:00:00')+number/10, intDiv(number,100)+1 +from numbers(10000000); + +select '---- arrays ----'; + +select cityHash64( toString( groupArray (tuple(*) ) )) from ( + select brand_id, rack_id, arrayJoin(arraySlice(arraySort(groupArray(quantity)),1,2)) quantity + from stack + group by brand_id, rack_id + order by brand_id, rack_id, quantity +) t; + + +select '---- window f ----'; + +select cityHash64( toString( groupArray (tuple(*) ) )) from ( + select brand_id, rack_id, quantity from + ( select brand_id, rack_id, quantity, row_number() over (partition by brand_id, rack_id order by quantity) rn + from stack ) as t0 + where rn <= 2 + order by brand_id, rack_id, quantity +) t; + +drop table if exists stack; From 2ebae14f12af45a89ace4dc6ace681669935698f Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Mon, 22 Feb 2021 23:40:38 -0400 Subject: [PATCH 095/149] mark test as long --- ...unctions1.reference => 01592_long_window_functions1.reference} | 0 ...592_window_functions1.sql => 01592_long_window_functions1.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{01592_window_functions1.reference => 01592_long_window_functions1.reference} (100%) rename tests/queries/0_stateless/{01592_window_functions1.sql => 01592_long_window_functions1.sql} (100%) diff --git a/tests/queries/0_stateless/01592_window_functions1.reference b/tests/queries/0_stateless/01592_long_window_functions1.reference similarity index 100% rename from tests/queries/0_stateless/01592_window_functions1.reference rename to tests/queries/0_stateless/01592_long_window_functions1.reference diff --git a/tests/queries/0_stateless/01592_window_functions1.sql b/tests/queries/0_stateless/01592_long_window_functions1.sql similarity index 100% rename from tests/queries/0_stateless/01592_window_functions1.sql rename to tests/queries/0_stateless/01592_long_window_functions1.sql From 07e4cbcc06a2738cbad0be5fa46a1e15589352fa Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 23 Feb 2021 15:08:29 +0300 Subject: [PATCH 096/149] Update docs/ru/sql-reference/functions/tuple-map-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/tuple-map-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/docs/ru/sql-reference/functions/tuple-map-functions.md b/docs/ru/sql-reference/functions/tuple-map-functions.md index 9516deb243a..e33fc83e67e 100644 --- a/docs/ru/sql-reference/functions/tuple-map-functions.md +++ b/docs/ru/sql-reference/functions/tuple-map-functions.md @@ -276,7 +276,7 @@ mapKeys(map) **Возвращаемое значение** -- Массив со всеми значениями `map`. +- Массив со всеми значениями контейнера `map`. Тип: [Array](../../sql-reference/data-types/array.md). From c223a87658088754de9791e8344b644e1b749752 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 23 Feb 2021 15:08:34 +0300 Subject: [PATCH 097/149] Update docs/ru/sql-reference/functions/tuple-map-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/tuple-map-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/tuple-map-functions.md b/docs/ru/sql-reference/functions/tuple-map-functions.md index e33fc83e67e..52a6a972f75 100644 --- a/docs/ru/sql-reference/functions/tuple-map-functions.md +++ b/docs/ru/sql-reference/functions/tuple-map-functions.md @@ -178,7 +178,7 @@ select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type ## mapContains {#mapcontains} -Определяет, включает ли в себя `map` параметр `key`. +Определяет, содержит ли контейнер `map` ключ `key`. **Синтаксис** From 9f169aa13e415c8a040f8e0e282ec0b972543669 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 23 Feb 2021 15:08:39 +0300 Subject: [PATCH 098/149] Update docs/ru/sql-reference/functions/tuple-map-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/tuple-map-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/tuple-map-functions.md b/docs/ru/sql-reference/functions/tuple-map-functions.md index 52a6a972f75..696fdb9e5ae 100644 --- a/docs/ru/sql-reference/functions/tuple-map-functions.md +++ b/docs/ru/sql-reference/functions/tuple-map-functions.md @@ -235,7 +235,7 @@ mapKeys(map) **Возвращаемое значение** -- Массив со всеми ключами контейнера `map`. +- Массив со всеми ключами контейнера `map`. Тип: [Array](../../sql-reference/data-types/array.md). From d966725f3320ce48b76d43e0092aaeb5161b2d23 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 24 Feb 2021 17:10:35 +0300 Subject: [PATCH 099/149] Try fix hung in void PullingAsyncPipelineExecutor::cancel() --- .../PullingAsyncPipelineExecutor.cpp | 21 +++++++++---------- src/Processors/Formats/LazyOutputFormat.h | 2 ++ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 21741d30dfa..3270d1186f6 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -14,6 +14,7 @@ struct PullingAsyncPipelineExecutor::Data { PipelineExecutorPtr executor; std::exception_ptr exception; + LazyOutputFormat * lazy_format = nullptr; std::atomic_bool is_finished = false; std::atomic_bool has_exception = false; ThreadFromGlobalPool thread; @@ -82,6 +83,10 @@ static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGrou { data.exception = std::current_exception(); data.has_exception = true; + + /// Finish lazy format in case of exception. 
Otherwise thread.join() may hung. + if (data.lazy_format) + data.lazy_format->cancel(); } data.is_finished = true; @@ -95,6 +100,7 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) { data = std::make_unique(); data->executor = pipeline.execute(); + data->lazy_format = lazy_format.get(); auto func = [&, thread_group = CurrentThread::getGroup()]() { @@ -105,14 +111,7 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) } if (data->has_exception) - { - /// Finish lazy format in case of exception. Otherwise thread.join() may hung. - if (lazy_format) - lazy_format->finish(); - - data->has_exception = false; std::rethrow_exception(std::move(data->exception)); - } bool is_execution_finished = lazy_format ? lazy_format->isFinished() : data->is_finished.load(); @@ -172,14 +171,14 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) void PullingAsyncPipelineExecutor::cancel() { - /// Cancel execution if it wasn't finished. - if (data && !data->is_finished && data->executor) - data->executor->cancel(); - /// Finish lazy format. Otherwise thread.join() may hung. if (lazy_format && !lazy_format->isFinished()) lazy_format->finish(); + /// Cancel execution if it wasn't finished. + if (data && !data->is_finished && data->executor) + data->executor->cancel(); + /// Join thread here to wait for possible exception. if (data && data->thread.joinable()) data->thread.join(); diff --git a/src/Processors/Formats/LazyOutputFormat.h b/src/Processors/Formats/LazyOutputFormat.h index 06ec116f3dd..647daa33889 100644 --- a/src/Processors/Formats/LazyOutputFormat.h +++ b/src/Processors/Formats/LazyOutputFormat.h @@ -36,6 +36,8 @@ public: queue.clear(); } + void onCancel() override { finalize(); } + protected: void consume(Chunk chunk) override { From 9ded4dd3966c9359bdddad75b638c0431f836057 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 24 Feb 2021 17:13:29 +0300 Subject: [PATCH 100/149] Update test. --- tests/queries/0_stateless/00205_scalar_subqueries.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/00205_scalar_subqueries.sql b/tests/queries/0_stateless/00205_scalar_subqueries.sql index 14244377e5f..03bcd0a3ebc 100644 --- a/tests/queries/0_stateless/00205_scalar_subqueries.sql +++ b/tests/queries/0_stateless/00205_scalar_subqueries.sql @@ -7,3 +7,4 @@ SELECT (SELECT toDate('2015-01-02'), 'Hello'); SELECT (SELECT toDate('2015-01-02'), 'Hello') AS x, x, identity((SELECT 1)), identity((SELECT 1) AS y); -- SELECT (SELECT uniqState('')); + SELECT ( SELECT throwIf(1 + dummy) ); -- { serverError 395 } From aa8632a1bc2eb6379e83655301d9ca00cc156b33 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 25 Feb 2021 00:27:47 +0300 Subject: [PATCH 101/149] Fix tests. --- .../Executors/PullingAsyncPipelineExecutor.cpp | 4 ++-- src/Processors/Formats/LazyOutputFormat.h | 17 ++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 3270d1186f6..d058ea9e6ac 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -86,7 +86,7 @@ static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGrou /// Finish lazy format in case of exception. Otherwise thread.join() may hung. 
if (data.lazy_format) - data.lazy_format->cancel(); + data.lazy_format->finalize(); } data.is_finished = true; @@ -120,7 +120,7 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) { /// If lazy format is finished, we don't cancel pipeline but wait for main thread to be finished. data->is_finished = true; - /// Wait thread ant rethrow exception if any. + /// Wait thread and rethrow exception if any. cancel(); return false; } diff --git a/src/Processors/Formats/LazyOutputFormat.h b/src/Processors/Formats/LazyOutputFormat.h index 647daa33889..7188458dd82 100644 --- a/src/Processors/Formats/LazyOutputFormat.h +++ b/src/Processors/Formats/LazyOutputFormat.h @@ -36,7 +36,14 @@ public: queue.clear(); } - void onCancel() override { finalize(); } + void finalize() override + { + std::cerr << StackTrace().toString() << std::endl; + finished_processing = true; + + /// In case we are waiting for result. + queue.emplace(Chunk()); + } protected: void consume(Chunk chunk) override @@ -48,14 +55,6 @@ protected: void consumeTotals(Chunk chunk) override { totals = std::move(chunk); } void consumeExtremes(Chunk chunk) override { extremes = std::move(chunk); } - void finalize() override - { - finished_processing = true; - - /// In case we are waiting for result. - queue.emplace(Chunk()); - } - private: ConcurrentBoundedQueue queue; From 6f5d4ba8cd9ffdec8743c9c7cdff996324b5481c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 25 Feb 2021 00:31:15 +0300 Subject: [PATCH 102/149] Fix tests. --- src/Processors/Executors/PullingAsyncPipelineExecutor.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index d058ea9e6ac..c975153d317 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -171,14 +171,14 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) void PullingAsyncPipelineExecutor::cancel() { - /// Finish lazy format. Otherwise thread.join() may hung. - if (lazy_format && !lazy_format->isFinished()) - lazy_format->finish(); - /// Cancel execution if it wasn't finished. if (data && !data->is_finished && data->executor) data->executor->cancel(); + /// Finish lazy format. Otherwise thread.join() may hung. + if (lazy_format && !lazy_format->isFinished()) + lazy_format->finish(); + /// Join thread here to wait for possible exception. if (data && data->thread.joinable()) data->thread.join(); From 33364f6bdf15fe164321bff7da0fdd9e000a3947 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 25 Feb 2021 13:50:35 +0300 Subject: [PATCH 103/149] Remove debug output --- src/Processors/Formats/LazyOutputFormat.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Processors/Formats/LazyOutputFormat.h b/src/Processors/Formats/LazyOutputFormat.h index 7188458dd82..15ea5022f82 100644 --- a/src/Processors/Formats/LazyOutputFormat.h +++ b/src/Processors/Formats/LazyOutputFormat.h @@ -38,7 +38,6 @@ public: void finalize() override { - std::cerr << StackTrace().toString() << std::endl; finished_processing = true; /// In case we are waiting for result. 
 queue.emplace(Chunk());
From 70b8d6d5657efe0157babeabf107453dad07e42a Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 25 Feb 2021 14:19:26 +0300 Subject: [PATCH 104/149] Update encryption-functions.md --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index e2c5560e4f6..4388f327eab 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -41,7 +41,7 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad]) **Возвращаемое значение** -- Двоичная зашифрованная строка. [String](../../sql-reference/data-types/string.md#string). +- Бинарная зашифрованная строка. [String](../../sql-reference/data-types/string.md#string). **Примеры** From d4e48f44f5e69d553e4c67f8a158307206cca132 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 25 Feb 2021 14:20:42 +0300 Subject: [PATCH 105/149] Update encryption-functions.md --- docs/ru/sql-reference/functions/encryption-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index 4388f327eab..0216a6b2356 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -136,7 +136,7 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv]) **Возвращаемое значение** -- Двоичная зашифрованная строка. [String](../../sql-reference/data-types/string.md#string). +- Бинарная зашифрованная строка. [String](../../sql-reference/data-types/string.md#string). **Примеры** From 8fec34af12f6d3373c53e8792a0ecb49bf983ac3 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 19 Feb 2021 23:00:33 +0300 Subject: [PATCH 106/149] Constraints complex types support --- .../CheckConstraintsBlockOutputStream.cpp | 114 +++++++++--------- .../01720_constraints_complex_types.reference | 3 + .../01720_constraints_complex_types.sql | 47 ++++++++ ...constraints_constant_expressions.reference | 1 + ...01721_constraints_constant_expressions.sql | 27 +++++ 5 files changed, 136 insertions(+), 56 deletions(-) create mode 100644 tests/queries/0_stateless/01720_constraints_complex_types.reference create mode 100644 tests/queries/0_stateless/01720_constraints_complex_types.sql create mode 100644 tests/queries/0_stateless/01721_constraints_constant_expressions.reference create mode 100644 tests/queries/0_stateless/01721_constraints_constant_expressions.sql diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index a967ee28502..84c31de99b4 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -1,12 +1,15 @@ -#include -#include -#include -#include -#include -#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -15,7 +18,7 @@ namespace DB namespace ErrorCodes { extern const int VIOLATED_CONSTRAINT; - extern const int LOGICAL_ERROR; + extern const int UNSUPPORTED_METHOD; } @@ -48,62 +51,61 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) ColumnWithTypeAndName res_column = block_to_calculate.getByName(constraint_ptr->expr->getColumnName()); - if (!isUInt8(res_column.type)) - throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Constraint {} does not return a value of type UInt8", + auto result_type = removeNullable(removeLowCardinality(res_column.type)); + auto result_column = res_column.column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality(); + + if (const auto * column_nullable = checkAndGetColumn(*result_column)) + result_column = column_nullable->getNestedColumnPtr(); + + if (!isUInt8(result_type)) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Constraint {} does not return a value of type UInt8", backQuote(constraint_ptr->name)); - if (const ColumnConst * res_const = typeid_cast(res_column.column.get())) + const ColumnUInt8 & res_column_uint8 = assert_cast(*result_column); + + const UInt8 * data = res_column_uint8.getData().data(); + size_t size = res_column_uint8.size(); + + std::cerr << std::endl; + for (size_t j = 0; j < size; ++j) + std::cerr << data[j] << " "; + std::cerr << std::endl; + + /// Is violated. + if (!memoryIsByte(data, size, 1)) { - UInt8 value = res_const->getValue(); + size_t row_idx = 0; + for (; row_idx < size; ++row_idx) + if (data[row_idx] != 1) + break; - /// Is violated. - if (!value) + Names related_columns = constraint_expr->getRequiredColumns(); + + bool first = true; + String column_values_msg; + constexpr size_t approx_bytes_for_col = 32; + column_values_msg.reserve(approx_bytes_for_col * related_columns.size()); + for (const auto & name : related_columns) { - throw Exception(ErrorCodes::VIOLATED_CONSTRAINT, - "Constraint {} for table {} is violated, because it is a constant expression returning 0. " - "It is most likely an error in table definition.", - backQuote(constraint_ptr->name), table_id.getNameForLogs()); + const IColumn & column = *block.getByName(name).column; + assert(row_idx < column.size()); + + if (!first) + column_values_msg.append(", "); + column_values_msg.append(backQuoteIfNeed(name)); + column_values_msg.append(" = "); + column_values_msg.append(applyVisitor(FieldVisitorToString(), column[row_idx])); + first = false; } - } - else - { - const ColumnUInt8 & res_column_uint8 = assert_cast(*res_column.column); - const UInt8 * data = res_column_uint8.getData().data(); - size_t size = res_column_uint8.size(); - - /// Is violated. - if (!memoryIsByte(data, size, 1)) - { - size_t row_idx = 0; - for (; row_idx < size; ++row_idx) - if (data[row_idx] != 1) - break; - - Names related_columns = constraint_expr->getRequiredColumns(); - - bool first = true; - String column_values_msg; - constexpr size_t approx_bytes_for_col = 32; - column_values_msg.reserve(approx_bytes_for_col * related_columns.size()); - for (const auto & name : related_columns) - { - const IColumn & column = *block.getByName(name).column; - assert(row_idx < column.size()); - - if (!first) - column_values_msg.append(", "); - column_values_msg.append(backQuoteIfNeed(name)); - column_values_msg.append(" = "); - column_values_msg.append(applyVisitor(FieldVisitorToString(), column[row_idx])); - first = false; - } - - throw Exception(ErrorCodes::VIOLATED_CONSTRAINT, - "Constraint {} for table {} is violated at row {}. Expression: ({}). Column values: {}", - backQuote(constraint_ptr->name), table_id.getNameForLogs(), rows_written + row_idx + 1, - serializeAST(*(constraint_ptr->expr), true), column_values_msg); - } + throw Exception( + ErrorCodes::VIOLATED_CONSTRAINT, + "Constraint {} for table {} is violated at row {}. Expression: ({}). 
Column values: {}", + backQuote(constraint_ptr->name), + table_id.getNameForLogs(), + rows_written + row_idx + 1, + serializeAST(*(constraint_ptr->expr), true), + column_values_msg); } } } diff --git a/tests/queries/0_stateless/01720_constraints_complex_types.reference b/tests/queries/0_stateless/01720_constraints_complex_types.reference new file mode 100644 index 00000000000..01e79c32a8c --- /dev/null +++ b/tests/queries/0_stateless/01720_constraints_complex_types.reference @@ -0,0 +1,3 @@ +1 +2 +3 diff --git a/tests/queries/0_stateless/01720_constraints_complex_types.sql b/tests/queries/0_stateless/01720_constraints_complex_types.sql new file mode 100644 index 00000000000..7e400c4931e --- /dev/null +++ b/tests/queries/0_stateless/01720_constraints_complex_types.sql @@ -0,0 +1,47 @@ +DROP TABLE IF EXISTS constraint_on_nullable_type; +CREATE TABLE constraint_on_nullable_type +( + `id` Nullable(UInt64), + CONSTRAINT `c0` CHECK `id` = 1 +) +ENGINE = TinyLog(); + +INSERT INTO constraint_on_nullable_type VALUES (0); -- {serverError 469} +INSERT INTO constraint_on_nullable_type VALUES (1); + +SELECT * FROM constraint_on_nullable_type; + +DROP TABLE constraint_on_nullable_type; + +SET allow_suspicious_low_cardinality_types = 1; + +DROP TABLE IF EXISTS constraint_on_low_cardinality_type; +CREATE TABLE constraint_on_low_cardinality_type +( + `id` LowCardinality(UInt64), + CONSTRAINT `c0` CHECK `id` = 2 +) +ENGINE = TinyLog; + +INSERT INTO constraint_on_low_cardinality_type VALUES (0); -- {serverError 469} +INSERT INTO constraint_on_low_cardinality_type VALUES (2); + +SELECT * FROM constraint_on_low_cardinality_type; + +DROP TABLE constraint_on_low_cardinality_type; + +DROP TABLE IF EXISTS constraint_on_low_cardinality_nullable_type; + +CREATE TABLE constraint_on_low_cardinality_nullable_type +( + `id` LowCardinality(Nullable(UInt64)), + CONSTRAINT `c0` CHECK `id` = 3 +) +ENGINE = TinyLog; + +INSERT INTO constraint_on_low_cardinality_nullable_type VALUES (0); -- {serverError 469} +INSERT INTO constraint_on_low_cardinality_nullable_type VALUES (3); + +SELECT * FROM constraint_on_low_cardinality_nullable_type; + +DROP TABLE constraint_on_low_cardinality_nullable_type; diff --git a/tests/queries/0_stateless/01721_constraints_constant_expressions.reference b/tests/queries/0_stateless/01721_constraints_constant_expressions.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01721_constraints_constant_expressions.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01721_constraints_constant_expressions.sql b/tests/queries/0_stateless/01721_constraints_constant_expressions.sql new file mode 100644 index 00000000000..c38ed442eef --- /dev/null +++ b/tests/queries/0_stateless/01721_constraints_constant_expressions.sql @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS constraint_constant_number_expression; +CREATE TABLE constraint_constant_number_expression +( + id UInt64, + CONSTRAINT `c0` CHECK 1, + CONSTRAINT `c1` CHECK 1 < 2, + CONSTRAINT `c2` CHECK isNull(cast(NULL, 'Nullable(UInt8)')) +) ENGINE = TinyLog(); + +INSERT INTO constraint_constant_number_expression VALUES (1); + +SELECT * FROM constraint_constant_number_expression; + +DROP TABLE constraint_constant_number_expression; + +DROP TABLE IF EXISTS constraint_constant_number_expression_non_uint8; +CREATE TABLE constraint_constant_number_expression_non_uint8 +( + id UInt64, + CONSTRAINT `c0` CHECK toUInt64(1) +) ENGINE = TinyLog(); + +INSERT INTO constraint_constant_number_expression_non_uint8 
VALUES (1); -- {serverError 1} + +SELECT * FROM constraint_constant_number_expression_non_uint8; + +DROP TABLE constraint_constant_number_expression_non_uint8; From 2eec1d021b4ac417cc625ece47d5c0f86cc90d0d Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 20 Feb 2021 00:37:52 +0300 Subject: [PATCH 107/149] Fixed unused code --- src/DataStreams/CheckConstraintsBlockOutputStream.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index 84c31de99b4..9bb80cfa4e1 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -66,11 +66,6 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) const UInt8 * data = res_column_uint8.getData().data(); size_t size = res_column_uint8.size(); - std::cerr << std::endl; - for (size_t j = 0; j < size; ++j) - std::cerr << data[j] << " "; - std::cerr << std::endl; - /// Is violated. if (!memoryIsByte(data, size, 1)) { From 23af53067d17e5dd1aa0fe844fc195e5fa154470 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 25 Feb 2021 11:35:31 +0300 Subject: [PATCH 108/149] Updated support for Nullable column --- .../CheckConstraintsBlockOutputStream.cpp | 27 ++++++++++++++++--- ...01721_constraints_constant_expressions.sql | 15 ++++++++++- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index 9bb80cfa4e1..b42ff4b4e1d 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -52,15 +52,34 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) ColumnWithTypeAndName res_column = block_to_calculate.getByName(constraint_ptr->expr->getColumnName()); auto result_type = removeNullable(removeLowCardinality(res_column.type)); - auto result_column = res_column.column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality(); - - if (const auto * column_nullable = checkAndGetColumn(*result_column)) - result_column = column_nullable->getNestedColumnPtr(); if (!isUInt8(result_type)) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Constraint {} does not return a value of type UInt8", backQuote(constraint_ptr->name)); + auto result_column = res_column.column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality(); + + if (const auto * column_nullable = checkAndGetColumn(*result_column)) + { + const auto & nested_column = column_nullable->getNestedColumnPtr(); + + /// Check if constraint value is nullable + const auto & null_map = column_nullable->getNullMapColumn(); + const auto & data = null_map.getData(); + bool null_map_contain_null = std::find(data.begin(), data.end(), true); + + if (null_map_contain_null) + throw Exception( + ErrorCodes::VIOLATED_CONSTRAINT, + "Constraint {} for table {} is violated. 
Expression: ({})."\ + "Constraint expression returns nullable column that contains null value", + backQuote(constraint_ptr->name), + table_id.getNameForLogs(), + serializeAST(*(constraint_ptr->expr), true)); + + result_column = nested_column; + } + const ColumnUInt8 & res_column_uint8 = assert_cast(*result_column); const UInt8 * data = res_column_uint8.getData().data(); diff --git a/tests/queries/0_stateless/01721_constraints_constant_expressions.sql b/tests/queries/0_stateless/01721_constraints_constant_expressions.sql index c38ed442eef..d70c0cd4dc0 100644 --- a/tests/queries/0_stateless/01721_constraints_constant_expressions.sql +++ b/tests/queries/0_stateless/01721_constraints_constant_expressions.sql @@ -20,8 +20,21 @@ CREATE TABLE constraint_constant_number_expression_non_uint8 CONSTRAINT `c0` CHECK toUInt64(1) ) ENGINE = TinyLog(); -INSERT INTO constraint_constant_number_expression_non_uint8 VALUES (1); -- {serverError 1} +INSERT INTO constraint_constant_number_expression_non_uint8 VALUES (2); -- {serverError 1} SELECT * FROM constraint_constant_number_expression_non_uint8; DROP TABLE constraint_constant_number_expression_non_uint8; + +DROP TABLE IF EXISTS constraint_constant_nullable_expression_that_contains_null; +CREATE TABLE constraint_constant_nullable_expression_that_contains_null +( + id UInt64, + CONSTRAINT `c0` CHECK nullIf(1 % 2, 1) +) ENGINE = TinyLog(); + +INSERT INTO constraint_constant_nullable_expression_that_contains_null VALUES (3); -- {serverError 469} + +SELECT * FROM constraint_constant_nullable_expression_that_contains_null; + +DROP TABLE constraint_constant_nullable_expression_that_contains_null; From da321c2bfecaaafce225e4df0a711ae6a6fe45a4 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 25 Feb 2021 16:08:04 +0300 Subject: [PATCH 109/149] Fixed check for null value in null map --- src/DataStreams/CheckConstraintsBlockOutputStream.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index b42ff4b4e1d..90d6560b9a8 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -66,9 +66,11 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) /// Check if constraint value is nullable const auto & null_map = column_nullable->getNullMapColumn(); const auto & data = null_map.getData(); - bool null_map_contain_null = std::find(data.begin(), data.end(), true); + const auto * it = std::find(data.begin(), data.end(), true); - if (null_map_contain_null) + bool null_map_contains_null = it != data.end(); + + if (null_map_contains_null) throw Exception( ErrorCodes::VIOLATED_CONSTRAINT, "Constraint {} for table {} is violated. 
Expression: ({})."\ From c51773d2b3435d2a4f4009616858aa744249fe17 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 26 Feb 2021 12:11:30 +0300 Subject: [PATCH 110/149] Updated test --- tests/queries/0_stateless/01720_constraints_complex_types.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01720_constraints_complex_types.sql b/tests/queries/0_stateless/01720_constraints_complex_types.sql index 7e400c4931e..273f509b6eb 100644 --- a/tests/queries/0_stateless/01720_constraints_complex_types.sql +++ b/tests/queries/0_stateless/01720_constraints_complex_types.sql @@ -1,3 +1,5 @@ +SET allow_suspicious_low_cardinality_types = 1; + DROP TABLE IF EXISTS constraint_on_nullable_type; CREATE TABLE constraint_on_nullable_type ( @@ -13,8 +15,6 @@ SELECT * FROM constraint_on_nullable_type; DROP TABLE constraint_on_nullable_type; -SET allow_suspicious_low_cardinality_types = 1; - DROP TABLE IF EXISTS constraint_on_low_cardinality_type; CREATE TABLE constraint_on_low_cardinality_type ( From 2b401cfa7c1f2886f40bc77f37ee4b38b86c5eae Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 26 Feb 2021 11:50:01 +0000 Subject: [PATCH 111/149] Update brotli to a version with a recent origin fix: "encoder: fix rare access to uninitialized data in ring-buffer" --- contrib/brotli | 2 +- contrib/brotli-cmake/CMakeLists.txt | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/contrib/brotli b/contrib/brotli index 5805f99a533..63be8a99401 160000 --- a/contrib/brotli +++ b/contrib/brotli @@ -1 +1 @@ -Subproject commit 5805f99a533a8f8118699c0100d8c102f3605f65 +Subproject commit 63be8a99401992075c23e99f7c84de1c653e39e2 diff --git a/contrib/brotli-cmake/CMakeLists.txt b/contrib/brotli-cmake/CMakeLists.txt index e22f4593c02..4c5f584de9d 100644 --- a/contrib/brotli-cmake/CMakeLists.txt +++ b/contrib/brotli-cmake/CMakeLists.txt @@ -2,6 +2,8 @@ set(BROTLI_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/brotli/c) set(BROTLI_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/brotli/c) set(SRCS + ${BROTLI_SOURCE_DIR}/enc/command.c + ${BROTLI_SOURCE_DIR}/enc/fast_log.c ${BROTLI_SOURCE_DIR}/dec/bit_reader.c ${BROTLI_SOURCE_DIR}/dec/state.c ${BROTLI_SOURCE_DIR}/dec/huffman.c @@ -26,6 +28,9 @@ set(SRCS ${BROTLI_SOURCE_DIR}/enc/memory.c ${BROTLI_SOURCE_DIR}/common/dictionary.c ${BROTLI_SOURCE_DIR}/common/transform.c + ${BROTLI_SOURCE_DIR}/common/platform.c + ${BROTLI_SOURCE_DIR}/common/context.c + ${BROTLI_SOURCE_DIR}/common/constants.c ) add_library(brotli ${SRCS}) From 17d94065018028773a034a240987bb354966b12a Mon Sep 17 00:00:00 2001 From: Vladimir Date: Fri, 26 Feb 2021 15:13:41 +0300 Subject: [PATCH 112/149] Apply suggestions from code review --- docs/ru/sql-reference/data-types/simpleaggregatefunction.md | 2 +- docs/ru/sql-reference/functions/date-time-functions.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index 7677b64e924..668b579ff78 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -36,4 +36,4 @@ CREATE TABLE simple (id UInt64, val SimpleAggregateFunction(sum, Double)) ENGINE=AggregatingMergeTree ORDER BY id; ``` -[Оригинальная статья](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) +[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/data-types/simpleaggregatefunction/) diff --git 
a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index bb4c49e898e..9b3eade9d79 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -305,7 +305,7 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d Переводит дату-с-временем или дату в число типа UInt16, содержащее номер ISO года. ISO год отличается от обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) ISO год начинается необязательно первого января. -**Пример:** +**Пример** Запрос: From 932286df1ff07550841902b5f878a9482ca48045 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 26 Feb 2021 16:32:34 +0300 Subject: [PATCH 113/149] Convert columns to nullable for totals in join --- src/Interpreters/HashJoin.cpp | 2 +- src/Interpreters/IJoin.h | 2 ++ src/Interpreters/MergeJoin.cpp | 2 +- src/Interpreters/join_common.cpp | 14 ++++++++++++-- src/Interpreters/join_common.h | 2 +- .../0_stateless/01637_nullable_fuzz3.reference | 4 ++++ tests/queries/0_stateless/01637_nullable_fuzz3.sql | 2 ++ 7 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 142ab07927f..44d1b434bc0 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1337,7 +1337,7 @@ void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) void HashJoin::joinTotals(Block & block) const { - JoinCommon::joinTotals(totals, sample_block_with_columns_to_add, key_names_right, block); + JoinCommon::joinTotals(totals, sample_block_with_columns_to_add, *table_join, block); } diff --git a/src/Interpreters/IJoin.h b/src/Interpreters/IJoin.h index 3d313adcb6c..ade6eaa0cc9 100644 --- a/src/Interpreters/IJoin.h +++ b/src/Interpreters/IJoin.h @@ -28,7 +28,9 @@ public: virtual void joinBlock(Block & block, std::shared_ptr & not_processed) = 0; virtual bool hasTotals() const = 0; + /// Set totals for right table virtual void setTotals(const Block & block) = 0; + /// Add totals to block from left table virtual void joinTotals(Block & block) const = 0; virtual size_t getTotalRowCount() const = 0; diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index e1e4cd9e8a3..ddeaf053225 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -496,7 +496,7 @@ void MergeJoin::setTotals(const Block & totals_block) void MergeJoin::joinTotals(Block & block) const { - JoinCommon::joinTotals(totals, right_columns_to_add, table_join->keyNamesRight(), block); + JoinCommon::joinTotals(totals, right_columns_to_add, *table_join, block); } void MergeJoin::mergeRightBlocks() diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp index a4c39a45efa..69727a2a6bb 100644 --- a/src/Interpreters/join_common.cpp +++ b/src/Interpreters/join_common.cpp @@ -251,13 +251,23 @@ void createMissedColumns(Block & block) } } -void joinTotals(const Block & totals, const Block & columns_to_add, const Names & key_names_right, Block & block) +/// Append totals from right to left block, correct types if needed +void joinTotals(const Block & totals, const Block & columns_to_add, const TableJoin & table_join, Block & block) { + if (table_join.forceNullableLeft()) + convertColumnsToNullable(block); + if (Block totals_without_keys = totals) { - for (const auto & name : key_names_right) + for (const auto & name : table_join.keyNamesRight()) 
totals_without_keys.erase(totals_without_keys.getPositionByName(name)); + for (auto & col : totals_without_keys) + { + if (table_join.rightBecomeNullable(col.type)) + JoinCommon::convertColumnToNullable(col); + } + for (size_t i = 0; i < totals_without_keys.columns(); ++i) block.insert(totals_without_keys.safeGetByPosition(i)); } diff --git a/src/Interpreters/join_common.h b/src/Interpreters/join_common.h index 6f9f7dd1210..76fbcf8d87d 100644 --- a/src/Interpreters/join_common.h +++ b/src/Interpreters/join_common.h @@ -32,7 +32,7 @@ ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_nam void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const Block & block_right, const Names & key_names_right); void createMissedColumns(Block & block); -void joinTotals(const Block & totals, const Block & columns_to_add, const Names & key_names_right, Block & block); +void joinTotals(const Block & totals, const Block & columns_to_add, const TableJoin & table_join, Block & block); void addDefaultValues(IColumn & column, const DataTypePtr & type, size_t count); diff --git a/tests/queries/0_stateless/01637_nullable_fuzz3.reference b/tests/queries/0_stateless/01637_nullable_fuzz3.reference index d9cf16b9d2a..795a0159ac8 100644 --- a/tests/queries/0_stateless/01637_nullable_fuzz3.reference +++ b/tests/queries/0_stateless/01637_nullable_fuzz3.reference @@ -1,2 +1,6 @@ 0 + +0 + +0 0 0 diff --git a/tests/queries/0_stateless/01637_nullable_fuzz3.sql b/tests/queries/0_stateless/01637_nullable_fuzz3.sql index 21bf8999eae..6cfd0fc7d1c 100644 --- a/tests/queries/0_stateless/01637_nullable_fuzz3.sql +++ b/tests/queries/0_stateless/01637_nullable_fuzz3.sql @@ -1,4 +1,6 @@ DROP TABLE IF EXISTS t; CREATE TABLE t (`item_id` UInt64, `price_sold` Float32, `date` Date) ENGINE = MergeTree ORDER BY item_id; SELECT item_id FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) AS l FULL OUTER JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) AS r USING (item_id); +SELECT item_id FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) AS l FULL OUTER JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) AS r USING (item_id) SETTINGS join_use_nulls = '1'; +SELECT * FROM (SELECT item_id, sum(price_sold) as price_sold FROM t GROUP BY item_id WITH TOTALS) AS l FULL OUTER JOIN (SELECT item_id, sum(price_sold) as price_sold FROM t GROUP BY item_id WITH TOTALS) AS r USING (item_id) SETTINGS join_use_nulls = '1'; DROP TABLE t; From 2679613503b020402ff1efc0750f8b31f08cf705 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 26 Feb 2021 18:37:57 +0300 Subject: [PATCH 114/149] Added test to skip list DatabaseReplicated --- tests/queries/skip_list.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 475b87ae36a..36cca55779d 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -260,7 +260,8 @@ "00121_drop_column_zookeeper", "00116_storage_set", "00083_create_merge_tree_zookeeper", - "00062_replicated_merge_tree_alter_zookeeper" + "00062_replicated_merge_tree_alter_zookeeper", + "01720_constraints_complex_types" ], "polymorphic-parts": [ "01508_partition_pruning_long", /// bug, shoud be fixed From d328bfa41f7a4c6d5796b2d8b8747dbe077984a8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 26 Feb 2021 19:29:56 +0300 Subject: [PATCH 115/149] Review fixes. Add setting max_optimizations_to_apply. 
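
The new setting query_plan_max_optimizations_to_apply caps the total number of
optimizations applied to a single query plan; when the cap is exceeded, a
TOO_MANY_QUERY_PLAN_OPTIMIZATIONS exception is thrown instead of looping
indefinitely. A minimal reproduction, mirroring the shell test added in this
series (the quoted error text is taken from that test's grep pattern):

    SELECT x + 1 FROM (SELECT y + 2 AS x FROM (SELECT dummy + 3 AS y))
    SETTINGS query_plan_max_optimizations_to_apply = 1;
    -- throws: "Too many optimizations applied to query plan. Current limit 1"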
--- src/Common/ErrorCodes.cpp | 3 ++- src/Core/Settings.h | 1 + src/Interpreters/ActionsDAG.cpp | 9 ++++---- src/Interpreters/ActionsDAG.h | 3 +++ .../ClusterProxy/SelectStreamFactory.cpp | 2 +- src/Interpreters/InterpreterExplainQuery.cpp | 6 +++--- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- .../InterpreterSelectWithUnionQuery.cpp | 2 +- src/Interpreters/MutationsInterpreter.cpp | 2 +- .../QueryPlan/Optimizations/Optimizations.h | 2 +- .../QueryPlanOptimizationSettings.cpp | 12 +++++++++++ .../QueryPlanOptimizationSettings.h | 20 ++++++++++++++++++ .../QueryPlan/Optimizations/optimizeTree.cpp | 21 +++++++++++++++---- src/Processors/QueryPlan/QueryPlan.cpp | 12 +++++------ src/Processors/QueryPlan/QueryPlan.h | 9 ++++---- .../MergeTree/StorageFromMergeTreeDataPart.h | 2 +- src/Storages/StorageBuffer.cpp | 2 +- src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageMaterializedView.cpp | 2 +- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/Storages/StorageView.cpp | 2 +- .../01655_plan_optimizations.reference | 2 ++ .../0_stateless/01655_plan_optimizations.sh | 5 ++++- 24 files changed, 92 insertions(+), 35 deletions(-) create mode 100644 src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp create mode 100644 src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index d0d83448b68..d14c9a7e45e 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -535,12 +535,13 @@ M(566, CANNOT_RMDIR) \ M(567, DUPLICATED_PART_UUIDS) \ M(568, RAFT_ERROR) \ + M(569, TOO_MANY_QUERY_PLAN_OPTIMIZATIONS) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ M(1001, STD_EXCEPTION) \ M(1002, UNKNOWN_EXCEPTION) \ - M(1003, INVALID_SHARD_ID) + M(1003, INVALID_SHARD_ID) \ /* See END */ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9bb9ad30f15..549a1716a44 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -431,6 +431,7 @@ class IColumn; M(UnionMode, union_default_mode, UnionMode::Unspecified, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without Union Mode will throw exception.", 0) \ M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \ M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \ + M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. If limit reached, throw exception", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below. diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 1406eecc5c0..e994a6a0ef6 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1215,7 +1215,7 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & co namespace { -struct ConjinctionNodes +struct ConjunctionNodes { std::vector allowed; std::vector rejected; @@ -1225,9 +1225,9 @@ struct ConjinctionNodes /// Assuming predicate is a conjunction (probably, trivial). /// Find separate conjunctions nodes. Split nodes into allowed and rejected sets. /// Allowed predicate is a predicate which can be calculated using only nodes from allowed_nodes set. 
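/// Illustration: for a predicate `a AND b AND c` where only `a` and `c` can be
/// computed from allowed_nodes, `allowed` will contain {a, c} and `rejected` {b}.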
-ConjinctionNodes getConjinctionNodes(ActionsDAG::Node * predicate, std::unordered_set allowed_nodes) +ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordered_set allowed_nodes) { - ConjinctionNodes conjunction; + ConjunctionNodes conjunction; std::unordered_set allowed; std::unordered_set rejected; @@ -1299,6 +1299,7 @@ ConjinctionNodes getConjinctionNodes(ActionsDAG::Node * predicate, std::unordere if (conjunction.allowed.empty()) { + /// If nothing was added to conjunction, check if it is trivial. if (allowed_nodes.count(predicate)) conjunction.allowed.push_back(predicate); } @@ -1450,7 +1451,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, } } - auto conjunction = getConjinctionNodes(predicate, allowed_nodes); + auto conjunction = getConjunctionNodes(predicate, allowed_nodes); auto actions = cloneActionsForConjunction(conjunction.allowed); if (!actions) return nullptr; diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 2e3baa181fd..14ed5df949b 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -152,6 +152,9 @@ public: } }; + /// NOTE: std::list is an implementation detail. + /// It allows to add and remove new nodes inplace without reallocation. + /// Raw pointers to nodes remain valid. using Nodes = std::list; using Inputs = std::vector; diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 47726e49d50..e0fcc4738ba 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -284,7 +284,7 @@ void SelectStreamFactory::createForShard( if (try_results.empty() || local_delay < max_remote_delay) { auto plan = createLocalPlan(modified_query_ast, header, context, stage); - return QueryPipeline::getPipe(std::move(*plan->buildQueryPipeline())); + return QueryPipeline::getPipe(std::move(*plan->buildQueryPipeline(QueryPlanOptimizationSettings(context.getSettingsRef())))); } else { diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index b13350d7ba2..5135e40e4dd 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -117,7 +117,7 @@ struct QueryPlanSettings { QueryPlan::ExplainPlanOptions query_plan_options; - /// Apply query plan optimisations. + /// Apply query plan optimizations. 
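+    /// Presumably exposed as the `optimize` setting of EXPLAIN PLAN, e.g. `EXPLAIN PLAN optimize = 0 SELECT ...`.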
bool optimize = true; constexpr static char name[] = "PLAN"; @@ -251,7 +251,7 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl() interpreter.buildQueryPlan(plan); if (settings.optimize) - plan.optimize(); + plan.optimize(QueryPlanOptimizationSettings(context.getSettingsRef())); plan.explainPlan(buf, settings.query_plan_options); } @@ -265,7 +265,7 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl() InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), context, SelectQueryOptions()); interpreter.buildQueryPlan(plan); - auto pipeline = plan.buildQueryPipeline(); + auto pipeline = plan.buildQueryPipeline(QueryPlanOptimizationSettings(context.getSettingsRef())); if (settings.graph) { diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 9f97160f77f..f13f8fb4106 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -548,7 +548,7 @@ BlockIO InterpreterSelectQuery::execute() buildQueryPlan(query_plan); - res.pipeline = std::move(*query_plan.buildQueryPipeline()); + res.pipeline = std::move(*query_plan.buildQueryPipeline(QueryPlanOptimizationSettings(context->getSettingsRef()))); return res; } diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 59fcff61936..b894db79c7b 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -413,7 +413,7 @@ BlockIO InterpreterSelectWithUnionQuery::execute() QueryPlan query_plan; buildQueryPlan(query_plan); - auto pipeline = query_plan.buildQueryPipeline(); + auto pipeline = query_plan.buildQueryPipeline(QueryPlanOptimizationSettings(context->getSettingsRef())); res.pipeline = std::move(*pipeline); res.pipeline.addInterpreterContext(context); diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 528b5ec6d8e..43ab42b42c7 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -756,7 +756,7 @@ QueryPipelinePtr MutationsInterpreter::addStreamsForLaterStages(const std::vecto } } - auto pipeline = plan.buildQueryPipeline(); + auto pipeline = plan.buildQueryPipeline(QueryPlanOptimizationSettings(context.getSettingsRef())); pipeline->addSimpleTransform([&](const Block & header) { return std::make_shared(header); diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index a5c3af488a9..f96237fc71a 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -9,7 +9,7 @@ namespace QueryPlanOptimizations { /// This is the main function which optimizes the whole QueryPlan tree. -void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes); +void optimizeTree(const QueryPlanOptimizationSettings & settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes); /// Optimization is a function applied to QueryPlan::Node. /// It can read and update subtree of specified node. 
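/// The number of optimizations applied per optimizeTree() call is capped by
/// QueryPlanOptimizationSettings::max_optimizations_to_apply (zero means no cap).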
diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp new file mode 100644 index 00000000000..cbd38d46ebf --- /dev/null +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp @@ -0,0 +1,12 @@ +#include +#include + +namespace DB +{ + +QueryPlanOptimizationSettings::QueryPlanOptimizationSettings(const Settings & settings) +{ + max_optimizations_to_apply = settings.query_plan_max_optimizations_to_apply; +} + +} diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h new file mode 100644 index 00000000000..074298e24a1 --- /dev/null +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace DB +{ + +struct Settings; + +struct QueryPlanOptimizationSettings +{ + QueryPlanOptimizationSettings() = delete; + explicit QueryPlanOptimizationSettings(const Settings & settings); + + /// If not zero, throw if too many optimizations were applied to query plan. + /// It helps to avoid infinite optimization loop. + size_t max_optimizations_to_apply = 0; +}; + +} diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index cc81a7f39fc..858bde9c660 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -1,10 +1,20 @@ #include +#include +#include #include -namespace DB::QueryPlanOptimizations +namespace DB { -void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes) +namespace ErrorCodes +{ + extern const int TOO_MANY_QUERY_PLAN_OPTIMIZATIONS; +} + +namespace QueryPlanOptimizations +{ + +void optimizeTree(const QueryPlanOptimizationSettings & settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes) { const auto & optimizations = getOptimizations(); @@ -23,7 +33,7 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes) std::stack stack; stack.push(Frame{.node = &root}); - size_t max_optimizations_to_apply = 0; + size_t max_optimizations_to_apply = settings.max_optimizations_to_apply; size_t total_applied_optimizations = 0; while (!stack.empty()) @@ -58,7 +68,9 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes) continue; if (max_optimizations_to_apply && max_optimizations_to_apply < total_applied_optimizations) - continue; + throw Exception(ErrorCodes::TOO_MANY_QUERY_PLAN_OPTIMIZATIONS, + "Too many optimizations applied to query plan. Current limit {}", + max_optimizations_to_apply); /// Try to apply optimization. 
        auto update_depth = optimization.apply(frame.node, nodes);
@@ -81,3 +93,4 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
 }
 
 }
+}
diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp
index 755944fdf9f..f5d5e0d99b7 100644
--- a/src/Processors/QueryPlan/QueryPlan.cpp
+++ b/src/Processors/QueryPlan/QueryPlan.cpp
@@ -130,10 +130,10 @@ void QueryPlan::addStep(QueryPlanStepPtr step)
             " input expected", ErrorCodes::LOGICAL_ERROR);
 }
 
-QueryPipelinePtr QueryPlan::buildQueryPipeline()
+QueryPipelinePtr QueryPlan::buildQueryPipeline(const QueryPlanOptimizationSettings & optimization_settings)
 {
     checkInitialized();
-    optimize();
+    optimize(optimization_settings);
 
     struct Frame
     {
@@ -177,7 +177,7 @@ QueryPipelinePtr QueryPlan::buildQueryPipeline()
     return last_pipeline;
 }
 
-Pipe QueryPlan::convertToPipe()
+Pipe QueryPlan::convertToPipe(const QueryPlanOptimizationSettings & optimization_settings)
 {
     if (!isInitialized())
         return {};
@@ -185,7 +185,7 @@ Pipe QueryPlan::convertToPipe()
     if (isCompleted())
         throw Exception("Cannot convert completed QueryPlan to Pipe", ErrorCodes::LOGICAL_ERROR);
 
-    return QueryPipeline::getPipe(std::move(*buildQueryPipeline()));
+    return QueryPipeline::getPipe(std::move(*buildQueryPipeline(optimization_settings)));
 }
 
 void QueryPlan::addInterpreterContext(std::shared_ptr<Context> context)
@@ -333,9 +333,9 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio
     }
 }
 
-void QueryPlan::optimize()
+void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_settings)
 {
-    QueryPlanOptimizations::optimizeTree(*root, nodes);
+    QueryPlanOptimizations::optimizeTree(optimization_settings, *root, nodes);
 }
 
 }
diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h
index 9d2d7d93a36..7973f9af45a 100644
--- a/src/Processors/QueryPlan/QueryPlan.h
+++ b/src/Processors/QueryPlan/QueryPlan.h
@@ -5,6 +5,7 @@
 #include 
 #include 
+#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
 
 namespace DB
 {
@@ -27,7 +28,7 @@ class Pipe;
 
 /// A tree of query steps.
 /// The goal of QueryPlan is to build QueryPipeline.
-/// QueryPlan let delay pipeline creation which is helpful for pipeline-level optimisations.
+/// QueryPlan lets you delay pipeline creation, which is helpful for pipeline-level optimizations.
 class QueryPlan
 {
 public:
@@ -43,12 +44,12 @@ public:
     bool isCompleted() const; /// Tree is not empty and root hasOutputStream()
     const DataStream & getCurrentDataStream() const; /// Checks that (isInitialized() && !isCompleted())
 
-    void optimize();
+    void optimize(const QueryPlanOptimizationSettings & optimization_settings);
 
-    QueryPipelinePtr buildQueryPipeline();
+    QueryPipelinePtr buildQueryPipeline(const QueryPlanOptimizationSettings & optimization_settings);
 
     /// If initialized, build pipeline and convert to pipe. Otherwise, return empty pipe.
- Pipe convertToPipe(); + Pipe convertToPipe(const QueryPlanOptimizationSettings & optimization_settings); struct ExplainPlanOptions { diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 98e1880de54..1d011effc69 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -33,7 +33,7 @@ public: std::move(*MergeTreeDataSelectExecutor(part->storage) .readFromParts({part}, column_names, metadata_snapshot, query_info, context, max_block_size, num_streams)); - return query_plan.convertToPipe(); + return query_plan.convertToPipe(QueryPlanOptimizationSettings(context.getSettingsRef())); } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index e28d5f4d6d1..33aa2140a1f 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -166,7 +166,7 @@ Pipe StorageBuffer::read( { QueryPlan plan; read(plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); - return plan.convertToPipe(); + return plan.convertToPipe(QueryPlanOptimizationSettings(context.getSettingsRef())); } void StorageBuffer::read( diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index c08dc38fa2d..f66e2f782ca 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -501,7 +501,7 @@ Pipe StorageDistributed::read( { QueryPlan plan; read(plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); - return plan.convertToPipe(); + return plan.convertToPipe(QueryPlanOptimizationSettings(context.getSettingsRef())); } void StorageDistributed::read( diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index af00b37b1d5..02654b37d68 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -126,7 +126,7 @@ Pipe StorageMaterializedView::read( { QueryPlan plan; read(plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); - return plan.convertToPipe(); + return plan.convertToPipe(QueryPlanOptimizationSettings(context.getSettingsRef())); } void StorageMaterializedView::read( diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 202e909af0f..c8f44c78e6e 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -198,7 +198,7 @@ Pipe StorageMergeTree::read( { QueryPlan plan; read(plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); - return plan.convertToPipe(); + return plan.convertToPipe(QueryPlanOptimizationSettings(context.getSettingsRef())); } std::optional StorageMergeTree::totalRows(const Settings &) const diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 518577c473c..a0d96f43c17 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3809,7 +3809,7 @@ Pipe StorageReplicatedMergeTree::read( { QueryPlan plan; read(plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); - return plan.convertToPipe(); + return plan.convertToPipe(QueryPlanOptimizationSettings(context.getSettingsRef())); } diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 
1ee5ab3d0ca..632d3807f83 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -59,7 +59,7 @@ Pipe StorageView::read( { QueryPlan plan; read(plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); - return plan.convertToPipe(); + return plan.convertToPipe(QueryPlanOptimizationSettings(context.getSettingsRef())); } void StorageView::read( diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index f261e134494..99b32b74ca7 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -1,3 +1,5 @@ +Too many optimizations applied to query plan +Too many optimizations applied to query plan > sipHash should be calculated after filtration FUNCTION sipHash64 Filter column: equals diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index 84452fe651f..3148dc4a597 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -4,6 +4,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +$CLICKHOUSE_CLIENT -q "select x + 1 from (select y + 2 as x from (select dummy + 3 as y)) settings query_plan_max_optimizations_to_apply = 1" 2>&1 | + grep -o "Too many optimizations applied to query plan" + echo "> sipHash should be calculated after filtration" $CLICKHOUSE_CLIENT -q "explain actions = 1 select sum(x), sum(y) from (select sipHash64(number) as x, bitAnd(number, 1024) as y from numbers_mt(1000000000) limit 1000000000) where y = 0" | grep -o "FUNCTION sipHash64\|Filter column: equals" echo "> sorting steps should know about limit" @@ -146,4 +149,4 @@ $CLICKHOUSE_CLIENT -q " $CLICKHOUSE_CLIENT -q " select * from ( select y, sum(x) from (select number as x, number % 4 as y from numbers(10)) group by y with totals - ) where y != 2" \ No newline at end of file + ) where y != 2" From b082b661f78e90632bf05d2929ae9428e030de94 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 26 Feb 2021 19:32:12 +0300 Subject: [PATCH 116/149] Fix ya.make --- src/Processors/ya.make | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/ya.make b/src/Processors/ya.make index a44272cf9c0..4b376cdbfb2 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -113,6 +113,7 @@ SRCS( QueryPlan/MergingFinal.cpp QueryPlan/MergingSortedStep.cpp QueryPlan/OffsetStep.cpp + QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp QueryPlan/Optimizations/filterPushDown.cpp QueryPlan/Optimizations/liftUpArrayJoin.cpp QueryPlan/Optimizations/limitPushDown.cpp From ac4d3b504eb97ddc491b31daee5eeeef04229bd6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Feb 2021 20:15:11 +0300 Subject: [PATCH 117/149] Allow to start up with modified binary under gdb --- programs/server/Server.cpp | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index f501e182cb7..8fcdfc4beac 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -82,6 +82,7 @@ #if defined(OS_LINUX) # include +# include # include #endif @@ -480,16 +481,26 @@ int Server::main(const std::vector & /*args*/) } else { - throw Exception(ErrorCodes::CORRUPTED_DATA, - "Calculated checksum 
of the ClickHouse binary ({0}) does not correspond"
-                " to the reference checksum stored in the binary ({1})."
-                " It may indicate one of the following:"
-                " - the file {2} was changed just after startup;"
-                " - the file {2} is damaged on disk due to faulty hardware;"
-                " - the loaded executable is damaged in memory due to faulty hardware;"
-                " - the file {2} was intentionally modified;"
-                " - logical error in code."
-                , calculated_binary_hash, stored_binary_hash, executable_path);
+            /// If program is run under debugger, ptrace will fail.
+            if (ptrace(PTRACE_TRACEME, 0, nullptr, nullptr) == -1)
+            {
+                /// Program is run under debugger. Modification of its binary image is ok for breakpoints.
+                LOG_WARNING(log, "Server is run under debugger and its binary image is modified (most likely with breakpoints).");
+            }
+            else
+            {
+                throw Exception(ErrorCodes::CORRUPTED_DATA,
+                    "Calculated checksum of the ClickHouse binary ({0}) does not correspond"
+                    " to the reference checksum stored in the binary ({1})."
+                    " It may indicate one of the following:"
+                    " - the file {2} was changed just after startup;"
+                    " - the file {2} is damaged on disk due to faulty hardware;"
+                    " - the loaded executable is damaged in memory due to faulty hardware;"
+                    " - the file {2} was intentionally modified;"
+                    " - logical error in code."
+                    , calculated_binary_hash, stored_binary_hash, executable_path);
+            }
         }
     }
     else

From a48bb7e3a01b42fd1e5b4fda7452fe0d73593b96 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 26 Feb 2021 22:05:25 +0300
Subject: [PATCH 118/149] Fix redundant zookeeper reconnect and possibility of
 multiple zookeeper sessions for a single ClickHouse server

---
 docker/test/integration/base/Dockerfile       |  3 +-
 src/Common/ZooKeeper/IKeeper.h                |  3 ++
 src/Common/ZooKeeper/TestKeeper.h             |  2 +-
 src/Common/ZooKeeper/ZooKeeper.cpp            | 50 ++++++++++---------
 src/Common/ZooKeeper/ZooKeeper.h              | 15 ++++--
 src/Common/ZooKeeper/ZooKeeperImpl.h          |  2 +
 src/Interpreters/Context.cpp                  |  5 ++
 .../integration/test_reload_zookeeper/test.py |  8 +++
 8 files changed, 60 insertions(+), 28 deletions(-)

diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile
index 4963ff0094d..938d8d45ffd 100644
--- a/docker/test/integration/base/Dockerfile
+++ b/docker/test/integration/base/Dockerfile
@@ -18,7 +18,8 @@ RUN apt-get update \
     curl \
     tar \
     krb5-user \
-    iproute2
+    iproute2 \
+    lsof
 RUN rm -rf \
   /var/lib/apt/lists/* \
   /var/cache/debconf \
diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h
index c53ea60ec7c..9ff37a7045d 100644
--- a/src/Common/ZooKeeper/IKeeper.h
+++ b/src/Common/ZooKeeper/IKeeper.h
@@ -391,6 +391,9 @@ public:
     virtual void multi(
         const Requests & requests,
         MultiCallback callback) = 0;
+
+    /// Expire session and finish all pending requests
+    virtual void finalize() = 0;
 };
 
 }
diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h
index ca9f584304f..a64b76336d2 100644
--- a/src/Common/ZooKeeper/TestKeeper.h
+++ b/src/Common/ZooKeeper/TestKeeper.h
@@ -83,6 +83,7 @@ public:
         const Requests & requests,
         MultiCallback callback) override;
 
+    void finalize() override;
 
     struct Node
     {
@@ -130,7 +131,6 @@ private:
 
     void pushRequest(RequestInfo && request);
 
-    void finalize();
 
     ThreadFromGlobalPool processing_thread;
 
diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp
index a1c6eb9b481..599980a867a 100644
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -44,7 
static void check(Coordination::Error code, const std::string & path) } -void ZooKeeper::init(const std::string & implementation_, const std::string & hosts_, const std::string & identity_, +void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_) { log = &Poco::Logger::get("ZooKeeper"); @@ -60,13 +60,16 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho if (hosts.empty()) throw KeeperException("No hosts passed to ZooKeeper constructor.", Coordination::Error::ZBADARGUMENTS); - std::vector hosts_strings; - splitInto<','>(hosts_strings, hosts); Coordination::ZooKeeper::Nodes nodes; - nodes.reserve(hosts_strings.size()); + nodes.reserve(hosts.size()); + + Strings shuffled_hosts = hosts; + /// Shuffle the hosts to distribute the load among ZooKeeper nodes. + pcg64 generator(randomSeed()); + std::shuffle(shuffled_hosts.begin(), shuffled_hosts.end(), generator); bool dns_error = false; - for (auto & host_string : hosts_strings) + for (auto & host_string : shuffled_hosts) { try { @@ -109,9 +112,9 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho Poco::Timespan(0, operation_timeout_ms_ * 1000)); if (chroot.empty()) - LOG_TRACE(log, "Initialized, hosts: {}", hosts); + LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(hosts, ",")); else - LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", hosts, chroot); + LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(hosts, ","), chroot); } else if (implementation == "testkeeper") { @@ -128,7 +131,16 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho throw KeeperException("Zookeeper root doesn't exist. You should create root node " + chroot + " before start.", Coordination::Error::ZNONODE); } -ZooKeeper::ZooKeeper(const std::string & hosts_, const std::string & identity_, int32_t session_timeout_ms_, +ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_, + int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_) +{ + Strings hosts_; + splitInto<','>(hosts_, hosts_string); + + init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_); +} + +ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_) { init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_); @@ -141,8 +153,6 @@ struct ZooKeeperArgs Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_name, keys); - std::vector hosts_strings; - session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; implementation = "zookeeper"; @@ -150,7 +160,7 @@ struct ZooKeeperArgs { if (startsWith(key, "node")) { - hosts_strings.push_back( + hosts.push_back( (config.getBool(config_name + "." + key + ".secure", false) ? "secure://" : "") + config.getString(config_name + "." + key + ".host") + ":" + config.getString(config_name + "." 
+ key + ".port", "2181") @@ -180,17 +190,6 @@ struct ZooKeeperArgs throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); } - /// Shuffle the hosts to distribute the load among ZooKeeper nodes. - pcg64 generator(randomSeed()); - std::shuffle(hosts_strings.begin(), hosts_strings.end(), generator); - - for (auto & host : hosts_strings) - { - if (!hosts.empty()) - hosts += ','; - hosts += host; - } - if (!chroot.empty()) { if (chroot.front() != '/') @@ -200,7 +199,7 @@ struct ZooKeeperArgs } } - std::string hosts; + Strings hosts; std::string identity; int session_timeout_ms; int operation_timeout_ms; @@ -922,6 +921,10 @@ Coordination::Error ZooKeeper::tryMultiNoThrow(const Coordination::Requests & re } } +void ZooKeeper::finalize() +{ + impl->finalize(); +} size_t KeeperMultiException::getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses) { @@ -1000,4 +1003,5 @@ Coordination::RequestPtr makeCheckRequest(const std::string & path, int version) request->version = version; return request; } + } diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 5b37e4d6024..4a65ff070f7 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -50,7 +50,14 @@ class ZooKeeper public: using Ptr = std::shared_ptr; - ZooKeeper(const std::string & hosts_, const std::string & identity_ = "", + /// hosts_string -- comma separated [secure://]host:port list + ZooKeeper(const std::string & hosts_string, const std::string & identity_ = "", + int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS, + int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS, + const std::string & chroot_ = "", + const std::string & implementation_ = "zookeeper"); + + ZooKeeper(const Strings & hosts_, const std::string & identity_ = "", int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS, int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS, const std::string & chroot_ = "", @@ -247,10 +254,12 @@ public: /// Like the previous one but don't throw any exceptions on future.get() FutureMulti tryAsyncMulti(const Coordination::Requests & ops); + void finalize(); + private: friend class EphemeralNodeHolder; - void init(const std::string & implementation_, const std::string & hosts_, const std::string & identity_, + void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_); /// The following methods don't throw exceptions but return error codes. 
@@ -266,7 +275,7 @@ private: std::unique_ptr impl; - std::string hosts; + Strings hosts; std::string identity; int32_t session_timeout_ms; int32_t operation_timeout_ms; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 71b7cd56149..d8b9e23ced2 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -167,6 +167,8 @@ public: const Requests & requests, MultiCallback callback) override; + void finalize() override { finalize(false, false); } + private: String root_path; ACLs default_acls; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 98e4a87fba3..6f27a6bdb7d 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1661,7 +1661,12 @@ void Context::resetZooKeeper() const static void reloadZooKeeperIfChangedImpl(const ConfigurationPtr & config, const std::string & config_name, zkutil::ZooKeeperPtr & zk) { if (!zk || zk->configChanged(*config, config_name)) + { + if (zk) + zk->finalize(); + zk = std::make_shared(*config, config_name); + } } void Context::reloadZooKeeperIfChanged(const ConfigurationPtr & config) const diff --git a/tests/integration/test_reload_zookeeper/test.py b/tests/integration/test_reload_zookeeper/test.py index 82c47f4ec9e..1fe0ab13a7f 100644 --- a/tests/integration/test_reload_zookeeper/test.py +++ b/tests/integration/test_reload_zookeeper/test.py @@ -74,6 +74,9 @@ def test_reload_zookeeper(start_cluster): with pytest.raises(QueryRuntimeException): node.query("SELECT COUNT() FROM test_table", settings={"select_sequential_consistency" : 1}) + def get_active_zk_connections(): + return str(node.exec_in_container(['bash', '-c', 'lsof -a -i4 -i6 -itcp -w | grep 2181 | grep ESTABLISHED | wc -l'], privileged=True, user='root')).strip() + ## set config to zoo2, server will be normal new_config = """ @@ -89,5 +92,10 @@ def test_reload_zookeeper(start_cluster): node.replace_config("/etc/clickhouse-server/conf.d/zookeeper.xml", new_config) node.query("SYSTEM RELOAD CONFIG") + active_zk_connections = get_active_zk_connections() + assert active_zk_connections == '1', "Total connections to ZooKeeper not equal to 1, {}".format(active_zk_connections) + assert_eq_with_retry(node, "SELECT COUNT() FROM test_table", '1000', retry_count=120, sleep_time=0.5) + active_zk_connections = get_active_zk_connections() + assert active_zk_connections == '1', "Total connections to ZooKeeper not equal to 1, {}".format(active_zk_connections) From bc1a316b28c53b348e91b807fd0dda7abdcc3cdd Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 25 Feb 2021 09:43:29 +0300 Subject: [PATCH 119/149] Bump zookeeper version to 3.6.2 in tests 3.4.12 was released 1.05.2018 --- .../runner/compose/docker_compose_zookeeper.yml | 12 ++++++------ tests/integration/test_zookeeper_config/test.py | 2 +- .../docker-compose/zookeeper-service.yml | 2 +- .../example/docker-compose/zookeeper-service.yml | 2 +- .../docker-compose/zookeeper-service.yml | 2 +- .../docker-compose/zookeeper-service.yml | 2 +- .../docker-compose/zookeeper-service.yml | 2 +- .../rbac/docker-compose/zookeeper-service.yml | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docker/test/integration/runner/compose/docker_compose_zookeeper.yml b/docker/test/integration/runner/compose/docker_compose_zookeeper.yml index 49e285b5515..1601d217a25 100644 --- a/docker/test/integration/runner/compose/docker_compose_zookeeper.yml +++ b/docker/test/integration/runner/compose/docker_compose_zookeeper.yml @@ -1,11 
+1,11 @@
 version: '2.3'
 services:
     zoo1:
-        image: zookeeper:3.4.12
+        image: zookeeper:3.6.2
         restart: always
         environment:
             ZOO_TICK_TIME: 500
-            ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
+            ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
             ZOO_MY_ID: 1
             JVMFLAGS: -Dzookeeper.forceSync=no
         volumes:
@@ -16,11 +16,11 @@ services:
             source: ${ZK_DATA_LOG1:-}
             target: /datalog
     zoo2:
-        image: zookeeper:3.4.12
+        image: zookeeper:3.6.2
         restart: always
         environment:
             ZOO_TICK_TIME: 500
-            ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
+            ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
             ZOO_MY_ID: 2
             JVMFLAGS: -Dzookeeper.forceSync=no
         volumes:
@@ -31,11 +31,11 @@ services:
             source: ${ZK_DATA_LOG2:-}
             target: /datalog
     zoo3:
-        image: zookeeper:3.4.12
+        image: zookeeper:3.6.2
         restart: always
         environment:
             ZOO_TICK_TIME: 500
-            ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
+            ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
             ZOO_MY_ID: 3
             JVMFLAGS: -Dzookeeper.forceSync=no
         volumes:
diff --git a/tests/integration/test_zookeeper_config/test.py b/tests/integration/test_zookeeper_config/test.py
index eb5ab2da98f..80875da45e0 100644
--- a/tests/integration/test_zookeeper_config/test.py
+++ b/tests/integration/test_zookeeper_config/test.py
@@ -129,7 +129,7 @@ def test_secure_connection():
     # We need absolute path in zookeeper volumes. Generate it dynamically.
     TEMPLATE = '''
     zoo{zoo_id}:
-        image: zookeeper:3.5.6
+        image: zookeeper:3.6.2
         restart: always
         environment:
             ZOO_TICK_TIME: 500
diff --git a/tests/testflows/aes_encryption/docker-compose/zookeeper-service.yml b/tests/testflows/aes_encryption/docker-compose/zookeeper-service.yml
index f3df33358be..f27405b97a2 100644
--- a/tests/testflows/aes_encryption/docker-compose/zookeeper-service.yml
+++ b/tests/testflows/aes_encryption/docker-compose/zookeeper-service.yml
@@ -2,7 +2,7 @@ version: '2.3'
 
 services:
   zookeeper:
-    image: zookeeper:3.4.12
+    image: zookeeper:3.6.2
     expose:
       - "2181"
     environment:
diff --git a/tests/testflows/example/docker-compose/zookeeper-service.yml b/tests/testflows/example/docker-compose/zookeeper-service.yml
index 6691a2df31c..ca732a48dbd 100644
--- a/tests/testflows/example/docker-compose/zookeeper-service.yml
+++ b/tests/testflows/example/docker-compose/zookeeper-service.yml
@@ -2,7 +2,7 @@ version: '2.3'
 
 services:
   zookeeper:
-    image: zookeeper:3.4.12
+    image: zookeeper:3.6.2
     expose:
       - "2181"
     environment:
diff --git a/tests/testflows/ldap/authentication/docker-compose/zookeeper-service.yml b/tests/testflows/ldap/authentication/docker-compose/zookeeper-service.yml
index 6691a2df31c..ca732a48dbd 100644
--- a/tests/testflows/ldap/authentication/docker-compose/zookeeper-service.yml
+++ b/tests/testflows/ldap/authentication/docker-compose/zookeeper-service.yml
@@ -2,7 +2,7 @@ version: '2.3'
 
 services:
   zookeeper:
-    image: zookeeper:3.4.12
+    image: zookeeper:3.6.2
     expose:
       - "2181"
     environment:
diff --git a/tests/testflows/ldap/external_user_directory/docker-compose/zookeeper-service.yml b/tests/testflows/ldap/external_user_directory/docker-compose/zookeeper-service.yml
index 6691a2df31c..ca732a48dbd 100644
--- a/tests/testflows/ldap/external_user_directory/docker-compose/zookeeper-service.yml
+++ b/tests/testflows/ldap/external_user_directory/docker-compose/zookeeper-service.yml
@@ -2,7 +2,7 @@ version: 
'2.3' services: zookeeper: - image: zookeeper:3.4.12 + image: zookeeper:3.6.2 expose: - "2181" environment: diff --git a/tests/testflows/ldap/role_mapping/docker-compose/zookeeper-service.yml b/tests/testflows/ldap/role_mapping/docker-compose/zookeeper-service.yml index 6691a2df31c..ca732a48dbd 100644 --- a/tests/testflows/ldap/role_mapping/docker-compose/zookeeper-service.yml +++ b/tests/testflows/ldap/role_mapping/docker-compose/zookeeper-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: zookeeper: - image: zookeeper:3.4.12 + image: zookeeper:3.6.2 expose: - "2181" environment: diff --git a/tests/testflows/rbac/docker-compose/zookeeper-service.yml b/tests/testflows/rbac/docker-compose/zookeeper-service.yml index f3df33358be..f27405b97a2 100755 --- a/tests/testflows/rbac/docker-compose/zookeeper-service.yml +++ b/tests/testflows/rbac/docker-compose/zookeeper-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: zookeeper: - image: zookeeper:3.4.12 + image: zookeeper:3.6.2 expose: - "2181" environment: From df95411d515500f3a15ba70f237febfe4aa2eb2f Mon Sep 17 00:00:00 2001 From: Sina <30809018+Sin4wd@users.noreply.github.com> Date: Sat, 27 Feb 2021 01:04:35 +0330 Subject: [PATCH 120/149] Fix broken links in "See Also" The link was transformed into "https://clickhouse.tech/docs/en/sql-reference/sql-reference/statements/create/table#ttl-expression" which did not exist and got 404. --- docs/en/sql-reference/statements/alter/ttl.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/ttl.md b/docs/en/sql-reference/statements/alter/ttl.md index 5331afdb2f8..e8bfb78ec68 100644 --- a/docs/en/sql-reference/statements/alter/ttl.md +++ b/docs/en/sql-reference/statements/alter/ttl.md @@ -81,5 +81,5 @@ The `TTL` is no longer there, so the second row is not deleted: ### See Also -- More about the [TTL-expression](../../../sql-reference/statements/create/table#ttl-expression). -- Modify column [with TTL](../../../sql-reference/statements/alter/column#alter_modify-column). +- More about the [TTL-expression](../../../../sql-reference/statements/create/table#ttl-expression). +- Modify column [with TTL](../../../../sql-reference/statements/alter/column#alter_modify-column). From 9e93d7f507a61d747758e349fccc2b6b1057e528 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 27 Feb 2021 11:07:14 +0300 Subject: [PATCH 121/149] Fix tidy and add comments --- src/Common/ZooKeeper/TestKeeper.h | 2 +- src/Common/ZooKeeper/ZooKeeper.cpp | 6 +++--- src/Common/ZooKeeper/ZooKeeperImpl.h | 14 +++++++++++++- src/Storages/StorageReplicatedMergeTree.cpp | 6 ++++++ 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index a64b76336d2..b46f98c0074 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -30,7 +30,7 @@ using TestKeeperRequestPtr = std::shared_ptr; * * NOTE: You can add various failure modes for better testing. 
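 * For example, the processing thread could be extended to delay or drop random
 * requests, or to answer them with Error::ZCONNECTIONLOSS (not implemented here).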
 */
-class TestKeeper : public IKeeper
+class TestKeeper final : public IKeeper
 {
 public:
     TestKeeper(const String & root_path_, Poco::Timespan operation_timeout_);
diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp
index 599980a867a..330985e1599 100644
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -134,10 +134,10 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
 ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_,
                      int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_)
 {
-    Strings hosts_;
-    splitInto<','>(hosts_, hosts_string);
+    Strings hosts_strings;
+    splitInto<','>(hosts_strings, hosts_string);
 
-    init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_);
+    init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_);
 }
 
 ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_,
diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h
index 71b7cd56149..26d944814d8 100644
--- a/src/Common/ZooKeeper/ZooKeeperImpl.h
+++ b/src/Common/ZooKeeper/ZooKeeperImpl.h
@@ -88,7 +88,7 @@ using namespace DB;
 
 /** Usage scenario: look at the documentation for IKeeper class.
   */
-class ZooKeeper : public IKeeper
+class ZooKeeper final : public IKeeper
 {
 public:
     struct Node
@@ -167,6 +167,18 @@ public:
         const Requests & requests,
         MultiCallback callback) override;
 
+    /// Without forcefully invalidating (finalizing) the ZooKeeper session before
+    /// establishing a new one, there was a possibility that the server was using
+    /// two ZooKeeper sessions simultaneously in different parts of code.
+    /// This is a strong antipattern and we have always prevented it.
+
+    /// ZooKeeper is linearizable for writes, but not linearizable for
+    /// reads, it only maintains "sequential consistency": in every session
+    /// you observe all events in order but possibly with some delay. If you
+    /// perform a write in one session, then notify a different part of the code
+    /// and it does a read in another session, that read may not see the
+    /// already performed write.
+
     void finalize() override { finalize(false, false); }
 
 private:
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index f2c88cdedd9..94bf2dfcf6d 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -144,6 +144,12 @@ static const auto MUTATIONS_FINALIZING_IDLE_SLEEP_MS = 5 * 1000;
 
 void StorageReplicatedMergeTree::setZooKeeper()
 {
+    /// Every ReplicatedMergeTree table uses only one ZooKeeper session.
+    /// But if several ReplicatedMergeTree tables use different
+    /// ZooKeeper sessions, some queries like ATTACH PARTITION FROM may have
+    /// strange effects. So we always use only one session for all tables.
+ /// (excluding auxiliary zookeepers) + std::lock_guard lock(current_zookeeper_mutex); if (zookeeper_name == default_zookeeper_name) { From 63b95c7451c70f35d3692b88340d87c4a0e4649d Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Sat, 27 Feb 2021 11:18:28 +0300 Subject: [PATCH 122/149] MySQL dictionary source: A mechanism for retrying unexpected connection loss during communication with mysql server (#21237) * Added a new type of Exception for better recognition of connection failures * Added more logging for mysql communication * Retries on connection loss during query. Make MySQL dictionary source resilient to unexpected loss of connection during querying MySQL. This behavior is controlled with ".fail_on_connection_loss" config parameter, which defaults to false. * Stripped some debugging leftover garbage * Minor followup corrections * Corrections after PR comments * Yet more fixes --- base/mysqlxx/Exception.h | 9 ++++ base/mysqlxx/Pool.cpp | 51 +++++++++++++--------- base/mysqlxx/Pool.h | 10 +++-- base/mysqlxx/Query.cpp | 25 ++++++++++- src/Dictionaries/MySQLDictionarySource.cpp | 44 ++++++++++++++++--- src/Dictionaries/MySQLDictionarySource.h | 4 ++ 6 files changed, 113 insertions(+), 30 deletions(-) diff --git a/base/mysqlxx/Exception.h b/base/mysqlxx/Exception.h index eaeb3565af1..48cd0997b94 100644 --- a/base/mysqlxx/Exception.h +++ b/base/mysqlxx/Exception.h @@ -26,6 +26,15 @@ struct ConnectionFailed : public Exception }; +/// Connection to MySQL server was lost +struct ConnectionLost : public Exception +{ + ConnectionLost(const std::string & msg, int code = 0) : Exception(msg, code) {} + const char * name() const throw() override { return "mysqlxx::ConnectionLost"; } + const char * className() const throw() override { return "mysqlxx::ConnectionLost"; } +}; + + /// Erroneous query. 
struct BadQuery : public Exception { diff --git a/base/mysqlxx/Pool.cpp b/base/mysqlxx/Pool.cpp index b524087836d..cf8b3cf9267 100644 --- a/base/mysqlxx/Pool.cpp +++ b/base/mysqlxx/Pool.cpp @@ -10,7 +10,6 @@ #include -#include #include @@ -41,7 +40,9 @@ void Pool::Entry::decrementRefCount() Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & config_name, unsigned default_connections_, unsigned max_connections_, const char * parent_config_name_) - : default_connections(default_connections_), max_connections(max_connections_) + : logger(Poco::Logger::get("mysqlxx::Pool")) + , default_connections(default_connections_) + , max_connections(max_connections_) { server = cfg.getString(config_name + ".host"); @@ -130,20 +131,30 @@ Pool::Entry Pool::get() initialize(); for (;;) { + logger.trace("(%s): Iterating through existing MySQL connections", getDescription()); + for (auto & connection : connections) { if (connection->ref_count == 0) return Entry(connection, this); } + logger.trace("(%s): Trying to allocate a new connection.", getDescription()); if (connections.size() < static_cast(max_connections)) { Connection * conn = allocConnection(); if (conn) return Entry(conn, this); + + logger.trace("(%s): Unable to create a new connection: Allocation failed.", getDescription()); + } + else + { + logger.trace("(%s): Unable to create a new connection: Max number of connections has been reached.", getDescription()); } lock.unlock(); + logger.trace("(%s): Sleeping for %d seconds.", getDescription(), MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL); sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL); lock.lock(); } @@ -167,8 +178,7 @@ Pool::Entry Pool::tryGet() if (res.tryForceConnected()) /// Tries to reestablish connection as well return res; - auto & logger = Poco::Util::Application::instance().logger(); - logger.information("Idle connection to mysql server cannot be recovered, dropping it."); + logger.debug("(%s): Idle connection to MySQL server cannot be recovered, dropping it.", getDescription()); /// This one is disconnected, cannot be reestablished and so needs to be disposed of. 
connection_it = connections.erase(connection_it); @@ -191,6 +201,8 @@ Pool::Entry Pool::tryGet() void Pool::removeConnection(Connection* connection) { + logger.trace("(%s): Removing connection.", getDescription()); + std::lock_guard lock(mutex); if (connection) { @@ -215,8 +227,6 @@ void Pool::Entry::forceConnected() const if (data == nullptr) throw Poco::RuntimeException("Tried to access NULL database connection."); - Poco::Util::Application & app = Poco::Util::Application::instance(); - bool first = true; while (!tryForceConnected()) { @@ -225,7 +235,7 @@ void Pool::Entry::forceConnected() const else sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL); - app.logger().information("MYSQL: Reconnecting to " + pool->description); + pool->logger.debug("Entry: Reconnecting to MySQL server %s", pool->description); data->conn.connect( pool->db.c_str(), pool->server.c_str(), @@ -248,18 +258,22 @@ bool Pool::Entry::tryForceConnected() const { auto * const mysql_driver = data->conn.getDriver(); const auto prev_connection_id = mysql_thread_id(mysql_driver); + + pool->logger.trace("Entry(connection %lu): sending PING to check if it is alive.", prev_connection_id); if (data->conn.ping()) /// Attempts to reestablish lost connection { const auto current_connection_id = mysql_thread_id(mysql_driver); if (prev_connection_id != current_connection_id) { - auto & logger = Poco::Util::Application::instance().logger(); - logger.information("Reconnected to mysql server. Connection id changed: %lu -> %lu", - prev_connection_id, current_connection_id); + pool->logger.debug("Entry(connection %lu): Reconnected to MySQL server. Connection id changed: %lu -> %lu", + current_connection_id, prev_connection_id, current_connection_id); } + + pool->logger.trace("Entry(connection %lu): PING ok.", current_connection_id); return true; } + pool->logger.trace("Entry(connection %lu): PING failed.", prev_connection_id); return false; } @@ -280,15 +294,13 @@ void Pool::initialize() Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time) { - Poco::Util::Application & app = Poco::Util::Application::instance(); - - std::unique_ptr conn(new Connection); + std::unique_ptr conn_ptr{new Connection}; try { - app.logger().information("MYSQL: Connecting to " + description); + logger.debug("Connecting to %s", description); - conn->conn.connect( + conn_ptr->conn.connect( db.c_str(), server.c_str(), user.c_str(), @@ -305,25 +317,24 @@ Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time) } catch (mysqlxx::ConnectionFailed & e) { + logger.error(e.what()); + if ((!was_successful && !dont_throw_if_failed_first_time) || e.errnum() == ER_ACCESS_DENIED_ERROR || e.errnum() == ER_DBACCESS_DENIED_ERROR || e.errnum() == ER_BAD_DB_ERROR) { - app.logger().error(e.what()); throw; } else { - app.logger().error(e.what()); return nullptr; } } + connections.push_back(conn_ptr.get()); was_successful = true; - auto * connection = conn.release(); - connections.push_back(connection); - return connection; + return conn_ptr.release(); } } diff --git a/base/mysqlxx/Pool.h b/base/mysqlxx/Pool.h index 0f64e58054c..b6189663f55 100644 --- a/base/mysqlxx/Pool.h +++ b/base/mysqlxx/Pool.h @@ -6,6 +6,8 @@ #include #include +#include + #include @@ -167,13 +169,13 @@ public: unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS, unsigned enable_local_infile_ = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE, bool opt_reconnect_ = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT) - : default_connections(default_connections_), 
max_connections(max_connections_), - db(db_), server(server_), user(user_), password(password_), port(port_), socket(socket_), + : logger(Poco::Logger::get("mysqlxx::Pool")), default_connections(default_connections_), + max_connections(max_connections_), db(db_), server(server_), user(user_), password(password_), port(port_), socket(socket_), connect_timeout(connect_timeout_), rw_timeout(rw_timeout_), enable_local_infile(enable_local_infile_), opt_reconnect(opt_reconnect_) {} Pool(const Pool & other) - : default_connections{other.default_connections}, + : logger(other.logger), default_connections{other.default_connections}, max_connections{other.max_connections}, db{other.db}, server{other.server}, user{other.user}, password{other.password}, @@ -203,6 +205,8 @@ public: void removeConnection(Connection * connection); protected: + Poco::Logger & logger; + /// Number of MySQL connections which are created at launch. unsigned default_connections; /// Maximum possible number of connections diff --git a/base/mysqlxx/Query.cpp b/base/mysqlxx/Query.cpp index f3485c54edc..d4514c3e988 100644 --- a/base/mysqlxx/Query.cpp +++ b/base/mysqlxx/Query.cpp @@ -1,11 +1,16 @@ #if __has_include() +#include #include #else +#include #include #endif +#include + #include #include +#include namespace mysqlxx @@ -57,8 +62,24 @@ void Query::reset() void Query::executeImpl() { std::string query_string = query_buf.str(); - if (mysql_real_query(conn->getDriver(), query_string.data(), query_string.size())) - throw BadQuery(errorMessage(conn->getDriver()), mysql_errno(conn->getDriver())); + + MYSQL* mysql_driver = conn->getDriver(); + + auto & logger = Poco::Logger::get("mysqlxx::Query"); + logger.trace("Running MySQL query using connection %lu", mysql_thread_id(mysql_driver)); + if (mysql_real_query(mysql_driver, query_string.data(), query_string.size())) + { + const auto err_no = mysql_errno(mysql_driver); + switch (err_no) + { + case CR_SERVER_GONE_ERROR: + [[fallthrough]]; + case CR_SERVER_LOST: + throw ConnectionLost(errorMessage(mysql_driver), err_no); + default: + throw BadQuery(errorMessage(mysql_driver), err_no); + } + } } UseQueryResult Query::use() diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index a21b1bd50fc..572080aee1e 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -47,11 +47,13 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) # include # include # include "readInvalidateQuery.h" +# include # include namespace DB { static const UInt64 max_block_size = 8192; +static const size_t default_num_tries_on_connection_loss = 3; MySQLDictionarySource::MySQLDictionarySource( @@ -72,7 +74,10 @@ MySQLDictionarySource::MySQLDictionarySource( , query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks} , load_all_query{query_builder.composeLoadAllQuery()} , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} - , close_connection{config.getBool(config_prefix + ".close_connection", false) || config.getBool(config_prefix + ".share_connection", false)} + , close_connection( + config.getBool(config_prefix + ".close_connection", false) || config.getBool(config_prefix + ".share_connection", false)) + , max_tries_for_mysql_block_input_stream( + config.getBool(config_prefix + ".fail_on_connection_loss", false) ? 
1 : default_num_tries_on_connection_loss) { } @@ -94,6 +99,7 @@ MySQLDictionarySource::MySQLDictionarySource(const MySQLDictionarySource & other , invalidate_query{other.invalidate_query} , invalidate_query_response{other.invalidate_query_response} , close_connection{other.close_connection} + , max_tries_for_mysql_block_input_stream{other.max_tries_for_mysql_block_input_stream} { } @@ -114,13 +120,41 @@ std::string MySQLDictionarySource::getUpdateFieldAndDate() } } +BlockInputStreamPtr MySQLDictionarySource::retriedCreateMySqlBIStream(const std::string & data_fetch_query_str, const size_t max_tries) +{ + size_t count_connection_lost = 0; + + while (true) + { + auto connection = pool.get(); + + try + { + return std::make_shared( + connection, data_fetch_query_str, sample_block, max_block_size, close_connection); + } + catch (const mysqlxx::ConnectionLost & ecl) /// There are two retriable failures: CR_SERVER_GONE_ERROR, CR_SERVER_LOST + { + if (++count_connection_lost < max_tries) + { + LOG_WARNING(log, ecl.displayText()); + LOG_WARNING(log, "Lost connection ({}/{}). Trying to reconnect...", count_connection_lost, max_tries); + continue; + } + + LOG_ERROR(log, "Failed ({}/{}) to create BlockInputStream for MySQL dictionary source.", count_connection_lost, max_tries); + throw; + } + } +} + BlockInputStreamPtr MySQLDictionarySource::loadAll() { auto connection = pool.get(); last_modification = getLastModification(connection, false); LOG_TRACE(log, load_all_query); - return std::make_shared(connection, load_all_query, sample_block, max_block_size, close_connection); + return retriedCreateMySqlBIStream(load_all_query, max_tries_for_mysql_block_input_stream); } BlockInputStreamPtr MySQLDictionarySource::loadUpdatedAll() @@ -130,7 +164,7 @@ BlockInputStreamPtr MySQLDictionarySource::loadUpdatedAll() std::string load_update_query = getUpdateFieldAndDate(); LOG_TRACE(log, load_update_query); - return std::make_shared(connection, load_update_query, sample_block, max_block_size, close_connection); + return retriedCreateMySqlBIStream(load_update_query, max_tries_for_mysql_block_input_stream); } BlockInputStreamPtr MySQLDictionarySource::loadIds(const std::vector & ids) @@ -138,7 +172,7 @@ BlockInputStreamPtr MySQLDictionarySource::loadIds(const std::vector & i /// We do not log in here and do not update the modification time, as the request can be large, and often called. const auto query = query_builder.composeLoadIdsQuery(ids); - return std::make_shared(pool.get(), query, sample_block, max_block_size, close_connection); + return retriedCreateMySqlBIStream(query, max_tries_for_mysql_block_input_stream); } BlockInputStreamPtr MySQLDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) @@ -146,7 +180,7 @@ BlockInputStreamPtr MySQLDictionarySource::loadKeys(const Columns & key_columns, /// We do not log in here and do not update the modification time, as the request can be large, and often called. 
const auto query = query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::AND_OR_CHAIN); - return std::make_shared(pool.get(), query, sample_block, max_block_size, close_connection); + return retriedCreateMySqlBIStream(query, max_tries_for_mysql_block_input_stream); } bool MySQLDictionarySource::isModified() const diff --git a/src/Dictionaries/MySQLDictionarySource.h b/src/Dictionaries/MySQLDictionarySource.h index 34f784cdfeb..54bc2da6a56 100644 --- a/src/Dictionaries/MySQLDictionarySource.h +++ b/src/Dictionaries/MySQLDictionarySource.h @@ -69,6 +69,9 @@ private: // execute invalidate_query. expects single cell in result std::string doInvalidateQuery(const std::string & request) const; + /// A helper method for recovering from "Lost connection to MySQL server during query" errors + BlockInputStreamPtr retriedCreateMySqlBIStream(const std::string & query_str, const size_t max_tries); + Poco::Logger * log; std::chrono::time_point update_time; @@ -86,6 +89,7 @@ private: std::string invalidate_query; mutable std::string invalidate_query_response; const bool close_connection; + const size_t max_tries_for_mysql_block_input_stream; }; } From 9dc3952ff35f17a5dddb9e50b02da615949a87c5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 27 Feb 2021 16:30:43 +0300 Subject: [PATCH 123/149] Fix typo --- src/Common/ZooKeeper/ZooKeeperImpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 26d944814d8..afd2e89538f 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -170,7 +170,7 @@ public: /// Without forcefully invalidating (finalizing) ZooKeeper session before /// establishing a new one, there was a possibility that server is using /// two ZooKeeper sessions simultaneously in different parts of code. - /// This is stong antipattern and we always prevented it. + /// This is strong antipattern and we always prevented it. 
/// ZooKeeper is linearizeable for writes, but not linearizeable for /// reads, it only maintains "sequential consistency": in every session From 4f61ef3099f42f17b496a0b0424773978d9a32dc Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 27 Feb 2021 08:49:45 +0000 Subject: [PATCH 124/149] Tests for zstd and zlib --- ...lib_http_compression_json_format.reference | 23 +++++++++++++++++++ ..._long_zlib_http_compression_json_format.sh | 7 ++++++ ...std_http_compression_json_format.reference | 23 +++++++++++++++++++ ..._long_zstd_http_compression_json_format.sh | 7 ++++++ 4 files changed, 60 insertions(+) create mode 100644 tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.reference create mode 100755 tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh create mode 100644 tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.reference create mode 100755 tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh diff --git a/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.reference b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.reference new file mode 100644 index 00000000000..7c089a2fd05 --- /dev/null +++ b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.reference @@ -0,0 +1,23 @@ + }, + { + "datetime": "2020-12-12", + "pipeline": "test-pipeline", + "host": "clickhouse-test-host-001.clickhouse.com", + "home": "clickhouse", + "detail": "clickhouse", + "row_number": "999998" + }, + { + "datetime": "2020-12-12", + "pipeline": "test-pipeline", + "host": "clickhouse-test-host-001.clickhouse.com", + "home": "clickhouse", + "detail": "clickhouse", + "row_number": "999999" + } + ], + + "rows": 1000000, + + "rows_before_limit_at_least": 1048080, + diff --git a/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh new file mode 100755 index 00000000000..e663b329660 --- /dev/null +++ b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip' "${CLICKHOUSE_URL}&enable_http_compression=1&http_zlib_compression_level=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | gzip -d | tail -n30 | head -n23 diff --git a/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.reference b/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.reference new file mode 100644 index 00000000000..7c089a2fd05 --- /dev/null +++ b/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.reference @@ -0,0 +1,23 @@ + }, + { + "datetime": "2020-12-12", + "pipeline": "test-pipeline", + "host": "clickhouse-test-host-001.clickhouse.com", + "home": "clickhouse", + "detail": "clickhouse", + "row_number": "999998" + }, + { + "datetime": "2020-12-12", + "pipeline": "test-pipeline", + "host": "clickhouse-test-host-001.clickhouse.com", + "home": "clickhouse", + "detail": "clickhouse", + "row_number": "999999" + } + ], + + "rows": 1000000, + + "rows_before_limit_at_least": 1048080, + diff --git a/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh b/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh new file mode 100755 index 00000000000..f520a21e9b3 --- /dev/null +++ b/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: zstd' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | zstd -d | tail -n30 | head -n23 From 2cafd54c17c75450bc3df9bc909e533621fe986e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Feb 2021 17:42:31 +0300 Subject: [PATCH 125/149] Add new dataset: Cell Towers --- .../example-datasets/cell-towers.md | 133 ++++++++++++++++++ .../getting-started/example-datasets/index.md | 1 + 2 files changed, 134 insertions(+) create mode 100644 docs/en/getting-started/example-datasets/cell-towers.md diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md new file mode 100644 index 00000000000..f032be76da0 --- /dev/null +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -0,0 +1,133 @@ +--- +toc_priority: 21 +toc_title: Cell Stations +--- + +# Cell Stations + +This dataset is from [OpenCellid](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers. + +As of 2021 it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world with their geographical coordinates and metadata (country code, network, etc). + +OpenCelliD Project is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License, and we redistribute a snapshot of this dataset under the terms of the same license. The up to date version of the dataset is available to download after sign in. + + +## Get the Dataset + +Download the snapshot of the dataset from Feb 2021: [https://datasets.clickhouse.tech/cell_towers.csv.xz] (729 MB). 
+ +Optionally validate the integrity: +``` +md5sum cell_towers.csv.xz +8cf986f4a0d9f12c6f384a0e9192c908 cell_towers.csv.xz +``` + +Decompress it with the following command: +``` +xz -d cell_towers.csv.xz +``` + +Create a table: + +``` +CREATE TABLE cell_towers +( + radio Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5), + mcc UInt16, + net UInt16, + area UInt16, + cell UInt64, + unit Int16, + lon Float64, + lat Float64, + range UInt32, + samples UInt32, + changeable UInt8, + created DateTime, + updated DateTime, + averageSignal UInt8 +) +ENGINE = MergeTree ORDER BY (radio, mcc, net, created); +``` + +Insert the dataset: +``` +clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_towers.csv +``` + + +## Run some queries + +Number of cell towers by type: +``` +SELECT radio, count() AS c FROM cell_towers GROUP BY radio ORDER BY c DESC + +┌─radio─┬────────c─┐ +│ UMTS │ 20686487 │ +│ LTE │ 12101148 │ +│ GSM │ 9931312 │ +│ CDMA │ 556344 │ +│ NR │ 867 │ +└───────┴──────────┘ + +5 rows in set. Elapsed: 0.011 sec. Processed 43.28 million rows, 43.28 MB (3.83 billion rows/s., 3.83 GB/s.) +``` + +Cell towers by mobile country code (MCC): +``` +SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10 + +┌─mcc─┬─count()─┐ +│ 310 │ 5024650 │ +│ 262 │ 2622423 │ +│ 250 │ 1953176 │ +│ 208 │ 1891187 │ +│ 724 │ 1836150 │ +│ 404 │ 1729151 │ +│ 234 │ 1618924 │ +│ 510 │ 1353998 │ +│ 440 │ 1343355 │ +│ 311 │ 1332798 │ +└─────┴─────────┘ + +10 rows in set. Elapsed: 0.019 sec. Processed 43.28 million rows, 86.55 MB (2.33 billion rows/s., 4.65 GB/s.) +``` + +See the dictionary here: [https://en.wikipedia.org/wiki/Mobile_country_code](https://en.wikipedia.org/wiki/Mobile_country_code). + +So, the top countries are USA, Germany and Russia. + +You may want to create an [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts/) in ClickHouse to decode these values. 
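For example, a dictionary over the MCC reference list could look like the following sketch. Note that the `mcc_list` source table, its contents and the connection parameters are assumptions for illustration, not part of this dataset:

```
CREATE DICTIONARY mcc_names
(
    mcc UInt64,
    name String
)
PRIMARY KEY mcc
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' DB 'default' TABLE 'mcc_list'))
LAYOUT(FLAT())
LIFETIME(MIN 0 MAX 0);

SELECT dictGetString('mcc_names', 'name', toUInt64(mcc)) AS country, count() AS c
FROM cell_towers GROUP BY country ORDER BY c DESC LIMIT 3;
```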
+ + +### Example of using `pointInPolygon` function + +Create a table where we will store polygons: + +``` +CREATE TEMPORARY TABLE moscow (polygon Array(Tuple(Float64, Float64))); +``` + +This is a rough shape of Moscow (without "new Moscow"): + +``` +INSERT INTO moscow VALUES ([(37.84172564285271, 55.78000432402266), (37.8381207618713, 55.775874525970494), (37.83979446823122, 55.775626746008065), (37.84243326983639, 55.77446586811748), (37.84262672750849, 55.771974101091104), (37.84153238623039, 55.77114545193181), (37.841124690460184, 55.76722010265554), (37.84239076983644, 55.76654891107098), (37.842283558197025, 55.76258709833121), (37.8421759312134, 55.758073999993734), (37.84198330422974, 55.75381499999371), (37.8416827275085, 55.749277102484484), (37.84157576190186, 55.74794544108413), (37.83897929098507, 55.74525257875241), (37.83739676451868, 55.74404373042019), (37.838732481460525, 55.74298009816793), (37.841183997352545, 55.743060321833575), (37.84097476190185, 55.73938799999373), (37.84048155819702, 55.73570799999372), (37.840095812164286, 55.73228210777237), (37.83983814285274, 55.73080491981639), (37.83846476321406, 55.729799917464675), (37.83835745269769, 55.72919751082619), (37.838636380279524, 55.72859509486539), (37.8395161005249, 55.727705075632784), (37.83897964285276, 55.722727886185154), (37.83862557539366, 55.72034817326636), (37.83559735744853, 55.71944437307499), (37.835370708803126, 55.71831419154461), (37.83738169402022, 55.71765218986692), (37.83823396494291, 55.71691750159089), (37.838056931213345, 55.71547311301385), (37.836812846557606, 55.71221445615604), (37.83522525396725, 55.709331054395555), (37.83269301586908, 55.70953687463627), (37.829667367706236, 55.70903403789297), (37.83311126588435, 55.70552351822608), (37.83058993121339, 55.70041317726053), (37.82983872750851, 55.69883771404813), (37.82934501586913, 55.69718947487017), (37.828926414016685, 55.69504441658371), (37.82876530422971, 55.69287499999378), (37.82894754100031, 55.690759754047335), (37.827697554878185, 55.68951421135665), (37.82447346292115, 55.68965045405069), (37.83136543914793, 55.68322046195302), (37.833554015869154, 55.67814012759211), (37.83544184655761, 55.67295011628339), (37.837480388885474, 55.6672498719639), (37.838960677246064, 55.66316274139358), (37.83926093121332, 55.66046999999383), (37.839025050262435, 55.65869897264431), (37.83670784390257, 55.65794084879904), (37.835656529083245, 55.65694309303843), (37.83704060449217, 55.65689306460552), (37.83696819873806, 55.65550363526252), (37.83760389616388, 55.65487847246661), (37.83687972750851, 55.65356745541324), (37.83515216004943, 55.65155951234079), (37.83312418518067, 55.64979413590619), (37.82801726983639, 55.64640836412121), (37.820614174591, 55.64164525405531), (37.818908190475426, 55.6421883258084), (37.81717543386075, 55.64112490388471), (37.81690987037274, 55.63916106913107), (37.815099354492155, 55.637925371757085), (37.808769150787356, 55.633798276884455), (37.80100123544311, 55.62873670012244), (37.79598013491824, 55.62554336109055), (37.78634567724606, 55.62033499605651), (37.78334147619623, 55.618768681480326), (37.77746201055901, 55.619855533402706), (37.77527329626457, 55.61909966711279), (37.77801986242668, 55.618770300976294), (37.778212973541216, 55.617257701952106), (37.77784818518065, 55.61574504433011), (37.77016867724609, 55.61148576294007), (37.760191219573976, 55.60599579539028), (37.75338926983641, 55.60227892751446), (37.746329965606634, 55.59920577639331), (37.73939925396728, 55.59631430313617), 
(37.73273665739439, 55.5935318803559), (37.7299954450912, 55.59350760316188), (37.7268679946899, 55.59469840523759), (37.72626726983634, 55.59229549697373), (37.7262673598022, 55.59081598950582), (37.71897193121335, 55.5877595845419), (37.70871550793456, 55.58393177431724), (37.700497489410374, 55.580917323756644), (37.69204305026244, 55.57778089778455), (37.68544477378839, 55.57815154690915), (37.68391050793454, 55.57472945079756), (37.678803592590306, 55.57328235936491), (37.6743402539673, 55.57255251445782), (37.66813862698363, 55.57216388774464), (37.617927457672096, 55.57505691895805), (37.60443099999999, 55.5757737568051), (37.599683515869145, 55.57749105910326), (37.59754177842709, 55.57796291823627), (37.59625834786988, 55.57906686095235), (37.59501783265684, 55.57746616444403), (37.593090671936025, 55.57671634534502), (37.587018007904, 55.577944600233785), (37.578692203704804, 55.57982895000019), (37.57327546607398, 55.58116294118248), (37.57385012109279, 55.581550362779), (37.57399562266922, 55.5820107079112), (37.5735356072979, 55.58226289171689), (37.57290393054962, 55.582393529795155), (37.57037722355653, 55.581919415056234), (37.5592298306885, 55.584471614867844), (37.54189249206543, 55.58867650795186), (37.5297256269836, 55.59158133551745), (37.517837865081766, 55.59443656218868), (37.51200186508174, 55.59635625174229), (37.506808949737554, 55.59907823904434), (37.49820432275389, 55.6062944994944), (37.494406071441674, 55.60967103463367), (37.494760001358024, 55.61066689753365), (37.49397137107085, 55.61220931698269), (37.49016528606031, 55.613417718449064), (37.48773249206542, 55.61530616333343), (37.47921386508177, 55.622640129112334), (37.470652153442394, 55.62993723476164), (37.46273446298218, 55.6368075123157), (37.46350692265317, 55.64068225239439), (37.46050283203121, 55.640794546982576), (37.457627470916734, 55.64118904154646), (37.450718034393326, 55.64690488145138), (37.44239252645875, 55.65397824729769), (37.434587576721185, 55.66053543155961), (37.43582144975277, 55.661693766520735), (37.43576786245721, 55.662755031737014), (37.430982915344174, 55.664610641628116), (37.428547447097685, 55.66778515273695), (37.42945134592044, 55.668633314343566), (37.42859571562949, 55.66948145750025), (37.4262836402282, 55.670813882451405), (37.418709037048295, 55.6811141674414), (37.41922139651101, 55.68235377885389), (37.419218771842885, 55.68359335082235), (37.417196501327446, 55.684375235224735), (37.41607020370478, 55.68540557585352), (37.415640857147146, 55.68686637150793), (37.414632153442334, 55.68903015131686), (37.413344899475064, 55.690896881757396), (37.41171432275391, 55.69264232162232), (37.40948282275393, 55.69455101638112), (37.40703674603271, 55.69638690385348), (37.39607169577025, 55.70451821283731), (37.38952706878662, 55.70942491932811), (37.387778313491815, 55.71149057784176), (37.39049275399779, 55.71419814298992), (37.385557272491454, 55.7155489617061), (37.38388335714726, 55.71849856042102), (37.378368238098155, 55.7292763261685), (37.37763597123337, 55.730845879211614), (37.37890062088197, 55.73167906388319), (37.37750451918789, 55.734703664681774), (37.375610832015965, 55.734851959522246), (37.3723813571472, 55.74105626086403), (37.37014935714723, 55.746115620904355), (37.36944173016362, 55.750883999993725), (37.36975304365541, 55.76335905525834), (37.37244070571134, 55.76432079697595), (37.3724259757175, 55.76636979670426), (37.369922155757884, 55.76735417953104), (37.369892695770275, 55.76823419316575), (37.370214730163575, 55.782312184391266), 
(37.370493611114505, 55.78436801120489), (37.37120164550783, 55.78596427165359), (37.37284851456452, 55.7874378183096), (37.37608325135799, 55.7886695054807), (37.3764587460632, 55.78947647305964), (37.37530000265506, 55.79146512926804), (37.38235915344241, 55.79899647809345), (37.384344043655396, 55.80113596939471), (37.38594269577028, 55.80322699999366), (37.38711208598329, 55.804919036911976), (37.3880239841309, 55.806610999993666), (37.38928977249147, 55.81001864976979), (37.39038389947512, 55.81348641242801), (37.39235781481933, 55.81983538336746), (37.393709457672124, 55.82417822811877), (37.394685720901464, 55.82792275755836), (37.39557615344238, 55.830447148154136), (37.39844478226658, 55.83167107969975), (37.40019761214057, 55.83151823557964), (37.400398790382326, 55.83264967594742), (37.39659544313046, 55.83322180909622), (37.39667059524539, 55.83402792148566), (37.39682089947515, 55.83638877400216), (37.39643489154053, 55.83861656112751), (37.3955338994751, 55.84072348043264), (37.392680272491454, 55.84502158126453), (37.39241188227847, 55.84659117913199), (37.392529730163616, 55.84816071336481), (37.39486835714723, 55.85288092980303), (37.39873052645878, 55.859893456073635), (37.40272161111449, 55.86441833633205), (37.40697072750854, 55.867579567544375), (37.410007082016016, 55.868369880337), (37.4120992989502, 55.86920843741314), (37.412668021163924, 55.87055369615854), (37.41482461111453, 55.87170587948249), (37.41862266137694, 55.873183961039565), (37.42413732540892, 55.874879126654704), (37.4312182698669, 55.875614937236705), (37.43111093783558, 55.8762723478417), (37.43332105622856, 55.87706546369396), (37.43385747619623, 55.87790681284802), (37.441303050262405, 55.88027084462084), (37.44747234260555, 55.87942070143253), (37.44716141796871, 55.88072960917233), (37.44769797085568, 55.88121221323979), (37.45204320500181, 55.882080694420715), (37.45673176190186, 55.882346110794586), (37.463383999999984, 55.88252729504517), (37.46682797486874, 55.88294937719063), (37.470014457672086, 55.88361266759345), (37.47751410450743, 55.88546991372396), (37.47860317658232, 55.88534929207307), (37.48165826025772, 55.882563306475106), (37.48316434442331, 55.8815803226785), (37.483831555817645, 55.882427612793315), (37.483182967125686, 55.88372791409729), (37.483092277908824, 55.88495581062434), (37.4855716508179, 55.8875561994203), (37.486440636245746, 55.887827444039566), (37.49014203439328, 55.88897899871799), (37.493210285705544, 55.890208937135604), (37.497512451065035, 55.891342397444696), (37.49780744510645, 55.89174030252967), (37.49940333499519, 55.89239745507079), (37.50018383334346, 55.89339220941865), (37.52421672750851, 55.903869074155224), (37.52977457672118, 55.90564076517974), (37.53503220370484, 55.90661661218259), (37.54042858064267, 55.90714113744566), (37.54320461007303, 55.905645048442985), (37.545686966066306, 55.906608607018505), (37.54743976120755, 55.90788552162358), (37.55796999999999, 55.90901557907218), (37.572711542327866, 55.91059395704873), (37.57942799999998, 55.91073854155573), (37.58502865872187, 55.91009969268444), (37.58739968913264, 55.90794809960554), (37.59131567193598, 55.908713267595054), (37.612687423278814, 55.902866854295375), (37.62348079629517, 55.90041967242986), (37.635797880950896, 55.898141151686396), (37.649487626983664, 55.89639275532968), (37.65619302513125, 55.89572360207488), (37.66294133862307, 55.895295577183965), (37.66874564418033, 55.89505457604897), (37.67375601586915, 55.89254677027454), (37.67744661901856, 55.8947775867987), 
(37.688347, 55.89450045676125), (37.69480554232789, 55.89422926332761), (37.70107096560668, 55.89322256101114), (37.705962965606716, 55.891763491662616), (37.711885134918205, 55.889110234998974), (37.71682005026245, 55.886577568759876), (37.7199315476074, 55.88458159806678), (37.72234560316464, 55.882281005794134), (37.72364385977171, 55.8809452036196), (37.725371142837474, 55.8809722706006), (37.727870902099546, 55.88037213862385), (37.73394330422971, 55.877941504088696), (37.745339592590376, 55.87208120378722), (37.75525267724611, 55.86703807949492), (37.76919976190188, 55.859821640197474), (37.827835219574, 55.82962968399116), (37.83341438888553, 55.82575289922351), (37.83652584655761, 55.82188784027888), (37.83809213491821, 55.81612575504693), (37.83605359521481, 55.81460347077685), (37.83632178569025, 55.81276696067908), (37.838623105812026, 55.811486181656385), (37.83912198147584, 55.807329380532785), (37.839079078033414, 55.80510270463816), (37.83965844708251, 55.79940712529036), (37.840581150787344, 55.79131399999368), (37.84172564285271, 55.78000432402266)]); +``` + +Check how many cell towers are in Moscow: + +``` +SELECT count() FROM cell_towers WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow)) + +┌─count()─┐ +│ 310463 │ +└─────────┘ + +1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.) +``` + +The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.tech/play?user=play), [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). + +Although you cannot create temporary tables there. + +[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets/cell-towers/) diff --git a/docs/en/getting-started/example-datasets/index.md b/docs/en/getting-started/example-datasets/index.md index 72f44d8caf1..53007c33306 100644 --- a/docs/en/getting-started/example-datasets/index.md +++ b/docs/en/getting-started/example-datasets/index.md @@ -20,5 +20,6 @@ The list of documented datasets: - [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md) - [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md) - [Brown University Benchmark](../../getting-started/example-datasets/brown-benchmark.md) +- [Cell Towers](../../getting-started/example-datasets/cell-towers.md) [Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) From 5e3c9a5debedf927a789e75b8daed4d657d40e3d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Feb 2021 17:44:12 +0300 Subject: [PATCH 126/149] Rename --- docs/en/getting-started/example-datasets/cell-towers.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md index f032be76da0..76effdd4c62 100644 --- a/docs/en/getting-started/example-datasets/cell-towers.md +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -1,9 +1,9 @@ --- toc_priority: 21 -toc_title: Cell Stations +toc_title: Cell Towers --- -# Cell Stations +# Cell Towers This dataset is from [OpenCellid](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers. 
From 315824978de64164442ce8c5c6b5cb161041a72b Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 27 Feb 2021 19:19:21 +0300 Subject: [PATCH 127/149] CheckConstraintsBlockOutputStream optimize nullable column case --- src/DataStreams/CheckConstraintsBlockOutputStream.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index 90d6560b9a8..3b0b4db72f9 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -65,10 +65,8 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) /// Check if constraint value is nullable const auto & null_map = column_nullable->getNullMapColumn(); - const auto & data = null_map.getData(); - const auto * it = std::find(data.begin(), data.end(), true); - - bool null_map_contains_null = it != data.end(); + const PaddedPODArray & data = null_map.getData(); + bool null_map_contains_null = !memoryIsZero(data.raw_data(), data.size() * sizeof(UInt8)); if (null_map_contains_null) throw Exception( From bb5a8241b94739a3828bf98e041a6a489d32e7f9 Mon Sep 17 00:00:00 2001 From: Nikolay <211292+kolya7k@users.noreply.github.com> Date: Sat, 27 Feb 2021 21:05:04 +0300 Subject: [PATCH 128/149] Add information about my ClickHouse PHP extension I made a native PHP extension for ClickHouse using the clickhouse-cpp library. My goal is to make a fast extension for ClickHouse with interface similar to mysqli. --- docs/ru/interfaces/third-party/client-libraries.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ru/interfaces/third-party/client-libraries.md b/docs/ru/interfaces/third-party/client-libraries.md index 26e05b02509..97fa382fdd9 100644 --- a/docs/ru/interfaces/third-party/client-libraries.md +++ b/docs/ru/interfaces/third-party/client-libraries.md @@ -22,6 +22,7 @@ toc_title: "\u041a\u043b\u0438\u0435\u043d\u0442\u0441\u043a\u0438\u0435\u0020\u - [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client) - [SeasClick C++ client](https://github.com/SeasX/SeasClick) - [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel) + - [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php) - Go - [clickhouse](https://github.com/kshvakov/clickhouse/) - [go-clickhouse](https://github.com/roistat/go-clickhouse) From fd1cf49e926e2c56dacb794f70a04c1901fb8e33 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Feb 2021 22:22:38 +0300 Subject: [PATCH 129/149] Rewrite extractTextFromHTML function --- docker/test/fasttest/run.sh | 1 - src/Functions/extractTextFromHTML.cpp | 306 +++++++++ src/Functions/htmlOrXmlCoarseParse.cpp | 582 ------------------ src/Functions/registerFunctionsString.cpp | 13 +- .../01674_htm_xml_coarse_parse.sql | 13 +- .../01746_extract_text_from_html.reference | 106 ++++ .../01746_extract_text_from_html.sql | 61 ++ 7 files changed, 485 insertions(+), 597 deletions(-) create mode 100644 src/Functions/extractTextFromHTML.cpp delete mode 100644 src/Functions/htmlOrXmlCoarseParse.cpp create mode 100644 tests/queries/0_stateless/01746_extract_text_from_html.reference create mode 100644 tests/queries/0_stateless/01746_extract_text_from_html.sql diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 370311b13c5..1bfc91ecd92 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -356,7 +356,6 @@ 
function run_tests # JSON functions 01666_blns - 01674_htm_xml_coarse_parse ) (time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" diff --git a/src/Functions/extractTextFromHTML.cpp b/src/Functions/extractTextFromHTML.cpp new file mode 100644 index 00000000000..5bee4dc541f --- /dev/null +++ b/src/Functions/extractTextFromHTML.cpp @@ -0,0 +1,306 @@ +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +ALWAYS_INLINE bool startsWith(const char * s, const char * end, const char * prefix) +{ + return s + strlen(prefix) < end && 0 == memcmp(s, prefix, strlen(prefix)); +} + +ALWAYS_INLINE bool checkAndSkip(const char * __restrict & s, const char * end, const char * prefix) +{ + if (startsWith(s, end, prefix)) + { + s += strlen(prefix); + return true; + } + return false; +} + +bool processComment(const char * __restrict & src, const char * end) +{ + if (!checkAndSkip(src, end, "world'); +Helloworld +SELECT extractTextFromHTML('Helloworld'); +Helloworld +SELECT extractTextFromHTML('Helloworld'); +Helloworld +SELECT extractTextFromHTML('Hello World'); +Hello World +SELECT extractTextFromHTML('Hello World'); +Hello World +SELECT extractTextFromHTML('HelloWorld'); +HelloWorld +SELECT extractTextFromHTML('Hello World'); +Hello World +SELECT extractTextFromHTML('Hello World'); +Hello World +SELECT extractTextFromHTML('Hello World'); +Hello World +SELECT extractTextFromHTML('HelloWorld'); +HelloWorld +SELECT extractTextFromHTML('Hello World'); +Hello World +SELECT extractTextFromHTML(''); + \t Hello,\rworld \n +SELECT extractTextFromHTML('Hello world!'); +Hello Hello\tworld world! +SELECT extractTextFromHTML('Helloworld!'); +HelloHello\tworldworld! +SELECT extractTextFromHTML('Hello world]]> world!'); +Hello Hello world world! 
+SELECT extractTextFromHTML('John Smith]]>'); +John Smith +SELECT extractTextFromHTML('John ]]>'); +John +SELECT extractTextFromHTML('John Smith]]>'); +John +SELECT extractTextFromHTML('John ]]>]]>'); +John Smith +SELECT extractTextFromHTML('John ]]> ]]>'); +John Smith +SELECT extractTextFromHTML('John]]> ]]>'); +John Smith +SELECT extractTextFromHTML('John ]]>]]>]]>'); +John ]]>Smith +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML('HelloWorld goodbye'); +HelloWorld goodbye +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML('HelloWorld goodbye'); +HelloWorld goodbye +SELECT extractTextFromHTML('HelloWorld goodbye'); +HelloWorld goodbye +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML('Hello]]> goodbye'); +Hello]]> goodbye +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye diff --git a/tests/queries/0_stateless/01746_extract_text_from_html.sql b/tests/queries/0_stateless/01746_extract_text_from_html.sql new file mode 100644 index 00000000000..9bdd153228f --- /dev/null +++ b/tests/queries/0_stateless/01746_extract_text_from_html.sql @@ -0,0 +1,61 @@ +-- { echo } + +SELECT extractTextFromHTML(''); +SELECT extractTextFromHTML(' '); +SELECT extractTextFromHTML(' '); +SELECT extractTextFromHTML('Hello'); +SELECT extractTextFromHTML('Hello, world'); +SELECT extractTextFromHTML('Hello, world'); +SELECT extractTextFromHTML(' Hello, world'); +SELECT extractTextFromHTML(' Hello, world '); +SELECT extractTextFromHTML(' \t Hello,\rworld \n '); + +SELECT extractTextFromHTML('Hello world'); +SELECT extractTextFromHTML('Hello'); +SELECT extractTextFromHTML('Hello<>world'); +SELECT extractTextFromHTML('Helloworld'); +SELECT extractTextFromHTML('Helloworld'); +SELECT extractTextFromHTML('Helloworld'); +SELECT extractTextFromHTML('Helloworld'); +SELECT extractTextFromHTML('Helloworld'); + +SELECT extractTextFromHTML('Hello World'); +SELECT extractTextFromHTML('Hello World'); +SELECT extractTextFromHTML('HelloWorld'); +SELECT extractTextFromHTML('Hello World'); +SELECT extractTextFromHTML('Hello World'); +SELECT extractTextFromHTML('Hello World'); +SELECT extractTextFromHTML('HelloWorld'); +SELECT extractTextFromHTML('Hello World'); + +SELECT extractTextFromHTML(''); +SELECT extractTextFromHTML('Hello world!'); +SELECT extractTextFromHTML('Helloworld!'); + +SELECT extractTextFromHTML('Hello world]]> world!'); +SELECT extractTextFromHTML('John Smith]]>'); +SELECT extractTextFromHTML('John ]]>'); +SELECT extractTextFromHTML('John Smith]]>'); +SELECT extractTextFromHTML('John ]]>]]>'); +SELECT extractTextFromHTML('John ]]> ]]>'); +SELECT extractTextFromHTML('John]]> ]]>'); +SELECT extractTextFromHTML('John ]]>]]>]]>'); + +SELECT extractTextFromHTML('Hello goodbye'); +SELECT extractTextFromHTML('Hello goodbye'); +SELECT extractTextFromHTML('HelloWorld goodbye'); +SELECT extractTextFromHTML('Hello goodbye'); +SELECT extractTextFromHTML('Hello goodbye'); +SELECT extractTextFromHTML('HelloWorld goodbye'); +SELECT extractTextFromHTML('HelloWorld goodbye'); + +SELECT extractTextFromHTML('Hello goodbye'); +SELECT extractTextFromHTML('Hello goodbye'); +SELECT extractTextFromHTML('Hello]]> goodbye'); +SELECT extractTextFromHTML('Hello goodbye'); From 2ac673b12a9f02a36136263abd873159e28e4de8 Mon Sep 17 00:00:00 2001 
From: Alexey Milovidov Date: Sat, 27 Feb 2021 22:33:55 +0300 Subject: [PATCH 130/149] Update logic and tests --- src/Functions/extractTextFromHTML.cpp | 22 +++++-- .../01674_htm_xml_coarse_parse.reference | 2 +- .../01746_extract_text_from_html.reference | 57 +------------------ .../01746_extract_text_from_html.sql | 1 + 4 files changed, 22 insertions(+), 60 deletions(-) diff --git a/src/Functions/extractTextFromHTML.cpp b/src/Functions/extractTextFromHTML.cpp index 5bee4dc541f..c6a9b84b33e 100644 --- a/src/Functions/extractTextFromHTML.cpp +++ b/src/Functions/extractTextFromHTML.cpp @@ -61,7 +61,7 @@ bool processCDATA(const char * __restrict & src, const char * end, char * __rest if (!checkAndSkip(src, end, "Hello, world world goodbye'); Hello goodbye -SELECT extractTextFromHTML('HelloWorld goodbye'); HelloWorld goodbye -SELECT extractTextFromHTML('HelloWorld goodbye'); HelloWorld goodbye -SELECT extractTextFromHTML('Hello goodbye'); Hello goodbye -SELECT extractTextFromHTML('Hello goodbye'); Hello goodbye -SELECT extractTextFromHTML('Hello]]> goodbye'); -Hello]]> goodbye -SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +Hello Hello goodbye diff --git a/tests/queries/0_stateless/01746_extract_text_from_html.sql b/tests/queries/0_stateless/01746_extract_text_from_html.sql index 9bdd153228f..0004849df87 100644 --- a/tests/queries/0_stateless/01746_extract_text_from_html.sql +++ b/tests/queries/0_stateless/01746_extract_text_from_html.sql @@ -57,5 +57,6 @@ SELECT extractTextFromHTML('Hello]]> goodbye'); +SELECT extractTextFromHTML('Hello]]> goodbye'); SELECT extractTextFromHTML('Hello goodbye'); From 0ab4afeeed567b4626b45bd7c7b984c085a6916b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Feb 2021 00:55:56 +0300 Subject: [PATCH 131/149] Tests and documentation --- .../01674_htm_xml_coarse_parse.reference | 2 +- .../01746_extract_text_from_html.reference | 107 ++++++++++++++---- .../01746_extract_text_from_html.sql | 10 ++ 3 files changed, 98 insertions(+), 21 deletions(-) diff --git a/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference b/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference index 72af13aedd0..9cca4934551 100644 --- a/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference +++ b/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference @@ -2,7 +2,7 @@ Here is CDTATA. This is a white space test. -This is a complex test. world goodbye'); Hello goodbye -HelloWorld goodbye -HelloWorld goodbye +SELECT extractTextFromHTML('HelloWorld goodbye'); +Hello World goodbye +SELECT extractTextFromHTML('HelloWorld goodbye'); +Hello World goodbye +SELECT extractTextFromHTML('Hello goodbye'); Hello goodbye +SELECT extractTextFromHTML('Hello goodbye'); Hello goodbye +SELECT extractTextFromHTML('Hello]]> goodbye'); Hello +SELECT extractTextFromHTML('Hello]]> goodbye'); Hello goodbye -Hello +SELECT extractTextFromHTML('Hello]]> goodbye'); +Hello ]]> goodbye +SELECT extractTextFromHTML('Hello goodbye'); Hello goodbye +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML(']]>'); +]]> +SELECT extractTextFromHTML(' + +
xkcd.com + +'); +xkcd.com diff --git a/tests/queries/0_stateless/01746_extract_text_from_html.sql b/tests/queries/0_stateless/01746_extract_text_from_html.sql index 0004849df87..b4ccc775bef 100644 --- a/tests/queries/0_stateless/01746_extract_text_from_html.sql +++ b/tests/queries/0_stateless/01746_extract_text_from_html.sql @@ -58,5 +58,15 @@ SELECT extractTextFromHTML('Hello]]> goodbye'); SELECT extractTextFromHTML('Hello]]> goodbye'); +SELECT extractTextFromHTML('Hello]]> goodbye'); +SELECT extractTextFromHTML('Hello goodbye'); SELECT extractTextFromHTML('Hello goodbye'); + +SELECT extractTextFromHTML(']]>'); + +SELECT extractTextFromHTML(' + +
xkcd.com + +'); From 4ab18cdcd8a5eff3f4e386a86361a60f61222e23 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Feb 2021 00:59:27 +0300 Subject: [PATCH 132/149] Tests and documentation --- src/Functions/extractTextFromHTML.cpp | 92 +++++++++++++++++++-------- 1 file changed, 65 insertions(+), 27 deletions(-) diff --git a/src/Functions/extractTextFromHTML.cpp b/src/Functions/extractTextFromHTML.cpp index c6a9b84b33e..4b35eacaef0 100644 --- a/src/Functions/extractTextFromHTML.cpp +++ b/src/Functions/extractTextFromHTML.cpp @@ -6,6 +6,58 @@ #include +/** A function to extract text from HTML or XHTML. + * It does not necessarily 100% conforms to any of the HTML, XML or XHTML standards, + * but the implementation is reasonably accurate and it is fast. + * + * The rules are the following: + * + * 1. Comments are skipped. Example: + * Comment must end with -->. Nested comments are not possible. + * Note: constructions like are not valid comments in HTML but will be skipped by other rules. + * + * 2. CDATA is pasted verbatim. + * Note: CDATA is XML/XHTML specific. But we still process it for "best-effort" approach. + * + * 3. 'script' and 'style' elements are removed with all their content. + * Note: it's assumed that closing tag cannot appear inside content. + * For example, in JS string literal is has to be escaped as "<\/script>". + * Note: comments and CDATA is possible inside script or style - then closing tags are not searched inside CDATA. + * Example: ]]> + * But still searched inside comments. Sometimes it becomes complicated: + * var y = "-->"; alert(x + y); + * Note: script and style can be the names of XML namespaces - then they are not treat like usual script or style. + * Example: Hello. + * Note: whitespaces are possible after closing tag name: but not before: < / script>. + * + * 4. Other tags or tag-like elements are skipped without inner content. + * Example: . + * Note: it's expected that this HTML is illegal: + * Note: it will also skip something like tags: <>, , etc. + * Note: tag without end will be skipped to the end of input: + * 5. HTML and XML entities are not decoded. + * It should be processed by separate function. + * + * 6. Whitespaces in text are collapsed or inserted by specific rules. + * Whitespaces at beginning and at the end are removed. + * Consecutive whitespaces are collapsed. + * But if text is separated by other elements and there is no whitespace, it is inserted. + * It may be unnatural, examples: Helloworld, Helloworld + * - in HTML there will be no whitespace, but the function will insert it. + * But also consider: Hello
<p>world</p>, Hello<br>world.
+  * This behaviour is reasonable for data analysis, e.g. convert HTML to a bag of words.
+  *
+  * 7. Also note that correct handling of whitespaces would require
+  * support of <pre></pre> and CSS display and white-space properties.
+  *
+  * Usage example:
+  *
+  * SELECT extractTextFromHTML(html) FROM url('https://yandex.ru/', RawBLOB, 'html String')
+  *
+  * - ClickHouse has an embedded web browser.
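+  *
+  * A hypothetical combined example of rules 1, 4 and 6 (an illustration, not from the original tests):
+  *
+  * SELECT extractTextFromHTML('<p>Hello<!-- note --><b>world</b></p>')
+  * - the comment and all tags are dropped, a whitespace is inserted, and the expected result is 'Hello world'.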
+  */
+
 namespace DB
 {
 
@@ -56,18 +108,11 @@ bool processComment(const char * __restrict & src, const char * end)
     return true;
 }
 
-bool processCDATA(const char * __restrict & src, const char * end, char * __restrict & dst, bool & pending_whitespace)
+bool processCDATA(const char * __restrict & src, const char * end, char * __restrict & dst)
 {
     if (!checkAndSkip(src, end, "<![CDATA["))
         return false;
 
-    if (pending_whitespace)
+    const char * lt = find_first_symbols<'<'>(src, end);
+
+    if (needs_whitespace && src < lt)
     {
-        pending_whitespace = false;
         *dst = ' ';
         ++dst;
     }
 
-    const char * lt = find_first_symbols<'<'>(src, end);
-
     while (true)
     {
         const char * ws = find_first_symbols<' ', '\t', '\n', '\r', '\f', '\v'>(src, lt);
@@ -204,10 +245,7 @@ void copyText(const char * __restrict & src, const char * end, char * __restrict
 
         src = ws;
         while (src < lt && isWhitespaceASCII(*src))
-        {
-            pending_whitespace = true;
             ++src;
-        }
 
         if (src < lt)
         {
@@ -232,16 +270,16 @@ size_t extract(const char * __restrict src, size_t size, char * __restrict dst)
       * - CDATA should be copied verbatim;
       */
 
-    char * dst_begin = dst;
     const char * end = src + size;
-    bool pending_whitespace = false;
+    char * dst_begin = dst;
 
     while (src < end)
     {
-        copyText(src, end, dst, pending_whitespace);
+        bool needs_whitespace = dst != dst_begin && dst[-1] != ' ';
+        copyText(src, end, dst, needs_whitespace);
 
         processComment(src, end)
-            || processCDATA(src, end, dst, pending_whitespace)
+            || processCDATA(src, end, dst)
             || processElementAndSkipContent(src, end, "script")
             || processElementAndSkipContent(src, end, "style")
             || skipTag(src, end);

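The hunk above derives the pending-space decision from the output buffer itself instead of carrying a flag through the call chain. A minimal standalone sketch of that rule, with simplified names that are not the actual ClickHouse identifiers:

```
#include <string>

/// Append `text` to `out`, inserting one separating space when `out` already
/// ends with a non-space character - the same check as
/// `bool needs_whitespace = dst != dst_begin && dst[-1] != ' ';` in extract().
static void appendWithSeparator(std::string & out, const std::string & text)
{
    if (!out.empty() && out.back() != ' ' && !text.empty())
        out += ' ';
    out += text;
}
```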
From 01ef06a42c4c213a6ec0d0c9188ab10e5a2f07b9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov 
Date: Sun, 28 Feb 2021 02:31:47 +0300
Subject: [PATCH 133/149] Fix broken links

---
 docs/en/sql-reference/functions/hash-functions.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index 6bf1bebabaa..465ad01527f 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -9,7 +9,7 @@ Hash functions can be used for the deterministic pseudo-random shuffling of elem
 
 ## halfMD5 {#hash-functions-halfmd5}
 
-[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order.
+[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order.
 
 ``` sql
 halfMD5(par1, ...)
@@ -54,7 +54,7 @@ sipHash64(par1,...)
 
 This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) function.
 
-Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm:
+Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm:
 
 1.  After hashing all the input parameters, the function gets the array of hashes.
 2.  Function takes the first and the second elements and calculates a hash for the array of them.

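Schematically, that pairwise combining reads as follows (a sketch; `sipHash64Bytes`, standing for SipHash over a raw byte span, is an assumed helper and not the real API):

```
/// Fold the per-parameter hashes left to right: each step hashes the pair
/// (hash accumulated so far, hash of the next parameter).
uint64_t combined = sipHash64Bytes(params[0].data, params[0].size);
for (size_t i = 1; i < num_params; ++i)
{
    uint64_t pair[2] = {combined, sipHash64Bytes(params[i].data, params[i].size)};
    combined = sipHash64Bytes(pair, sizeof(pair));
}
```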
From e5ae9cbb6365dcf2122672e6587a95f19ebbd187 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov 
Date: Sun, 28 Feb 2021 04:03:22 +0300
Subject: [PATCH 134/149] Fix Arcadia

---
 src/Functions/ya.make | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/ya.make b/src/Functions/ya.make
index 20ba5f846a3..f8beaa8540c 100644
--- a/src/Functions/ya.make
+++ b/src/Functions/ya.make
@@ -246,6 +246,7 @@ SRCS(
     extractAllGroupsHorizontal.cpp
     extractAllGroupsVertical.cpp
     extractGroups.cpp
+    extractTextFromHTML.cpp
     extractTimeZoneFromFunctionArguments.cpp
     filesystem.cpp
     finalizeAggregation.cpp
@@ -291,7 +292,6 @@ SRCS(
     hasToken.cpp
     hasTokenCaseInsensitive.cpp
     hostName.cpp
-    htmlOrXmlCoarseParse.cpp
     hypot.cpp
     identity.cpp
     if.cpp

From ae9fea1d0af118a8f87b224d194d61da1567188b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov 
Date: Sun, 28 Feb 2021 04:05:04 +0300
Subject: [PATCH 135/149] Fix gcc and clang-tidy

---
 src/Functions/extractTextFromHTML.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Functions/extractTextFromHTML.cpp b/src/Functions/extractTextFromHTML.cpp
index 4b35eacaef0..528bd0c311f 100644
--- a/src/Functions/extractTextFromHTML.cpp
+++ b/src/Functions/extractTextFromHTML.cpp
@@ -70,12 +70,12 @@ namespace ErrorCodes
 namespace
 {
 
-ALWAYS_INLINE bool startsWith(const char * s, const char * end, const char * prefix)
+inline bool startsWith(const char * s, const char * end, const char * prefix)
 {
     return s + strlen(prefix) < end && 0 == memcmp(s, prefix, strlen(prefix));
 }
 
-ALWAYS_INLINE bool checkAndSkip(const char * __restrict & s, const char * end, const char * prefix)
+inline bool checkAndSkip(const char * __restrict & s, const char * end, const char * prefix)
 {
     if (startsWith(s, end, prefix))
     {
@@ -140,7 +140,7 @@ bool processCDATA(const char * __restrict & src, const char * end, char * __rest
 
 bool processElementAndSkipContent(const char * __restrict & src, const char * end, const char * tag_name)
 {
-    auto old_src = src;
+    const auto * old_src = src;
 
     if (!(src < end && *src == '<'))
         return false;
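
The patched helpers are small enough to try standalone. A minimal sketch, with the `__restrict` qualifier dropped for portability and a hypothetical `main` added for demonstration:

```cpp
#include <cassert>
#include <cstring>

inline bool startsWith(const char * s, const char * end, const char * prefix)
{
    return s + strlen(prefix) < end && 0 == memcmp(s, prefix, strlen(prefix));
}

inline bool checkAndSkip(const char * & s, const char * end, const char * prefix)
{
    if (startsWith(s, end, prefix))
    {
        s += strlen(prefix);  /// Advance the cursor past the matched prefix.
        return true;
    }
    return false;
}

int main()
{
    const char html[] = "<!-- a comment -->rest";
    const char * pos = html;
    const char * end = html + sizeof(html) - 1;

    assert(checkAndSkip(pos, end, "<!--"));   /// Cursor advances past "<!--".
    assert(!checkAndSkip(pos, end, "<!--"));  /// A failed check leaves the cursor in place.
}
```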

From 220a494fa479069c22b048c2aee2f4c3d4186d3a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov 
Date: Sun, 28 Feb 2021 04:42:35 +0300
Subject: [PATCH 136/149] Fix idiotic syntax highlight in docs #18432

---
 website/css/highlight.css | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/css/highlight.css b/website/css/highlight.css
index 55a0054b07f..7cc8a4865dd 100644
--- a/website/css/highlight.css
+++ b/website/css/highlight.css
@@ -11,7 +11,7 @@
 .syntax .hll { background-color: #b9b6b0 }
 .syntax  { background: #f8f9fa; color: #2f1e2e }
 .syntax .c { color: #8d8687 } /* Comment */
-.syntax .err { color: #ef6155 } /* Error */
+.syntax .err {} /* Error */
 .syntax .k { color: #000000; font-weight: bold } /* Keyword */
 .syntax .l { color: #0088ff } /* Literal */
 .syntax .n { color: #2f1e2e } /* Name */

From 8b58dba09410f4539b997f3b79aff6c0ede7307c Mon Sep 17 00:00:00 2001
From: Azat Khuzhin 
Date: Sun, 28 Feb 2021 10:52:09 +0300
Subject: [PATCH 137/149] Drop unused HexWriteBuffer

---
 src/IO/HexWriteBuffer.cpp                | 30 ------------------------
 src/IO/HexWriteBuffer.h                  | 28 ----------------------
 src/IO/ya.make                           |  1 -
 src/Interpreters/Cluster.cpp             |  1 -
 src/Storages/MergeTree/MergeTreeData.cpp |  1 -
 5 files changed, 61 deletions(-)
 delete mode 100644 src/IO/HexWriteBuffer.cpp
 delete mode 100644 src/IO/HexWriteBuffer.h

diff --git a/src/IO/HexWriteBuffer.cpp b/src/IO/HexWriteBuffer.cpp
deleted file mode 100644
index 4e3403ba74b..00000000000
--- a/src/IO/HexWriteBuffer.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-#include 
-#include 
-#include 
-#include 
-
-
-namespace DB
-{
-
-void HexWriteBuffer::nextImpl()
-{
-    if (!offset())
-        return;
-
-    for (Position p = working_buffer.begin(); p != pos; ++p)
-    {
-        UInt8 byte = *p;
-        out.write(hexDigitUppercase(byte / 16));
-        out.write(hexDigitUppercase(byte % 16));
-    }
-}
-
-HexWriteBuffer::~HexWriteBuffer()
-{
-    /// FIXME move final flush into the caller
-    MemoryTracker::LockExceptionInThread lock;
-    nextImpl();
-}
-
-}
diff --git a/src/IO/HexWriteBuffer.h b/src/IO/HexWriteBuffer.h
deleted file mode 100644
index a68dd29065b..00000000000
--- a/src/IO/HexWriteBuffer.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#pragma once
-
-#include <IO/WriteBuffer.h>
-
-
-/// Since HexWriteBuffer is often created in the inner loop, we'll make its buffer size small.
-#define DBMS_HEX_WRITE_BUFFER_SIZE 32
-
-
-namespace DB
-{
-
-/** Everything that is written into it, translates to HEX (in capital letters) and writes to another WriteBuffer.
-  */
-class HexWriteBuffer final : public WriteBuffer
-{
-protected:
-    char buf[DBMS_HEX_WRITE_BUFFER_SIZE]; //-V730
-    WriteBuffer & out;
-
-    void nextImpl() override;
-
-public:
-    HexWriteBuffer(WriteBuffer & out_) : WriteBuffer(buf, sizeof(buf)), out(out_) {}
-    ~HexWriteBuffer() override;
-};
-
-}
diff --git a/src/IO/ya.make b/src/IO/ya.make
index 980719aa74f..6605cf64277 100644
--- a/src/IO/ya.make
+++ b/src/IO/ya.make
@@ -29,7 +29,6 @@ SRCS(
     HTTPChunkedReadBuffer.cpp
     HTTPCommon.cpp
     HashingWriteBuffer.cpp
-    HexWriteBuffer.cpp
     LZMADeflatingWriteBuffer.cpp
     LZMAInflatingReadBuffer.cpp
     LimitReadBuffer.cpp
diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp
index c9c56c96cbe..fb9788e84c4 100644
--- a/src/Interpreters/Cluster.cpp
+++ b/src/Interpreters/Cluster.cpp
@@ -6,7 +6,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 2d841b98c59..d9e24581c0c 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -13,7 +13,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
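
For reference, the translation the removed buffer performed is the classic nibble split: each byte becomes two uppercase hex digits, high nibble first. A self-contained sketch follows; `hexDigitUppercase` here is a local stand-in for the helper of the same name in `Common/hex.h`:

```cpp
#include <iostream>
#include <string>

/// Map a nibble (0..15) to its uppercase hex digit.
char hexDigitUppercase(unsigned char c)
{
    return c < 10 ? '0' + c : 'A' + (c - 10);
}

std::string toHexUppercase(const std::string & in)
{
    std::string out;
    out.reserve(in.size() * 2);
    for (unsigned char byte : in)
    {
        out += hexDigitUppercase(byte / 16); /// High nibble first.
        out += hexDigitUppercase(byte % 16); /// Then the low nibble.
    }
    return out;
}

int main()
{
    std::cout << toHexUppercase("ABC") << '\n'; /// Prints "414243".
}
```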

From 568a49dad59acbc036eb6dacecfac297d618e853 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin 
Date: Sun, 28 Feb 2021 10:53:13 +0300
Subject: [PATCH 138/149] Remove unused AsynchronousWriteBuffer

---
 src/IO/AsynchronousWriteBuffer.h | 71 --------------------------------
 src/IO/tests/CMakeLists.txt      |  3 --
 src/IO/tests/async_write.cpp     | 26 ------------
 src/IO/tests/parse_int_perf.cpp  |  1 -
 4 files changed, 101 deletions(-)
 delete mode 100644 src/IO/AsynchronousWriteBuffer.h
 delete mode 100644 src/IO/tests/async_write.cpp

diff --git a/src/IO/AsynchronousWriteBuffer.h b/src/IO/AsynchronousWriteBuffer.h
deleted file mode 100644
index 8c44f8c7d4a..00000000000
--- a/src/IO/AsynchronousWriteBuffer.h
+++ /dev/null
@@ -1,71 +0,0 @@
-#pragma once
-
-#include 
-#include 
-#include 
-#include 
-
-
-namespace DB
-{
-
-
-/** Writes data asynchronously using double buffering.
-  */
-class AsynchronousWriteBuffer : public WriteBuffer
-{
-private:
-    WriteBuffer & out;               /// The main buffer, responsible for writing data.
-    std::vector<char> memory;       /// A piece of memory for duplicating the buffer.
-    ThreadPool pool;                 /// For asynchronous data writing.
-    bool started;                    /// Has an asynchronous data write started?
-
-    /// Swap the main and duplicate buffers.
-    void swapBuffers()
-    {
-        swap(out);
-    }
-
-    void nextImpl() override
-    {
-        if (!offset())
-            return;
-
-        if (started)
-            pool.wait();
-        else
-            started = true;
-
-        swapBuffers();
-
-        /// The data will be written in separate stream.
-        pool.scheduleOrThrowOnError([this] { thread(); });
-    }
-
-public:
-    AsynchronousWriteBuffer(WriteBuffer & out_) : WriteBuffer(nullptr, 0), out(out_), memory(out.buffer().size()), pool(1), started(false)
-    {
-        /// Data is written to the duplicate buffer.
-        set(memory.data(), memory.size());
-    }
-
-    ~AsynchronousWriteBuffer() override
-    {
-        /// FIXME move final flush into the caller
-        MemoryTracker::LockExceptionInThread lock;
-
-        if (started)
-            pool.wait();
-
-        swapBuffers();
-        out.next();
-    }
-
-    /// That is executed in a separate thread
-    void thread()
-    {
-        out.next();
-    }
-};
-
-}
diff --git a/src/IO/tests/CMakeLists.txt b/src/IO/tests/CMakeLists.txt
index fcd59d94cb0..79800d8339c 100644
--- a/src/IO/tests/CMakeLists.txt
+++ b/src/IO/tests/CMakeLists.txt
@@ -25,9 +25,6 @@ target_link_libraries (var_uint PRIVATE clickhouse_common_io)
 add_executable (read_escaped_string read_escaped_string.cpp)
 target_link_libraries (read_escaped_string PRIVATE clickhouse_common_io)
 
-add_executable (async_write async_write.cpp)
-target_link_libraries (async_write PRIVATE dbms)
-
 add_executable (parse_int_perf parse_int_perf.cpp)
 target_link_libraries (parse_int_perf PRIVATE clickhouse_common_io)
 
diff --git a/src/IO/tests/async_write.cpp b/src/IO/tests/async_write.cpp
deleted file mode 100644
index e3bff7cf341..00000000000
--- a/src/IO/tests/async_write.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-#include 
-
-#include 
-#include 
-#include 
-#include 
-#include 
-
-
-int main(int, char **)
-try
-{
-    DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
-    DB::WriteBufferFromFileDescriptor out1(STDOUT_FILENO);
-    DB::AsynchronousWriteBuffer out2(out1);
-    DB::CompressedWriteBuffer out3(out2);
-
-    DB::copyData(in1, out3);
-
-    return 0;
-}
-catch (const DB::Exception & e)
-{
-    std::cerr << e.what() << ", " << e.displayText() << std::endl;
-    return 1;
-}
diff --git a/src/IO/tests/parse_int_perf.cpp b/src/IO/tests/parse_int_perf.cpp
index 93f49d80258..e35a3d8a857 100644
--- a/src/IO/tests/parse_int_perf.cpp
+++ b/src/IO/tests/parse_int_perf.cpp
@@ -62,7 +62,6 @@ int main(int argc, char ** argv)
         {
             DB::WriteBufferFromVector wb(formatted);
         //    DB::CompressedWriteBuffer wb2(wb1);
-        //    DB::AsynchronousWriteBuffer wb(wb2);
             Stopwatch watch;
 
             UInt64 tsc = rdtsc();
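
The removed class is a textbook double-buffering scheme: fill one buffer while the previous one is flushed in the background, swap, repeat. A minimal sketch of the same idea with standard-library primitives (illustrative names only, not the ClickHouse API):

```cpp
#include <future>
#include <iostream>
#include <string>
#include <utility>

class DoubleBufferedWriter
{
public:
    void write(std::string chunk)
    {
        front = std::move(chunk);       /// Fill the active buffer.
        if (pending.valid())
            pending.wait();             /// The previous buffer is now fully written.
        std::swap(front, back);         /// Hand the filled buffer to the writer side.
        pending = std::async(std::launch::async, [this] { flush(back); });
    }

    ~DoubleBufferedWriter()
    {
        if (pending.valid())
            pending.wait();             /// Final flush completes before destruction.
    }

private:
    static void flush(const std::string & data) { std::cout << data; }

    std::string front, back;
    std::future<void> pending;
};

int main()
{
    DoubleBufferedWriter writer;
    writer.write("hello ");
    writer.write("world\n");
}
```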

From 303c389b368598a57ef85ccca1930533fd8a5c13 Mon Sep 17 00:00:00 2001
From: keenwolf 
Date: Sun, 28 Feb 2021 18:11:54 +0800
Subject: [PATCH 139/149] Change time_t to unsigned to properly handle the
 start date of 1970-01-01

---
 base/common/DateLUTImpl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/common/DateLUTImpl.h b/base/common/DateLUTImpl.h
index 8991f69d3f3..471169ebcec 100644
--- a/base/common/DateLUTImpl.h
+++ b/base/common/DateLUTImpl.h
@@ -320,7 +320,7 @@ public:
         /// To consider the DST changing situation within this day.
        /// Also make sure that special timezones with a non-whole-hour offset, such as 'Australia/Lord_Howe', are taken into account.
         DayNum index = findIndex(t);
-        time_t res = t - lut[index].date;
+        UInt32 res = t - lut[index].date;
         if (lut[index].amount_of_offset_change != 0 && t >= lut[index].date + lut[index].time_at_offset_change)
             res += lut[index].amount_of_offset_change;
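
A toy demonstration of the representation change (the offset value and the scenario are assumptions for illustration; this is not the actual DateLUT lookup):

```cpp
#include <cstdint>
#include <ctime>
#include <iostream>

int main()
{
    /// Around 1970-01-01 the stored local day boundary may lie after `t`,
    /// making the difference negative in a signed type.
    time_t t = 0;                  /// 1970-01-01 00:00:00 UTC.
    time_t date = 3 * 3600;        /// Hypothetical local day start for a UTC+3 zone.

    time_t signed_res = t - date;                              /// -10800.
    uint32_t unsigned_res = static_cast<uint32_t>(t - date);   /// 4294956496, wraps mod 2^32.

    std::cout << static_cast<long long>(signed_res) << ' ' << unsigned_res << '\n';
}
```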
 

From eb0387c5a960d029c063bc3fb7992011f6877ca3 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin 
Date: Sun, 28 Feb 2021 10:18:49 +0300
Subject: [PATCH 140/149] Fix abnormal server termination for nested writers

Writers that wrap a nested writer can call next() on the nested writer
from the destructor, and this will throw again, so the buffer position
should be updated on exceptions.

Found by stress test (thread) here [1] and here [2]:

    2021.02.27 19:27:53.498977 [ 302 ] {}  BaseDaemon: (version 21.3.1.6130, build id: 2DAEC5DEBF03C5A1C3BF66B7779C886F16239345) (from thread 1614) Terminate called for uncaught exception:
    Code: 24, e.displayText() = DB::Exception: Cannot write to ostream at offset 2097498, Stack trace (when copying this message, always include the lines below):

    0. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/exception:0: Poco::Exception::Exception(std::__1::basic_string, std::__1::allocator > const&, int) @ 0x15bef2ab in /usr/bin/clickhouse
    1. ./obj-x86_64-linux-gnu/../src/Common/Exception.cpp:56: DB::Exception::Exception(std::__1::basic_string, std::__1::allocator > const&, int, bool) @ 0x8aea92e in /usr/bin/clickhouse
    2. ./obj-x86_64-linux-gnu/../src/IO/WriteBufferFromOStream.cpp:0: DB::WriteBufferFromOStream::nextImpl() @ 0x8bbbc45 in /usr/bin/clickhouse
    3. ./obj-x86_64-linux-gnu/../src/IO/BufferBase.h:39: DB::WriteBufferFromOStream::~WriteBufferFromOStream() @ 0x8bbc077 in /usr/bin/clickhouse
    4. ./obj-x86_64-linux-gnu/../src/IO/WriteBufferFromOStream.cpp:44: DB::WriteBufferFromOStream::~WriteBufferFromOStream() @ 0x8bbc18a in /usr/bin/clickhouse
    5. ./obj-x86_64-linux-gnu/../src/IO/BufferWithOwnMemory.h:137: DB::ZstdDeflatingWriteBuffer::~ZstdDeflatingWriteBuffer() @ 0x118bdc29 in /usr/bin/clickhouse
    6. ./obj-x86_64-linux-gnu/../src/IO/ZstdDeflatingWriteBuffer.cpp:32: DB::ZstdDeflatingWriteBuffer::~ZstdDeflatingWriteBuffer() @ 0x118be3ea in /usr/bin/clickhouse
    7. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/memory:0: DB::WriteBufferFromHTTPServerResponse::finalize() @ 0x12f1dceb in /usr/bin/clickhouse
    8. ./obj-x86_64-linux-gnu/../src/Server/HTTPHandler.cpp:703: DB::HTTPHandler::trySendExceptionToClient(std::__1::basic_string, std::__1::allocator > const&, int, DB::HTTPServerRequest&, DB::HTTPServerResponse&, DB::HTTPHandler::Output&) @ 0x12e9fecc in /usr/bin/clickhouse
    9. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/string:1444: DB::HTTPHandler::handleRequest(DB::HTTPServerRequest&, DB::HTTPServerResponse&) @ 0x12ea0d60 in /usr/bin/clickhouse
    10. ./obj-x86_64-linux-gnu/../src/Server/HTTP/HTTPServerConnection.cpp:0: DB::HTTPServerConnection::run() @ 0x12f16db1 in /usr/bin/clickhouse
    11. ./obj-x86_64-linux-gnu/../contrib/poco/Net/src/TCPServerConnection.cpp:57: Poco::Net::TCPServerConnection::start() @ 0x15b184f3 in /usr/bin/clickhouse
    12. ./obj-x86_64-linux-gnu/../contrib/poco/Net/src/TCPServerDispatcher.cpp:0: Poco::Net::TCPServerDispatcher::run() @ 0x15b18c1f in /usr/bin/clickhouse
    13. ./obj-x86_64-linux-gnu/../contrib/poco/Foundation/include/Poco/ScopedLock.h:36: Poco::PooledThread::run() @ 0x15c7fdb2 in /usr/bin/clickhouse
    14. ./obj-x86_64-linux-gnu/../contrib/poco/Foundation/src/Thread.cpp:56: Poco::(anonymous namespace)::RunnableHolder::run() @ 0x15c7e350 in /usr/bin/clickhouse
    15. ./obj-x86_64-linux-gnu/../contrib/poco/Foundation/include/Poco/SharedPtr.h:277: Poco::ThreadImpl::runnableEntry(void*) @ 0x15c7cb58 in /usr/bin/clickhouse
    16. __tsan_thread_start_func @ 0x8a04ced in /usr/bin/clickhouse
    17. start_thread @ 0x9609 in /usr/lib/x86_64-linux-gnu/libpthread-2.31.so
    18. __clone @ 0x122293 in /usr/lib/x86_64-linux-gnu/libc-2.31.so
     (version 21.3.1.6130)

  [1]: https://clickhouse-test-reports.s3.yandex.net/21279/4f61ef3099f42f17b496a0b0424773978d9a32dc/stress_test_(thread).html#fail1
  [2]: https://clickhouse-test-reports.s3.yandex.net/21292/ae9fea1d0af118a8f87b224d194d61da1567188b/stress_test_(thread).html#fail1

v2: https://clickhouse-test-reports.s3.yandex.net/21305/e969daa6e86c5e09cfef08cfde19712982b64e59/stress_test_(thread).html#fail1
---
 src/IO/BrotliWriteBuffer.cpp                  | 65 +++++++++++++------
 src/IO/BrotliWriteBuffer.h                    |  5 +-
 src/IO/LZMADeflatingWriteBuffer.cpp           | 59 ++++++++++++-----
 src/IO/LZMADeflatingWriteBuffer.h             |  5 +-
 src/IO/ZlibDeflatingWriteBuffer.cpp           | 45 ++++++++++---
 src/IO/ZlibDeflatingWriteBuffer.h             | 11 ++--
 src/IO/ZstdDeflatingWriteBuffer.cpp           | 50 ++++++++++----
 src/IO/ZstdDeflatingWriteBuffer.h             | 13 ++--
 src/IO/tests/lzma_buffers.cpp                 |  2 +-
 src/IO/tests/zlib_buffers.cpp                 |  2 +-
 src/IO/tests/zstd_buffers.cpp                 |  2 +-
 .../WriteBufferFromHTTPServerResponse.cpp     |  2 +
 12 files changed, 187 insertions(+), 74 deletions(-)

diff --git a/src/IO/BrotliWriteBuffer.cpp b/src/IO/BrotliWriteBuffer.cpp
index e562cc70e61..e87eeb1a2be 100644
--- a/src/IO/BrotliWriteBuffer.cpp
+++ b/src/IO/BrotliWriteBuffer.cpp
@@ -64,29 +64,38 @@ void BrotliWriteBuffer::nextImpl()
     in_data = reinterpret_cast<const uint8_t *>(working_buffer.begin());
     in_available = offset();
 
-    do
+    try
     {
-        out->nextIfAtEnd();
-        out_data = reinterpret_cast<uint8_t *>(out->position());
-        out_capacity = out->buffer().end() - out->position();
-
-        int result = BrotliEncoderCompressStream(
-                brotli->state,
-                in_available ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH,
-                &in_available,
-                &in_data,
-                &out_capacity,
-                &out_data,
-                nullptr);
-
-        out->position() = out->buffer().end() - out_capacity;
-
-        if (result == 0)
+        do
         {
-            throw Exception("brotli compress failed", ErrorCodes::BROTLI_WRITE_FAILED);
+            out->nextIfAtEnd();
+            out_data = reinterpret_cast<uint8_t *>(out->position());
+            out_capacity = out->buffer().end() - out->position();
+
+            int result = BrotliEncoderCompressStream(
+                    brotli->state,
+                    in_available ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH,
+                    &in_available,
+                    &in_data,
+                    &out_capacity,
+                    &out_data,
+                    nullptr);
+
+            out->position() = out->buffer().end() - out_capacity;
+
+            if (result == 0)
+            {
+                throw Exception("brotli compress failed", ErrorCodes::BROTLI_WRITE_FAILED);
+            }
         }
+        while (in_available > 0);
+    }
+    catch (...)
+    {
+        /// Do not try to write next time after exception.
+        out->position() = out->buffer().begin();
+        throw;
     }
-    while (in_available > 0);
 }
 
 void BrotliWriteBuffer::finish()
@@ -94,6 +103,23 @@ void BrotliWriteBuffer::finish()
     if (finished)
         return;
 
+    try
+    {
+        finishImpl();
+        out->next();
+        finished = true;
+    }
+    catch (...)
+    {
+        /// Do not try to flush next time after exception.
+        out->position() = out->buffer().begin();
+        finished = true;
+        throw;
+    }
+}
+
+void BrotliWriteBuffer::finishImpl()
+{
     next();
 
     while (true)
@@ -115,7 +141,6 @@ void BrotliWriteBuffer::finish()
 
         if (BrotliEncoderIsFinished(brotli->state))
         {
-            finished = true;
             return;
         }
 
diff --git a/src/IO/BrotliWriteBuffer.h b/src/IO/BrotliWriteBuffer.h
index 5a294354f49..26788bc6795 100644
--- a/src/IO/BrotliWriteBuffer.h
+++ b/src/IO/BrotliWriteBuffer.h
@@ -18,11 +18,14 @@ public:
 
     ~BrotliWriteBuffer() override;
 
-    void finish();
+    void finalize() override { finish(); }
 
 private:
     void nextImpl() override;
 
+    void finish();
+    void finishImpl();
+
     class BrotliStateWrapper;
     std::unique_ptr<BrotliStateWrapper> brotli;
 
diff --git a/src/IO/LZMADeflatingWriteBuffer.cpp b/src/IO/LZMADeflatingWriteBuffer.cpp
index 5803bc1e9f1..96f1d34b01b 100644
--- a/src/IO/LZMADeflatingWriteBuffer.cpp
+++ b/src/IO/LZMADeflatingWriteBuffer.cpp
@@ -64,27 +64,36 @@ void LZMADeflatingWriteBuffer::nextImpl()
     lstr.next_in = reinterpret_cast<unsigned char *>(working_buffer.begin());
     lstr.avail_in = offset();
 
-    lzma_action action = LZMA_RUN;
-    do
+    try
     {
-        out->nextIfAtEnd();
-        lstr.next_out = reinterpret_cast<unsigned char *>(out->position());
-        lstr.avail_out = out->buffer().end() - out->position();
+        lzma_action action = LZMA_RUN;
+        do
+        {
+            out->nextIfAtEnd();
+            lstr.next_out = reinterpret_cast<unsigned char *>(out->position());
+            lstr.avail_out = out->buffer().end() - out->position();
 
-        lzma_ret ret = lzma_code(&lstr, action);
-        out->position() = out->buffer().end() - lstr.avail_out;
+            lzma_ret ret = lzma_code(&lstr, action);
+            out->position() = out->buffer().end() - lstr.avail_out;
 
-        if (ret == LZMA_STREAM_END)
-            return;
+            if (ret == LZMA_STREAM_END)
+                return;
 
-        if (ret != LZMA_OK)
-            throw Exception(
-                ErrorCodes::LZMA_STREAM_ENCODER_FAILED,
-                "lzma stream encoding failed: error code: {}; lzma_version: {}",
-                ret,
-                LZMA_VERSION_STRING);
+            if (ret != LZMA_OK)
+                throw Exception(
+                    ErrorCodes::LZMA_STREAM_ENCODER_FAILED,
+                    "lzma stream encoding failed: error code: {}; lzma_version: {}",
+                    ret,
+                    LZMA_VERSION_STRING);
 
-    } while (lstr.avail_in > 0 || lstr.avail_out == 0);
+        } while (lstr.avail_in > 0 || lstr.avail_out == 0);
+    }
+    catch (...)
+    {
+        /// Do not try to write next time after exception.
+        out->position() = out->buffer().begin();
+        throw;
+    }
 }
 
 
@@ -93,6 +102,23 @@ void LZMADeflatingWriteBuffer::finish()
     if (finished)
         return;
 
+    try
+    {
+        finishImpl();
+        out->next();
+        finished = true;
+    }
+    catch (...)
+    {
+        /// Do not try to flush next time after exception.
+        out->position() = out->buffer().begin();
+        finished = true;
+        throw;
+    }
+}
+
+void LZMADeflatingWriteBuffer::finishImpl()
+{
     next();
 
     do
@@ -106,7 +132,6 @@ void LZMADeflatingWriteBuffer::finish()
 
         if (ret == LZMA_STREAM_END)
         {
-            finished = true;
             return;
         }
 
diff --git a/src/IO/LZMADeflatingWriteBuffer.h b/src/IO/LZMADeflatingWriteBuffer.h
index efa4532d372..98eb1732e76 100644
--- a/src/IO/LZMADeflatingWriteBuffer.h
+++ b/src/IO/LZMADeflatingWriteBuffer.h
@@ -24,13 +24,16 @@ public:
         char * existing_memory = nullptr,
         size_t alignment = 0);
 
-    void finish();
+    void finalize() override { finish(); }
 
     ~LZMADeflatingWriteBuffer() override;
 
 private:
     void nextImpl() override;
 
+    void finish();
+    void finishImpl();
+
     std::unique_ptr<WriteBuffer> out;
     lzma_stream lstr;
     bool finished = false;
diff --git a/src/IO/ZlibDeflatingWriteBuffer.cpp b/src/IO/ZlibDeflatingWriteBuffer.cpp
index 4b838ac6d0a..5da82b52279 100644
--- a/src/IO/ZlibDeflatingWriteBuffer.cpp
+++ b/src/IO/ZlibDeflatingWriteBuffer.cpp
@@ -75,19 +75,28 @@ void ZlibDeflatingWriteBuffer::nextImpl()
     zstr.next_in = reinterpret_cast<unsigned char *>(working_buffer.begin());
     zstr.avail_in = offset();
 
-    do
+    try
     {
-        out->nextIfAtEnd();
-        zstr.next_out = reinterpret_cast<unsigned char *>(out->position());
-        zstr.avail_out = out->buffer().end() - out->position();
+        do
+        {
+            out->nextIfAtEnd();
+            zstr.next_out = reinterpret_cast<unsigned char *>(out->position());
+            zstr.avail_out = out->buffer().end() - out->position();
 
-        int rc = deflate(&zstr, Z_NO_FLUSH);
-        out->position() = out->buffer().end() - zstr.avail_out;
+            int rc = deflate(&zstr, Z_NO_FLUSH);
+            out->position() = out->buffer().end() - zstr.avail_out;
 
-        if (rc != Z_OK)
-            throw Exception(std::string("deflate failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED);
+            if (rc != Z_OK)
+                throw Exception(std::string("deflate failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED);
+        }
+        while (zstr.avail_in > 0 || zstr.avail_out == 0);
+    }
+    catch (...)
+    {
+        /// Do not try to write next time after exception.
+        out->position() = out->buffer().begin();
+        throw;
     }
-    while (zstr.avail_in > 0 || zstr.avail_out == 0);
 }
 
 void ZlibDeflatingWriteBuffer::finish()
@@ -95,6 +104,23 @@ void ZlibDeflatingWriteBuffer::finish()
     if (finished)
         return;
 
+    try
+    {
+        finishImpl();
+        out->next();
+        finished = true;
+    }
+    catch (...)
+    {
+        /// Do not try to flush next time after exception.
+        out->position() = out->buffer().begin();
+        finished = true;
+        throw;
+    }
+}
+
+void ZlibDeflatingWriteBuffer::finishImpl()
+{
     next();
 
     /// https://github.com/zlib-ng/zlib-ng/issues/494
@@ -123,7 +149,6 @@ void ZlibDeflatingWriteBuffer::finish()
 
         if (rc == Z_STREAM_END)
         {
-            finished = true;
             return;
         }
 
diff --git a/src/IO/ZlibDeflatingWriteBuffer.h b/src/IO/ZlibDeflatingWriteBuffer.h
index f9df8f8157b..6f623f55f56 100644
--- a/src/IO/ZlibDeflatingWriteBuffer.h
+++ b/src/IO/ZlibDeflatingWriteBuffer.h
@@ -22,16 +22,19 @@ public:
             char * existing_memory = nullptr,
             size_t alignment = 0);
 
-    /// Flush all pending data and write zlib footer to the underlying buffer.
-    /// After the first call to this function, subsequent calls will have no effect and
-    /// an attempt to write to this buffer will result in exception.
-    void finish();
+    void finalize() override { finish(); }
 
     ~ZlibDeflatingWriteBuffer() override;
 
 private:
     void nextImpl() override;
 
+    void finishImpl();
+    /// Flush all pending data and write zlib footer to the underlying buffer.
+    /// After the first call to this function, subsequent calls will have no effect and
+    /// an attempt to write to this buffer will result in exception.
+    void finish();
+
     std::unique_ptr<WriteBuffer> out;
     z_stream zstr;
     bool finished = false;
diff --git a/src/IO/ZstdDeflatingWriteBuffer.cpp b/src/IO/ZstdDeflatingWriteBuffer.cpp
index 9b79d5ae513..27694797db6 100644
--- a/src/IO/ZstdDeflatingWriteBuffer.cpp
+++ b/src/IO/ZstdDeflatingWriteBuffer.cpp
@@ -61,28 +61,53 @@ void ZstdDeflatingWriteBuffer::nextImpl()
     input.size = offset();
     input.pos = 0;
 
-    bool finished = false;
-    do
+    try
     {
-        out->nextIfAtEnd();
+        bool ended = false;
+        do
+        {
+            out->nextIfAtEnd();
 
-        output.dst = reinterpret_cast<void *>(out->buffer().begin());
-        output.size = out->buffer().size();
-        output.pos = out->offset();
+            output.dst = reinterpret_cast<void *>(out->buffer().begin());
+            output.size = out->buffer().size();
+            output.pos = out->offset();
 
 
-        ZSTD_compressStream2(cctx, &output, &input, mode);
-        out->position() = out->buffer().begin() + output.pos;
-        finished = (input.pos == input.size);
-    } while (!finished);
-
+            ZSTD_compressStream2(cctx, &output, &input, mode);
+            out->position() = out->buffer().begin() + output.pos;
+            ended = (input.pos == input.size);
+        } while (!ended);
+    }
+    catch (...)
+    {
+        /// Do not try to write next time after exception.
+        out->position() = out->buffer().begin();
+        throw;
+    }
 }
 
 void ZstdDeflatingWriteBuffer::finish()
 {
-    if (flushed)
+    if (finished)
         return;
 
+    try
+    {
+        finishImpl();
+        out->next();
+        finished = true;
+    }
+    catch (...)
+    {
+        /// Do not try to flush next time after exception.
+        out->position() = out->buffer().begin();
+        finished = true;
+        throw;
+    }
+}
+
+void ZstdDeflatingWriteBuffer::finishImpl()
+{
     next();
 
     out->nextIfAtEnd();
@@ -99,7 +124,6 @@ void ZstdDeflatingWriteBuffer::finish()
     if (ZSTD_isError(remaining))
         throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder end failed: zstd version: {}", ZSTD_VERSION_STRING);
     out->position() = out->buffer().begin() + output.pos;
-    flushed = true;
 }
 
 }
diff --git a/src/IO/ZstdDeflatingWriteBuffer.h b/src/IO/ZstdDeflatingWriteBuffer.h
index 2c7dd38dbb0..b4ecc44d6f4 100644
--- a/src/IO/ZstdDeflatingWriteBuffer.h
+++ b/src/IO/ZstdDeflatingWriteBuffer.h
@@ -20,21 +20,24 @@ public:
         char * existing_memory = nullptr,
         size_t alignment = 0);
 
-    /// Flush all pending data and write zstd footer to the underlying buffer.
-    /// After the first call to this function, subsequent calls will have no effect and
-    /// an attempt to write to this buffer will result in exception.
-    void finish();
+    void finalize() override { finish(); }
 
     ~ZstdDeflatingWriteBuffer() override;
 
 private:
     void nextImpl() override;
 
+    /// Flush all pending data and write zstd footer to the underlying buffer.
+    /// After the first call to this function, subsequent calls will have no effect and
+    /// an attempt to write to this buffer will result in exception.
+    void finish();
+    void finishImpl();
+
     std::unique_ptr<WriteBuffer> out;
     ZSTD_CCtx * cctx;
     ZSTD_inBuffer input;
     ZSTD_outBuffer output;
-    bool flushed = false;
+    bool finished = false;
 };
 
 }
diff --git a/src/IO/tests/lzma_buffers.cpp b/src/IO/tests/lzma_buffers.cpp
index 7eb6bf8b81c..ff3d518bfab 100644
--- a/src/IO/tests/lzma_buffers.cpp
+++ b/src/IO/tests/lzma_buffers.cpp
@@ -28,7 +28,7 @@ try
             DB::writeIntText(i, lzma_buf);
             DB::writeChar('\t', lzma_buf);
         }
-        lzma_buf.finish();
+        lzma_buf.finalize();
 
         stopwatch.stop();
 
diff --git a/src/IO/tests/zlib_buffers.cpp b/src/IO/tests/zlib_buffers.cpp
index 3428d5e995a..2068a3e6668 100644
--- a/src/IO/tests/zlib_buffers.cpp
+++ b/src/IO/tests/zlib_buffers.cpp
@@ -30,7 +30,7 @@ try
             DB::writeIntText(i, deflating_buf);
             DB::writeChar('\t', deflating_buf);
         }
-        deflating_buf.finish();
+        deflating_buf.finalize();
 
         stopwatch.stop();
         std::cout << "Writing done. Elapsed: " << stopwatch.elapsedSeconds() << " s."
diff --git a/src/IO/tests/zstd_buffers.cpp b/src/IO/tests/zstd_buffers.cpp
index f269c0b22fd..533229f4878 100644
--- a/src/IO/tests/zstd_buffers.cpp
+++ b/src/IO/tests/zstd_buffers.cpp
@@ -30,7 +30,7 @@ try
             DB::writeIntText(i, zstd_buf);
             DB::writeChar('\t', zstd_buf);
         }
-        zstd_buf.finish();
+        zstd_buf.finalize();
 
         stopwatch.stop();
 
diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp
index 81f8cc30468..355af038da9 100644
--- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp
+++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp
@@ -171,6 +171,8 @@ void WriteBufferFromHTTPServerResponse::finalize()
     try
     {
         next();
+        if (out)
+            out->finalize();
         out.reset();
     }
     catch (...)
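
The same "finish once, even on failure" shape repeats in every writer above; a generic sketch of the pattern (illustrative class, not ClickHouse code):

```cpp
#include <iostream>

class Finalizable
{
public:
    void finish()
    {
        if (finished)
            return;                 /// Idempotent: safe to call repeatedly.
        try
        {
            finishImpl();           /// Flush pending data and write the footer.
            finished = true;
        }
        catch (...)
        {
            /// Mark as finished anyway, so the destructor of an outer writer
            /// does not try to flush this buffer again and throw a second
            /// exception during stack unwinding (which would terminate).
            finished = true;
            throw;
        }
    }

private:
    void finishImpl() { /* flush + footer would go here */ }
    bool finished = false;
};

int main()
{
    Finalizable f;
    f.finish();
    f.finish();  /// The second call is a no-op.
}
```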

From 2df33be7c22307ecbe4b85259c3cca05a23840b8 Mon Sep 17 00:00:00 2001
From: fuwhu 
Date: Sun, 28 Feb 2021 23:17:31 +0800
Subject: [PATCH 141/149] Remove unused code in MergeTreeWriteAheadLog::restore

---
 src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp
index e726f0ffd51..7ddc8d93b03 100644
--- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp
+++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp
@@ -147,7 +147,6 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor
             }
             else if (action_type == ActionType::ADD_PART)
             {
-                auto part_disk = storage.reserveSpace(0)->getDisk();
                 auto single_disk_volume = std::make_shared("volume_" + part_name, disk, 0);
 
                 part = storage.createPart(

From 9b633ac5646a4e9e6bc31a8f1f43b5b0ee5429aa Mon Sep 17 00:00:00 2001
From: alexey-milovidov 
Date: Sun, 28 Feb 2021 23:27:01 +0300
Subject: [PATCH 142/149] Update ontime.md

---
 .../getting-started/example-datasets/ontime.md   | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md
index 6e46cddba52..83673cdceb6 100644
--- a/docs/en/getting-started/example-datasets/ontime.md
+++ b/docs/en/getting-started/example-datasets/ontime.md
@@ -15,17 +15,9 @@ This dataset can be obtained in two ways:
 Downloading data:
 
 ``` bash
-for s in `seq 1987 2018`
-do
-for m in `seq 1 12`
-do
-wget https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_${s}_${m}.zip
-done
-done
+echo https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_{1987..2021}_{1..12}.zip | xargs -P10 wget --no-check-certificate --continue
 ```
 
-(from https://github.com/Percona-Lab/ontime-airline-performance/blob/master/download.sh )
-
 Creating a table:
 
 ``` sql
@@ -145,12 +137,14 @@ ORDER BY (Carrier, FlightDate)
 SETTINGS index_granularity = 8192;
 ```
 
-Loading data:
+Loading data with multiple threads:
 
 ``` bash
-$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done
+ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'"
 ```
 
+(if you experience memory shortage or other issues on your server, remove the `-P $(nproc)` part)
+
 ## Download of Prepared Partitions {#download-of-prepared-partitions}
 
 ``` bash

From 11f2a271a20664eab916af4dd08f13c7b765f04c Mon Sep 17 00:00:00 2001
From: alesapin 
Date: Mon, 1 Mar 2021 10:40:00 +0300
Subject: [PATCH 143/149] Remove useless unit test

---
 src/Coordination/tests/gtest_for_build.cpp | 118 ---------------------
 1 file changed, 118 deletions(-)

diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index c064fcdbef4..37517808ef0 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -195,124 +195,6 @@ TEST(CoordinationTest, TestSummingRaft1)
     s1.launcher.shutdown(5);
 }
 
-TEST(CoordinationTest, TestSummingRaft3)
-{
-    ChangelogDirTest test1("./logs1");
-    SummingRaftServer s1(1, "localhost", 44444, "./logs1");
-    ChangelogDirTest test2("./logs2");
-    SummingRaftServer s2(2, "localhost", 44445, "./logs2");
-    ChangelogDirTest test3("./logs3");
-    SummingRaftServer s3(3, "localhost", 44446, "./logs3");
-
-    nuraft::srv_config first_config(1, 0, "localhost:44444", "", false, 0);
-    auto ret1 = s2.raft_instance->add_srv(first_config);
-    while (!ret1->get_accepted())
-    {
-
-        std::cout << "failed to add server: "
-                  << ret1->get_result_str() << std::endl;
-
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-        ret1 = s2.raft_instance->add_srv(first_config);
-    }
-
-    while (s1.raft_instance->get_leader() != 2)
-    {
-        std::cout << "Waiting s1 to join to s2 quorum\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    nuraft::srv_config third_config(3, 0, "localhost:44446", "", false, 0);
-    auto ret3 = s2.raft_instance->add_srv(third_config);
-    if (!ret3->get_accepted())
-    {
-        std::cout << "failed to add server: "
-                  << ret3->get_result_str() << std::endl;
-
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-        ret3 = s2.raft_instance->add_srv(third_config);
-    }
-
-    while (s3.raft_instance->get_leader() != 2)
-    {
-        std::cout << "Waiting s3 to join to s2 quorum\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    /// S2 is leader
-    EXPECT_EQ(s1.raft_instance->get_leader(), 2);
-    EXPECT_EQ(s2.raft_instance->get_leader(), 2);
-    EXPECT_EQ(s3.raft_instance->get_leader(), 2);
-
-    std::cerr << "Starting to add entries\n";
-    auto entry = getBuffer(1);
-    auto ret = s2.raft_instance->append_entries({entry});
-    while (!ret->get_accepted() || ret->get_result_code() != nuraft::cmd_result_code::OK)
-    {
-        std::cerr <<  ret->get_accepted() << "failed to replicate: entry 1" << ret->get_result_code() << std::endl;
-        ret = s2.raft_instance->append_entries({entry});
-    }
-
-    while (s1.state_machine->getValue() != 1)
-    {
-        std::cout << "Waiting s1 to apply entry\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    while (s2.state_machine->getValue() != 1)
-    {
-        std::cout << "Waiting s2 to apply entry\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    while (s3.state_machine->getValue() != 1)
-    {
-        std::cout << "Waiting s3 to apply entry\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    EXPECT_EQ(s1.state_machine->getValue(), 1);
-    EXPECT_EQ(s2.state_machine->getValue(), 1);
-    EXPECT_EQ(s3.state_machine->getValue(), 1);
-
-    auto non_leader_entry = getBuffer(3);
-    auto ret_non_leader1 = s1.raft_instance->append_entries({non_leader_entry});
-
-    EXPECT_FALSE(ret_non_leader1->get_accepted());
-
-    auto ret_non_leader3 = s3.raft_instance->append_entries({non_leader_entry});
-
-    EXPECT_FALSE(ret_non_leader3->get_accepted());
-
-    auto leader_entry = getBuffer(77);
-    auto ret_leader = s2.raft_instance->append_entries({leader_entry});
-    while (!ret_leader->get_accepted() || ret_leader->get_result_code() != nuraft::cmd_result_code::OK)
-    {
-        std::cerr << "failed to replicate: entry 78" << ret_leader->get_result_code() << std::endl;
-        ret_leader = s2.raft_instance->append_entries({leader_entry});
-    }
-
-    while (s1.state_machine->getValue() != 78)
-    {
-        std::cout << "Waiting s1 to apply entry\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    while (s3.state_machine->getValue() != 78)
-    {
-        std::cout << "Waiting s3 to apply entry\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    EXPECT_EQ(s1.state_machine->getValue(), 78);
-    EXPECT_EQ(s2.state_machine->getValue(), 78);
-    EXPECT_EQ(s3.state_machine->getValue(), 78);
-
-    s1.launcher.shutdown(5);
-    s2.launcher.shutdown(5);
-    s3.launcher.shutdown(5);
-}
-
nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coordination::ZooKeeperRequestPtr & request)
 {
     DB::WriteBufferFromNuraftBuffer buf;

From 366fba4b0493904cf9a68f158b437cf6ac5a7114 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Mon, 1 Mar 2021 16:06:44 +0300
Subject: [PATCH 144/149] Update 01592_long_window_functions1.sql

---
 tests/queries/0_stateless/01592_long_window_functions1.sql | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01592_long_window_functions1.sql b/tests/queries/0_stateless/01592_long_window_functions1.sql
index c7751ab9f33..bb0f77ff60a 100644
--- a/tests/queries/0_stateless/01592_long_window_functions1.sql
+++ b/tests/queries/0_stateless/01592_long_window_functions1.sql
@@ -1,6 +1,7 @@
 drop table if exists stack;
 
 set allow_experimental_window_functions = 1;
+set max_insert_threads = 4;
 
 create table stack(item_id Int64, brand_id Int64, rack_id Int64, dt DateTime, expiration_dt DateTime, quantity UInt64)
 Engine = MergeTree 
@@ -10,7 +11,7 @@ order by (brand_id, toStartOfHour(dt));
 insert into stack 
 select number%99991, number%11, number%1111, toDateTime('2020-01-01 00:00:00')+number/100, 
    toDateTime('2020-02-01 00:00:00')+number/10, intDiv(number,100)+1
-from numbers(10000000);
+from numbers_mt(10000000);
 
 select '---- arrays ----';
 

From a4f2ee0752278198833f495aa9643e0b56ac0685 Mon Sep 17 00:00:00 2001
From: Roman Bug 
Date: Mon, 1 Mar 2021 16:51:38 +0300
Subject: [PATCH 145/149] DOCSUP-6144: Edit and translate PR to Russian
 (#19780)

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
Co-authored-by: romanzhukov 
Co-authored-by: Vladimir 
---
 docs/en/operations/system-tables/index.md     |  4 +-
 .../en/sql-reference/statements/select/all.md |  6 +--
 docs/ru/operations/system-tables/index.md     | 45 +++++++++++++++----
 .../external-dicts-dict-sources.md            |  2 +-
 .../ru/sql-reference/statements/select/all.md | 22 +++++++++
 5 files changed, 65 insertions(+), 14 deletions(-)
 create mode 100644 docs/ru/sql-reference/statements/select/all.md

diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md
index 5dc23aee686..e66f082167e 100644
--- a/docs/en/operations/system-tables/index.md
+++ b/docs/en/operations/system-tables/index.md
@@ -20,7 +20,7 @@ System tables:
 
 Most system tables store their data in RAM. A ClickHouse server creates such system tables at the start.
 
-Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), crash_log and [text_log](../../operations/system-tables/text_log.md) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem by default. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
+Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md) and [text_log](../../operations/system-tables/text_log.md) are served by the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a filesystem by default. If you remove a table from the filesystem, the ClickHouse server creates an empty one again at the time of the next data write. If the schema of a system table changed in a new release, then ClickHouse renames the current table and creates a new one.
 
 System log tables can be customized by creating a config file with the same name as the table under `/etc/clickhouse-server/config.d/`, or setting corresponding elements in `/etc/clickhouse-server/config.xml`. The elements which can be customized are:
 
@@ -33,7 +33,7 @@ System log tables can be customized by creating a config file with the same name
 
 An example:
 
-```
+```xml
 <yandex>
     <query_log>
         <database>system</database>
diff --git a/docs/en/sql-reference/statements/select/all.md b/docs/en/sql-reference/statements/select/all.md
index 5e0de4c142b..891b82c4319 100644
--- a/docs/en/sql-reference/statements/select/all.md
+++ b/docs/en/sql-reference/statements/select/all.md
@@ -4,10 +4,8 @@ toc_title: ALL
 
 # ALL Clause {#select-all}
 
-`SELECT ALL` is identical to `SELECT` without `DISTINCT`.
+If there are multiple matching rows in the table, then `ALL` returns all of them. `SELECT ALL` is identical to `SELECT` without `DISTINCT`. If both `ALL` and `DISTINCT` are specified, an exception will be thrown.
 
-- If `ALL` specified, ignore it.
-- If both `ALL` and `DISTINCT` specified, exception will be thrown.
 
 `ALL` can also be specified inside an aggregate function with the same effect (noop), for instance:
 
 is equal to
 ```sql
 SELECT sum(number) FROM numbers(10);
 ```
+
+[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/all) 
diff --git a/docs/ru/operations/system-tables/index.md b/docs/ru/operations/system-tables/index.md
index 93ea1c92068..cdea6102a81 100644
--- a/docs/ru/operations/system-tables/index.md
+++ b/docs/ru/operations/system-tables/index.md
@@ -9,25 +9,54 @@ toc_title: "\u0421\u0438\u0441\u0442\u0435\u043c\u043d\u044b\u0435\u0020\u0442\u
 
 Системные таблицы содержат информацию о:
 
--   Состоянии сервера, процессов и окружении.
--   Внутренних процессах сервера.
+-   состоянии сервера, процессов и окружении.
+-   внутренних процессах сервера.
 
 Системные таблицы:
 
--   Находятся в базе данных `system`.
--   Доступны только для чтения данных.
--   Не могут быть удалены или изменены, но их можно отсоединить.
+-   находятся в базе данных `system`.
+-   доступны только для чтения данных.
+-   не могут быть удалены или изменены, но их можно отсоединить.
 
-Системные таблицы `metric_log`, `query_log`, `query_thread_log`, `trace_log` системные таблицы хранят данные в файловой системе. Остальные системные таблицы хранят свои данные в оперативной памяти. Сервер ClickHouse создает такие системные таблицы при запуске.
+Большинство системных таблиц хранят свои данные в оперативной памяти. Сервер ClickHouse создает эти системные таблицы при старте.
+
+В отличие от других системных таблиц, таблицы с системными логами [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md) и [text_log](../../operations/system-tables/text_log.md) используют движок таблиц [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) и по умолчанию хранят свои данные в файловой системе. Если удалить таблицу из файловой системы, сервер ClickHouse снова создаст пустую таблицу во время следующей записи данных. Если схема системной таблицы изменилась в новом релизе, то ClickHouse переименует текущую таблицу и создаст новую.
+
+Таблицы с системными логами `log` можно настроить, создав конфигурационный файл с тем же именем, что и таблица в разделе `/etc/clickhouse-server/config.d/`, или указав соответствующие элементы в `/etc/clickhouse-server/config.xml`. Настраиваться могут следующие элементы:
+
+-   `database` — база данных, к которой принадлежит системная таблица. Эта опция на текущий момент устарела. Все системные таблицы находятся в базе данных `system`.
+-   `table` — таблица для добавления данных.
+-   `partition_by` — [ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md).
+-   `ttl` — [время жизни](../../sql-reference/statements/alter/ttl.md) таблицы.
+-   `flush_interval_milliseconds` — интервал сброса данных на диск, в миллисекундах.
+-   `engine` — полное имя движка (начиная с `ENGINE =` ) с параметрами. Эта опция противоречит `partition_by` и `ttl`. Если указать оба параметра вместе, сервер вернет ошибку и завершит работу.
+
+Пример:
+
+```xml
+
+    
+        system
+        query_log
+ toYYYYMM(event_date) + event_date + INTERVAL 30 DAY DELETE + + 7500 +
+
+``` + +По умолчанию размер таблицы не ограничен. Управлять размером таблицы можно используя [TTL](../../sql-reference/statements/alter/ttl.md#manipuliatsii-s-ttl-tablitsy) для удаления устаревших записей журнала. Также вы можете использовать функцию партиционирования для таблиц `MergeTree`. ### Источники системных показателей Для сбора системных показателей сервер ClickHouse использует: -- Возможности `CAP_NET_ADMIN`. +- возможности `CAP_NET_ADMIN`. - [procfs](https://ru.wikipedia.org/wiki/Procfs) (только Linux). -**procfs** Если для сервера ClickHouse не включено `CAP_NET_ADMIN`, он пытается обратиться к `ProcfsMetricsProvider`. `ProcfsMetricsProvider` позволяет собирать системные показатели для каждого запроса (для CPU и I/O). diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index 3bb11b638b2..77275b65a05 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -572,7 +572,7 @@ SOURCE(CLICKHOUSE( или ``` sql -SOURCE(MONGO( +SOURCE(MONGODB( host 'localhost' port 27017 user '' diff --git a/docs/ru/sql-reference/statements/select/all.md b/docs/ru/sql-reference/statements/select/all.md new file mode 100644 index 00000000000..4049d77a173 --- /dev/null +++ b/docs/ru/sql-reference/statements/select/all.md @@ -0,0 +1,22 @@ +--- +toc_title: ALL +--- + +# Секция ALL {#select-all} + +Если в таблице несколько совпадающих строк, то `ALL` возвращает все из них. Поведение запроса `SELECT ALL` точно такое же, как и `SELECT` без аргумента `DISTINCT`. Если указаны оба аргумента: `ALL` и `DISTINCT`, функция вернет исключение. 
+ + +`ALL` может быть указан внутри агрегатной функции, например, результат выполнения запроса: + +```sql +SELECT sum(ALL number) FROM numbers(10); +``` + +равен результату выполнения запроса: + +```sql +SELECT sum(number) FROM numbers(10); +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/all) From 3764a2a2beb106d595ba5c90b2c72ead12b58c58 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 1 Mar 2021 19:15:59 +0300 Subject: [PATCH 146/149] AggregateFunctionSumMap better comment message --- src/AggregateFunctions/AggregateFunctionSumMap.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index f6a473546f9..30efb3d5fa0 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -118,6 +118,8 @@ public: WhichDataType value_type_to_check(value_type); /// Do not promote decimal because of implementation issues of this function design + /// Currently we cannot get result column type in case of decimal we cannot get decimal scale + /// in method void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override /// If we decide to make this function more efficient we should promote decimal type during summ if (value_type_to_check.isDecimal()) result_type = value_type_without_nullable; @@ -337,7 +339,11 @@ public: if (elem.second[col].isNull()) to_values_col.insertDefault(); else + { + auto element_field = elem.second[col]; + to_values_col.insert(elem.second[col]); + } } } } From 019a2090c71898719858ad451a4e83de842fcf30 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 1 Mar 2021 19:18:14 +0300 Subject: [PATCH 147/149] Fix unused variable --- src/AggregateFunctions/AggregateFunctionSumMap.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index 30efb3d5fa0..8af20fe0e2d 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -340,8 +340,6 @@ public: to_values_col.insertDefault(); else { - auto element_field = elem.second[col]; - to_values_col.insert(elem.second[col]); } } From 7ed5900251ef5945dcca32d0b0c1280b2c920800 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 1 Mar 2021 19:18:56 +0300 Subject: [PATCH 148/149] Updated style --- src/AggregateFunctions/AggregateFunctionSumMap.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index 8af20fe0e2d..3233199f01e 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -339,9 +339,7 @@ public: if (elem.second[col].isNull()) to_values_col.insertDefault(); else - { to_values_col.insert(elem.second[col]); - } } } } From 15b3f379a56c8f648cff38695ec2aa617c13fe58 Mon Sep 17 00:00:00 2001 From: olgarev <56617294+olgarev@users.noreply.github.com> Date: Mon, 1 Mar 2021 19:41:16 +0300 Subject: [PATCH 149/149] DOCSUP-4915: documented geo data types (#21294) * Initial * Fix links * Fix master * Mistakes corrected * Minor fix * Fixes and intro. 
* Update geo.md * Apply suggestions from code review Co-authored-by: Anton Popov Co-authored-by: Olga Revyakina Co-authored-by: Anton Popov --- .../template-data-type.md | 2 +- docs/en/operations/settings/settings.md | 15 ++- docs/en/sql-reference/data-types/geo.md | 106 ++++++++++++++++++ docs/ru/operations/settings/settings.md | 11 ++ docs/ru/sql-reference/data-types/geo.md | 106 ++++++++++++++++++ 5 files changed, 237 insertions(+), 3 deletions(-) create mode 100644 docs/en/sql-reference/data-types/geo.md create mode 100644 docs/ru/sql-reference/data-types/geo.md diff --git a/docs/_description_templates/template-data-type.md b/docs/_description_templates/template-data-type.md index edb6586ee7d..5e560b9325d 100644 --- a/docs/_description_templates/template-data-type.md +++ b/docs/_description_templates/template-data-type.md @@ -26,4 +26,4 @@ The name of an additional section can be any, for example, **Usage**. - [link](#) -[Original article](https://clickhouse.tech/docs/en/data_types//) +[Original article](https://clickhouse.tech/docs/en/data-types//) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 1988e2ec6fa..3c343e09fd3 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2658,8 +2658,6 @@ Result: Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour. -[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) - ## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists} Allows to select data from a file engine table without file. @@ -2679,3 +2677,16 @@ Possible values: - 1 — Enabled. Default value: `0`. + +## allow_experimental_geo_types {#allow-experimental-geo-types} + +Allows working with experimental [geo data types](../../sql-reference/data-types/geo.md). + +Possible values: + +- 0 — Working with geo data types is disabled. +- 1 — Working with geo data types is enabled. + +Default value: `0`. + +[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) diff --git a/docs/en/sql-reference/data-types/geo.md b/docs/en/sql-reference/data-types/geo.md new file mode 100644 index 00000000000..9ed328e0de6 --- /dev/null +++ b/docs/en/sql-reference/data-types/geo.md @@ -0,0 +1,106 @@ +--- +toc_priority: 62 +toc_title: Geo +--- + +# Geo Data Types {#geo-data-types} + +Clickhouse supports data types for representing geographical objects — locations, lands, etc. + +!!! warning "Warning" + Currently geo data types are an experimental feature. To work with them you must set `allow_experimental_geo_types = 1`. + +**See Also** +- [Representing simple geographical features](https://en.wikipedia.org/wiki/GeoJSON). +- [allow_experimental_geo_types](../../operations/settings/settings.md#allow-experimental-geo-types) setting. + +## Point {#point-data-type} + +`Point` is represented by its X and Y coordinates, stored as a [Tuple](tuple.md)([Float64](float.md), [Float64](float.md)). 
+ +**Example** + +Query: + +```sql +SET allow_experimental_geo_types = 1; +CREATE TABLE geo_point (p Point) ENGINE = Memory(); +INSERT INTO geo_point VALUES((10, 10)); +SELECT p, toTypeName(p) FROM geo_point; +``` +Result: + +``` text +┌─p─────┬─toTypeName(p)─┐ +│ (10,10) │ Point │ +└───────┴───────────────┘ +``` + +## Ring {#ring-data-type} + +`Ring` is a simple polygon without holes stored as an array of points: [Array](array.md)([Point](#point-data-type)). + +**Example** + +Query: + +```sql +SET allow_experimental_geo_types = 1; +CREATE TABLE geo_ring (r Ring) ENGINE = Memory(); +INSERT INTO geo_ring VALUES([(0, 0), (10, 0), (10, 10), (0, 10)]); +SELECT r, toTypeName(r) FROM geo_ring; +``` +Result: + +``` text +┌─r─────────────────────────────┬─toTypeName(r)─┐ +│ [(0,0),(10,0),(10,10),(0,10)] │ Ring │ +└───────────────────────────────┴───────────────┘ +``` + +## Polygon {#polygon-data-type} + +`Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring-data-type)). First element of outer array is the outer shape of polygon and all the following elements are holes. + +**Example** + +This is a polygon with one hole: + +```sql +SET allow_experimental_geo_types = 1; +CREATE TABLE geo_polygon (pg Polygon) ENGINE = Memory(); +INSERT INTO geo_polygon VALUES([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]]); +SELECT pg, toTypeName(pg) FROM geo_polygon; +``` + +Result: + +``` text +┌─pg────────────────────────────────────────────────────────────┬─toTypeName(pg)─┐ +│ [[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] │ Polygon │ +└───────────────────────────────────────────────────────────────┴────────────────┘ +``` + +## MultiPolygon {#multipolygon-data-type} + +`MultiPolygon` consists of multiple polygons and is stored as an array of polygons: [Array](array.md)([Polygon](#polygon-data-type)). + +**Example** + +This multipolygon consists of two separate polygons — the first one without holes, and the second with one hole: + +```sql +SET allow_experimental_geo_types = 1; +CREATE TABLE geo_multipolygon (mpg MultiPolygon) ENGINE = Memory(); +INSERT INTO geo_multipolygon VALUES([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]]); +SELECT mpg, toTypeName(mpg) FROM geo_multipolygon; +``` +Result: + +``` text +┌─mpg─────────────────────────────────────────────────────────────────────────────────────────────┬─toTypeName(mpg)─┐ +│ [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] │ MultiPolygon │ +└─────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────┘ +``` + +[Original article](https://clickhouse.tech/docs/en/data-types/geo/) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index a67b4a283fa..f8f587c8a36 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2552,4 +2552,15 @@ SELECT * FROM test2; Обратите внимание на то, что эта настройка влияет на поведение [материализованных представлений](../../sql-reference/statements/create/view.md#materialized) и БД [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md). +## allow_experimental_geo_types {#allow-experimental-geo-types} + +Разрешает использование экспериментальных типов данных для работы с [географическими структурами](../../sql-reference/data-types/geo.md). 
+
+Возможные значения:
+
+- 0 — Использование типов данных для работы с географическими структурами не поддерживается.
+- 1 — Использование типов данных для работы с географическими структурами поддерживается.
+
+Значение по умолчанию: `0`.
+
 [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/)
diff --git a/docs/ru/sql-reference/data-types/geo.md b/docs/ru/sql-reference/data-types/geo.md
new file mode 100644
index 00000000000..23293b30927
--- /dev/null
+++ b/docs/ru/sql-reference/data-types/geo.md
@@ -0,0 +1,106 @@
+---
+toc_priority: 62
+toc_title: Географические структуры
+---
+
+# Типы данных для работы с географическими структурами {#geo-data-types}
+
+ClickHouse поддерживает типы данных для отображения географических объектов — точек (местоположений), территорий и т.п.
+
+!!! warning "Предупреждение"
+    Сейчас использование типов данных для работы с географическими структурами является экспериментальной возможностью. Чтобы использовать эти типы данных, включите настройку `allow_experimental_geo_types = 1`.
+
+**См. также**
+- [Хранение географических структур данных](https://ru.wikipedia.org/wiki/GeoJSON).
+- Настройка [allow_experimental_geo_types](../../operations/settings/settings.md#allow-experimental-geo-types).
+
+## Point {#point-data-type}
+
+Тип `Point` (точка) определяется парой координат X и Y и хранится в виде кортежа [Tuple](tuple.md)([Float64](float.md), [Float64](float.md)).
+
+**Пример**
+
+Запрос:
+
+```sql
+SET allow_experimental_geo_types = 1;
+CREATE TABLE geo_point (p Point) ENGINE = Memory();
+INSERT INTO geo_point VALUES((10, 10));
+SELECT p, toTypeName(p) FROM geo_point;
+```
+
+Результат:
+
+``` text
+┌─p───────┬─toTypeName(p)─┐
+│ (10,10) │ Point         │
+└─────────┴───────────────┘
+```
+
+## Ring {#ring-data-type}
+
+Тип `Ring` описывает простой многоугольник без внутренних областей (дыр) и хранится в виде массива точек: [Array](array.md)([Point](#point-data-type)).
+
+**Пример**
+
+Запрос:
+
+```sql
+SET allow_experimental_geo_types = 1;
+CREATE TABLE geo_ring (r Ring) ENGINE = Memory();
+INSERT INTO geo_ring VALUES([(0, 0), (10, 0), (10, 10), (0, 10)]);
+SELECT r, toTypeName(r) FROM geo_ring;
+```
+
+Результат:
+
+``` text
+┌─r─────────────────────────────┬─toTypeName(r)─┐
+│ [(0,0),(10,0),(10,10),(0,10)] │ Ring          │
+└───────────────────────────────┴───────────────┘
+```
+
+## Polygon {#polygon-data-type}
+
+Тип `Polygon` описывает многоугольник с внутренними областями (дырами) и хранится в виде массива: [Array](array.md)([Ring](#ring-data-type)). Первый элемент массива описывает внешний многоугольник (контур), а остальные элементы описывают дыры.
+
+**Пример**
+
+Запись в этой таблице описывает многоугольник с одной дырой:
+
+```sql
+SET allow_experimental_geo_types = 1;
+CREATE TABLE geo_polygon (pg Polygon) ENGINE = Memory();
+INSERT INTO geo_polygon VALUES([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]]);
+SELECT pg, toTypeName(pg) FROM geo_polygon;
+```
+
+Результат:
+
+``` text
+┌─pg────────────────────────────────────────────────────────────┬─toTypeName(pg)─┐
+│ [[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] │ Polygon        │
+└───────────────────────────────────────────────────────────────┴────────────────┘
+```
+
+## MultiPolygon {#multipolygon-data-type}
+
+Тип `MultiPolygon` описывает элемент, состоящий из нескольких простых многоугольников (полигональную сетку). Он хранится в виде массива многоугольников: [Array](array.md)([Polygon](#polygon-data-type)).
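+
+Поскольку `MultiPolygon` хранится как обычный массив, к нему, по-видимому, применимы стандартные функции массивов. Набросок под этим допущением (условное имя таблицы `geo_mp_demo`; полный пример см. ниже):
+
+```sql
+SET allow_experimental_geo_types = 1;
+CREATE TABLE geo_mp_demo (mpg MultiPolygon) ENGINE = Memory();
+INSERT INTO geo_mp_demo VALUES([[[(0, 0), (10, 0), (10, 10), (0, 10)]]]);
+-- length — стандартная функция массивов: возвращает число многоугольников в значении
+SELECT length(mpg) AS polygon_count FROM geo_mp_demo;
+```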
+
+**Пример**
+
+Запись в этой таблице описывает элемент, состоящий из двух многоугольников — первый без дыр, а второй с одной дырой:
+
+```sql
+SET allow_experimental_geo_types = 1;
+CREATE TABLE geo_multipolygon (mpg MultiPolygon) ENGINE = Memory();
+INSERT INTO geo_multipolygon VALUES([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]]);
+SELECT mpg, toTypeName(mpg) FROM geo_multipolygon;
+```
+
+Результат:
+
+``` text
+┌─mpg─────────────────────────────────────────────────────────────────────────────────────────────┬─toTypeName(mpg)─┐
+│ [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] │ MultiPolygon    │
+└─────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────┘
+```
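+
+Так как значения географических типов хранятся в виде обычных кортежей и массивов точек, с ними, по-видимому, можно использовать и обычные функции для работы с координатами, например `pointInPolygon`. Набросок под этим допущением (значения заданы литералами, без таблиц):
+
+```sql
+-- pointInPolygon принимает точку (кортеж) и многоугольник (массив точек);
+-- возвращает 1, если точка лежит внутри многоугольника, иначе 0.
+SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS inside;
+```
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/data-types/geo/)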