From 8b7eeea1625f0dba774a7a94e69e618da14ee6a5 Mon Sep 17 00:00:00 2001
From: Andrei Ch
Date: Sat, 11 Dec 2021 23:09:06 +0300
Subject: [PATCH 01/37] parsing enums in TSV/CSV

---
 docs/ru/interfaces/formats.md           |  6 ++
 docs/ru/operations/settings/settings.md | 83 +++++++++++++++++++------
 2 files changed, 69 insertions(+), 20 deletions(-)

diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index 79d760271f5..996514aa068 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -129,6 +129,9 @@ world
 
 Каждый элемент структуры типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) представляется как отдельный массив.
 
+Значения перечисления, в качестве входных данных, могут быть представлены как имя или как идентификаторы. Сначала мы пытаемся сопоставить входное значение с именем перечисления. В случае неудачи и при условии, что входное значение является числом, мы пытаемся сопоставить это число с идентификатором перечисления.
+Если входные данные содержат только ENUM идентификаторы, рекомендуется включить настройку [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) для оптимизации парсинга перечисления.
+
 Например:
 
 ``` sql
@@ -362,6 +365,9 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
 
 Если установлена настройка [input_format_defaults_for_omitted_fields = 1](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) и тип столбца не `Nullable(T)`, то пустые значения без кавычек заменяются значениями по умолчанию для типа данных столбца.
 
+Значения перечисления, в качестве входных данных, могут быть представлены как имя или как идентификаторы. Сначала мы пытаемся сопоставить входное значение с именем перечисления. В случае неудачи и при условии, что входное значение является числом, мы пытаемся сопоставить это число с идентификатором перечисления.
+Если входные данные содержат только идентификаторы перечисления, рекомендуется включить настройку [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) для оптимизации парсинга перечисления.
+
 Формат CSV поддерживает вывод totals и extremes аналогично `TabSeparated`.
 
 ## CSVWithNames {#csvwithnames}
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 1b4da512c9f..267dae3ae2a 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -391,12 +391,12 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (
 
 ## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number}
 
-Включает или отключает парсинг значений перечислений как идентификаторов перечислений для входного формата TSV.
+При включенном режиме всегда обрабатывайте значения перечисления как идентификаторы перечисления для входного формата TSV. Для оптимизации парсинга, рекомендуется включать этот параметр, если данные содержат только идентификаторы перечисления.
 
 Возможные значения:
 
-- 0 — парсинг значений перечисления как значений.
-- 1 — парсинг значений перечисления как идентификаторов перечисления.
+- 0 — данные перечисления обработаны как значения или как идентификаторы.
+- 1 — данные перечисления обработаны только как идентификаторы.
 
 Значение по умолчанию: 0.
@@ -410,10 +410,39 @@ CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first'
 
 При включенной настройке `input_format_tsv_enum_as_number`:
 
+Запрос:
+
 ```sql
 SET input_format_tsv_enum_as_number = 1;
 INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
-INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 1;
+SELECT * FROM table_with_enum_column_for_tsv_insert;
+```
+
+Результат:
+
+```text
+┌──Id─┬─Value──┐
+│ 102 │ second │
+└─────┴────────┘
+```
+
+Запрос:
+
+```sql
+SET input_format_tsv_enum_as_number = 1;
+INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first';
+```
+
+сгенерирует исключение.
+
+При отключенной настройке `input_format_tsv_enum_as_number`:
+
+Запрос:
+
+```sql
+SET input_format_tsv_enum_as_number = 0;
+INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
+INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first';
 SELECT * FROM table_with_enum_column_for_tsv_insert;
 ```
@@ -428,15 +457,6 @@ SELECT * FROM table_with_enum_column_for_tsv_insert;
 └─────┴────────┘
 ```
 
-При отключенной настройке `input_format_tsv_enum_as_number` запрос `INSERT`:
-
-```sql
-SET input_format_tsv_enum_as_number = 0;
-INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
-```
-
-сгенерирует исключение.
-
 ## input_format_null_as_default {#settings-input-format-null-as-default}
 
 Включает или отключает инициализацию [значениями по умолчанию](../../sql-reference/statements/create/table.md#create-default-values) ячеек с [NULL](../../sql-reference/syntax.md#null-literal), если тип данных столбца не позволяет [хранить NULL](../../sql-reference/data-types/nullable.md#data_type-nullable).
@@ -1511,12 +1531,12 @@ SELECT area/period FROM account_orders FORMAT JSON;
 
 ## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number}
 
-Включает или отключает парсинг значений перечислений как идентификаторов перечислений для входного формата CSV.
+При включенном режиме всегда обрабатывайте значения перечисления как идентификаторы перечисления для входного формата CSV. Для оптимизации парсинга, рекомендуется включать этот параметр, если данные содержат только идентификаторы перечисления.
 
 Возможные значения:
 
-- 0 — парсинг значений перечисления как значений.
-- 1 — парсинг значений перечисления как идентификаторов перечисления.
+- 0 — данные перечисления обработаны как значения или как идентификаторы.
+- 1 — данные перечисления обработаны только как идентификаторы.
 
 Значение по умолчанию: 0.
 
@@ -1530,10 +1550,11 @@ CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first'
 
 При включенной настройке `input_format_csv_enum_as_number`:
 
+Запрос:
+
 ```sql
 SET input_format_csv_enum_as_number = 1;
 INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2;
-SELECT * FROM table_with_enum_column_for_csv_insert;
 ```
 
 Результат:
 
 ```text
 ┌──Id─┬─Value──┐
 │ 102 │ second │
 └─────┴────────┘
 ```
 
-При отключенной настройке `input_format_csv_enum_as_number` запрос `INSERT`:
+Запрос:
 
 ```sql
-SET input_format_csv_enum_as_number = 0;
-INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2;
+SET input_format_csv_enum_as_number = 1;
+INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first'
 ```
 
 сгенерирует исключение.
+При отключенной настройке `input_format_csv_enum_as_number`:
+
+Запрос:
+
+```sql
+SET input_format_csv_enum_as_number = 0;
+INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2
+INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first'
+SELECT * FROM table_with_enum_column_for_csv_insert;
+```
+
+Результат:
+
+```text
+┌──Id─┬─Value──┐
+│ 102 │ second │
+└─────┴────────┘
+┌──Id─┬─Value─┐
+│ 103 │ first │
+└─────┴───────┘
+```
+
 ## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line}
 
 Использовать в качестве разделителя строк для CSV формата CRLF (DOS/Windows стиль) вместо LF (Unix стиль).
From 61a1eb28281a5353f67d7c3a45dae3460664ed4e Mon Sep 17 00:00:00 2001
From: Andrei Ch
Date: Sun, 12 Dec 2021 00:21:05 +0300
Subject: [PATCH 02/37] Update RabbitMQ

---
 docs/en/engines/table-engines/integrations/rabbitmq.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md
index ebb42461204..78c144ac76f 100644
--- a/docs/en/engines/table-engines/integrations/rabbitmq.md
+++ b/docs/en/engines/table-engines/integrations/rabbitmq.md
@@ -37,6 +37,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
     [rabbitmq_skip_broken_messages = N,]
     [rabbitmq_max_block_size = N,]
     [rabbitmq_flush_interval_ms = N]
+    [rabbitmq_queue_settings_list = 'x-dead-letter-exchange=my-dlx,x-max-length=10,x-overflow=reject-publish']
 ```
 
 Required parameters:
@@ -59,6 +60,7 @@ Optional parameters:
 - `rabbitmq_skip_broken_messages` – RabbitMQ message parser tolerance to schema-incompatible messages per block. Default: `0`. If `rabbitmq_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data).
 - `rabbitmq_max_block_size`
 - `rabbitmq_flush_interval_ms`
+- `rabbitmq_queue_settings_list` - allows to set RabbitMQ settings when creating a queue. Available settings: `x-max-length`, `x-max-length-bytes`, `x-message-ttl`, `x-expires`, `x-priority`, `x-max-priority`, `x-overflow`, `x-dead-letter-exchange`, `x-queue-type`. The `durable` setting is enabled automatically for the queue.
 
 SSL connection:
From c62e425aa162f9708deb967ef05430a57548266c Mon Sep 17 00:00:00 2001
From: Andrei Ch
Date: Sun, 12 Dec 2021 00:32:10 +0300
Subject: [PATCH 03/37] stop referencing insert_sample_with_metadata

---
 docs/ru/interfaces/formats.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index 996514aa068..82e2992df55 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -699,7 +699,7 @@ CREATE TABLE IF NOT EXISTS example_table
 - Если `input_format_defaults_for_omitted_fields = 1`, то значение по умолчанию для `x` равно `0`, а значение по умолчанию `a` равно `x * 2`.
 
 !!! note "Предупреждение"
-    Если `input_format_defaults_for_omitted_fields = 1`, то при обработке запросов ClickHouse потребляет больше вычислительных ресурсов, чем если `input_format_defaults_for_omitted_fields = 0`.
+    При добавлении данных с помощью `input_format_defaults_for_omitted_fields = 1`, ClickHouse потребляет больше вычислительных ресурсов по сравнению с `input_format_defaults_for_omitted_fields = 0`.
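
For illustration only (editor's sketch, not part of the patch above): a minimal example of the behaviour this note describes, assuming `example_table` is defined as in this section of the docs, with a column `x` and a column `a DEFAULT x * 2`.

```sql
-- Hypothetical illustration of input_format_defaults_for_omitted_fields.
-- The field `a` is omitted from the input, so with the setting enabled it is
-- computed from its DEFAULT expression (x * 2) at insert time, which is why
-- such inserts cost more CPU than inserts with the setting disabled.
SET input_format_defaults_for_omitted_fields = 1;
INSERT INTO example_table FORMAT JSONEachRow {"x":5};

SELECT * FROM example_table;
-- expected row: x = 5, a = 10
```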
 ### Выборка данных {#vyborka-dannykh}
From 830e793bd1df1032a7d789cd87cc466043f55a10 Mon Sep 17 00:00:00 2001
From: andrc1901 <92211164+andrc1901@users.noreply.github.com>
Date: Sun, 12 Dec 2021 00:57:33 +0300
Subject: [PATCH 04/37] Update formats.md

---
 docs/ru/interfaces/formats.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index 82e2992df55..b797c9ba6a7 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -130,7 +130,7 @@ world
 Каждый элемент структуры типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) представляется как отдельный массив.
 
 Значения перечисления, в качестве входных данных, могут быть представлены как имя или как идентификаторы. Сначала мы пытаемся сопоставить входное значение с именем перечисления. В случае неудачи и при условии, что входное значение является числом, мы пытаемся сопоставить это число с идентификатором перечисления.
-Если входные данные содержат только ENUM идентификаторы, рекомендуется включить настройку [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) для оптимизации парсинга перечисления.
+Если входные данные содержат только идентификаторы перечисления, рекомендуется включить настройку [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) для оптимизации парсинга перечисления.
 
 Например:
From 70c46920e3ef393526946017fbe8d721a94b79e5 Mon Sep 17 00:00:00 2001
From: andrc1901 <92211164+andrc1901@users.noreply.github.com>
Date: Mon, 13 Dec 2021 21:20:07 +0300
Subject: [PATCH 05/37] Update docs/ru/interfaces/formats.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/interfaces/formats.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index b797c9ba6a7..eef50eb6eaf 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -129,7 +129,7 @@ world
 
 Каждый элемент структуры типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) представляется как отдельный массив.
 
-Значения перечисления, в качестве входных данных, могут быть представлены как имя или как идентификаторы. Сначала мы пытаемся сопоставить входное значение с именем перечисления. В случае неудачи и при условии, что входное значение является числом, мы пытаемся сопоставить это число с идентификатором перечисления.
+Входящие параметры типа "перечисление" (`ENUM`) могут передаваться в виде значений или порядковых номеров. Сначала переданное значение будет сопоставляться с элементами перечисления. Если совпадение не будет найдено и при этом переданное значение является числом, оно будет трактоваться как порядковый номер в перечислении.
 Если входные данные содержат только идентификаторы перечисления, рекомендуется включить настройку [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) для оптимизации парсинга перечисления.
 Например:
From 27f5ade498dc44e2ee2b16d6e872eb88ea901dbf Mon Sep 17 00:00:00 2001
From: andrc1901 <92211164+andrc1901@users.noreply.github.com>
Date: Mon, 13 Dec 2021 21:20:49 +0300
Subject: [PATCH 06/37] Update docs/ru/interfaces/formats.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/interfaces/formats.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index eef50eb6eaf..a158dc63074 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -130,7 +130,7 @@ world
 Каждый элемент структуры типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) представляется как отдельный массив.
 
 Входящие параметры типа "перечисление" (`ENUM`) могут передаваться в виде значений или порядковых номеров. Сначала переданное значение будет сопоставляться с элементами перечисления. Если совпадение не будет найдено и при этом переданное значение является числом, оно будет трактоваться как порядковый номер в перечислении.
-Если входные данные содержат только идентификаторы перечисления, рекомендуется включить настройку [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) для оптимизации парсинга перечисления.
+Если входящие параметры типа `ENUM` содержат только порядковые номера, рекомендуется включить настройку [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) для ускорения парсинга.
 
 Например:
From 9fbc0eb4b9e67bee88da4053e72424a5ab1cd606 Mon Sep 17 00:00:00 2001
From: andrc1901 <92211164+andrc1901@users.noreply.github.com>
Date: Mon, 13 Dec 2021 21:21:28 +0300
Subject: [PATCH 07/37] Update docs/ru/interfaces/formats.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/interfaces/formats.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index a158dc63074..a384776e519 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -365,8 +365,8 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
 
 Если установлена настройка [input_format_defaults_for_omitted_fields = 1](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) и тип столбца не `Nullable(T)`, то пустые значения без кавычек заменяются значениями по умолчанию для типа данных столбца.
 
-Значения перечисления, в качестве входных данных, могут быть представлены как имя или как идентификаторы. Сначала мы пытаемся сопоставить входное значение с именем перечисления. В случае неудачи и при условии, что входное значение является числом, мы пытаемся сопоставить это число с идентификатором перечисления.
-Если входные данные содержат только идентификаторы перечисления, рекомендуется включить настройку [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) для оптимизации парсинга перечисления.
+Входящие параметры типа "перечисление" (`ENUM`) могут передаваться в виде значений или порядковых номеров. Сначала переданное значение будет сопоставляться с элементами перечисления. Если совпадение не будет найдено и при этом переданное значение является числом, оно будет трактоваться как порядковый номер в перечислении.
+Если входящие параметры типа `ENUM` содержат только порядковые номера, рекомендуется включить настройку [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) для ускорения парсинга.
 
 Формат CSV поддерживает вывод totals и extremes аналогично `TabSeparated`.
From 7117e2c48972a1d8b881a729bef2c098c6b371e1 Mon Sep 17 00:00:00 2001
From: andrc1901 <92211164+andrc1901@users.noreply.github.com>
Date: Mon, 13 Dec 2021 21:21:41 +0300
Subject: [PATCH 08/37] Update docs/ru/operations/settings/settings.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/operations/settings/settings.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 267dae3ae2a..c60d46705e3 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -1531,7 +1531,8 @@ SELECT area/period FROM account_orders FORMAT JSON;
 
 ## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number}
 
-При включенном режиме всегда обрабатывайте значения перечисления как идентификаторы перечисления для входного формата CSV. Для оптимизации парсинга, рекомендуется включать этот параметр, если данные содержат только идентификаторы перечисления.
+Включает или отключает парсинг значений перечислений как порядковых номеров.
+Если режим включен, то во входящих данных в формате `CSV` значения перечисления (тип `ENUM`) всегда трактуются как порядковые номера, а не как элементы перечисления. Эту настройку рекомендуется включать для оптимизации парсинга, если данные типа `ENUM` содержат только порядковые номера, а не сами элементы перечисления.
 
 Возможные значения:
From d3e5b97ac9e3c659cb00a20a709db8665daf14d5 Mon Sep 17 00:00:00 2001
From: andrc1901 <92211164+andrc1901@users.noreply.github.com>
Date: Mon, 13 Dec 2021 21:22:03 +0300
Subject: [PATCH 09/37] Update docs/ru/operations/settings/settings.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/operations/settings/settings.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index c60d46705e3..e473c37986a 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -1536,8 +1536,8 @@ SELECT area/period FROM account_orders FORMAT JSON;
 
 Возможные значения:
 
-- 0 — данные перечисления обработаны как значения или как идентификаторы.
-- 1 — данные перечисления обработаны только как идентификаторы.
+- 0 — входящие значения типа `ENUM` сначала сопоставляются с элементами перечисления, а если совпадений не найдено, то трактуются как порядковые номера.
+- 1 — входящие значения типа `ENUM` сразу трактуются как порядковые номера.
 
 Значение по умолчанию: 0.
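
Editor's aside (not part of the patches): a compact sketch of the two modes described by the rewritten bullets above, assuming a CSV-fed table with a column of type `Enum('first' = 1, 'second' = 2)` such as the `table_with_enum_column_for_csv_insert` used in the examples earlier in this file.

```sql
-- Mode 0 (default): the value is matched against element names first,
-- then, if it is a number, against the ordinal numbers of the elements.
SET input_format_csv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 104,'second'
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 105,2

-- Mode 1: only ordinal numbers are accepted, which skips the name lookup
-- and makes parsing cheaper; a name such as 'second' would now be rejected.
SET input_format_csv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 106,2
```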
From b928c551c60ec97b93cfeaf6f283a730962c0aa8 Mon Sep 17 00:00:00 2001
From: andrc1901 <92211164+andrc1901@users.noreply.github.com>
Date: Mon, 13 Dec 2021 21:23:11 +0300
Subject: [PATCH 10/37] Update docs/ru/operations/settings/settings.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/operations/settings/settings.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index e473c37986a..680d868a536 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -391,7 +391,9 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (
 
 ## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number}
 
-При включенном режиме всегда обрабатывайте значения перечисления как идентификаторы перечисления для входного формата TSV. Для оптимизации парсинга, рекомендуется включать этот параметр, если данные содержат только идентификаторы перечисления.
+Включает или отключает парсинг значений перечислений как порядковых номеров.
+
+Если режим включен, то во входящих данных в формате `TSV` значения перечисления (тип `ENUM`) всегда трактуются как порядковые номера, а не как элементы перечисления. Эту настройку рекомендуется включать для оптимизации парсинга, если данные типа `ENUM` содержат только порядковые номера, а не сами элементы перечисления.
 
 Возможные значения:
From 43a3d927f67774e7f18eac2b214cde010fb2d73b Mon Sep 17 00:00:00 2001
From: andrc1901 <92211164+andrc1901@users.noreply.github.com>
Date: Mon, 13 Dec 2021 21:23:22 +0300
Subject: [PATCH 11/37] Update docs/ru/operations/settings/settings.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/operations/settings/settings.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 680d868a536..94bd2078373 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -397,8 +397,8 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (
 
 Возможные значения:
 
-- 0 — данные перечисления обработаны как значения или как идентификаторы.
-- 1 — данные перечисления обработаны только как идентификаторы.
+- 0 — входящие значения типа `ENUM` сначала сопоставляются с элементами перечисления, а если совпадений не найдено, то трактуются как порядковые номера.
+- 1 — входящие значения типа `ENUM` сразу трактуются как порядковые номера.
 
 Значение по умолчанию: 0.
From d394f0e753ef679d9fc2ad8f4d331e5f4a4bf801 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 14 Dec 2021 17:19:18 +0300
Subject: [PATCH 12/37] Apply some more optimizations to NO_QUERY ast.

---
 src/Interpreters/TreeRewriter.cpp             |  2 +-
 .../integration/test_storage_rabbitmq/test.py | 22 +++++++++++++++++--
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp
index d864bb54b2e..639d38d36f1 100644
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@@ -1117,7 +1117,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
     result.rewrite_subqueries = PredicateExpressionsOptimizer(getContext(), tables_with_columns, settings).optimize(*select_query);
 
     /// Only apply AST optimization for initial queries.
- if (getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) + if (getContext()->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY) TreeOptimizer::apply(query, result, tables_with_columns, getContext()); /// array_join_alias_to_name, array_join_result_to_source. diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 66ec97ac027..5342473aefa 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -284,6 +284,12 @@ def test_rabbitmq_materialized_view(rabbitmq_cluster): ORDER BY key; CREATE MATERIALIZED VIEW test.consumer TO test.view AS SELECT * FROM test.rabbitmq; + + CREATE TABLE test.view2 (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS + SELECT * FROM test.rabbitmq group by (key, value); ''') credentials = pika.PlainCredentials('root', 'clickhouse') @@ -297,14 +303,26 @@ def test_rabbitmq_materialized_view(rabbitmq_cluster): for message in messages: channel.basic_publish(exchange='mv', routing_key='', body=message) - while True: + time_limit_sec = 60 + deadline = time.monotonic() + time_limit_sec + + while time.monotonic() < deadline: result = instance.query('SELECT * FROM test.view ORDER BY key') if (rabbitmq_check_result(result)): break - connection.close() rabbitmq_check_result(result, True) + deadline = time.monotonic() + time_limit_sec + + while time.monotonic() < deadline: + result = instance.query('SELECT * FROM test.view2 ORDER BY key') + if (rabbitmq_check_result(result)): + break + + rabbitmq_check_result(result, True) + connection.close() + def test_rabbitmq_materialized_view_with_subquery(rabbitmq_cluster): instance.query(''' From 47f549166d5c1b7932067d145cf80a2b469c3901 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 14 Dec 2021 16:53:47 +0300 Subject: [PATCH 13/37] Table Override for PostgreSQL --- src/Core/PostgreSQL/insertPostgreSQLValue.cpp | 2 +- src/Databases/DatabaseFactory.cpp | 2 +- .../MySQL/InterpretersMySQLDDLQuery.cpp | 18 +-- src/Parsers/ASTTableOverrides.cpp | 18 +++ src/Parsers/ASTTableOverrides.h | 2 + .../PostgreSQLReplicationHandler.cpp | 8 +- .../StorageMaterializedPostgreSQL.cpp | 148 ++++++++++++------ .../StorageMaterializedPostgreSQL.h | 8 +- .../test.py | 31 +++- 9 files changed, 165 insertions(+), 72 deletions(-) diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp index 1c3230ec826..f4d47049554 100644 --- a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp +++ b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp @@ -85,7 +85,7 @@ void insertPostgreSQLValue( assert_cast(column).insertData(value.data(), value.size()); break; case ExternalResultDescription::ValueType::vtUUID: - assert_cast(column).insert(parse(value.data(), value.size())); + assert_cast(column).insertValue(parse(value.data(), value.size())); break; case ExternalResultDescription::ValueType::vtDate: assert_cast(column).insertValue(UInt16{LocalDate{std::string(value)}.getDayNum()}); diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index a4c8f3cdb77..5ea44e0f94c 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -117,7 +117,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String static const std::unordered_set engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL", "Lazy", 
"Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite"}; - static const std::unordered_set engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL"}; + static const std::unordered_set engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL", "MaterializedPostgreSQL"}; bool engine_may_have_arguments = engines_with_arguments.contains(engine_name); if (engine_define->engine->arguments && !engine_may_have_arguments) diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index e599cd97e13..a283bd001e2 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -435,22 +435,6 @@ void InterpreterCreateImpl::validate(const InterpreterCreateImpl::TQuery & creat } } -static ASTPtr tryGetTableOverride(const String & mapped_database, const String & table) -{ - if (auto database_ptr = DatabaseCatalog::instance().tryGetDatabase(mapped_database)) - { - auto create_query = database_ptr->getCreateDatabaseQuery(); - if (auto create_database_query = create_query->as()) - { - if (create_database_query->table_overrides) - { - return create_database_query->table_overrides->tryGetTableOverride(table); - } - } - } - return nullptr; -} - ASTs InterpreterCreateImpl::getRewrittenQueries( const TQuery & create_query, ContextPtr context, const String & mapped_to_database, const String & mysql_database) { @@ -535,7 +519,7 @@ ASTs InterpreterCreateImpl::getRewrittenQueries( rewritten_query->set(rewritten_query->storage, storage); rewritten_query->set(rewritten_query->columns_list, columns); - if (auto table_override = tryGetTableOverride(mapped_to_database, create_query.table)) + if (auto table_override = ASTTableOverride::tryGetTableOverride(mapped_to_database, create_query.table)) { auto override = table_override->as(); override->applyToCreateTableQuery(rewritten_query.get()); diff --git a/src/Parsers/ASTTableOverrides.cpp b/src/Parsers/ASTTableOverrides.cpp index 0270c2cad82..ee351ab3eb3 100644 --- a/src/Parsers/ASTTableOverrides.cpp +++ b/src/Parsers/ASTTableOverrides.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include namespace DB { @@ -71,6 +73,22 @@ void ASTTableOverride::formatImpl(const FormatSettings & settings_, FormatState settings.ostr << nl_or_nothing << ')'; } +ASTPtr ASTTableOverride::tryGetTableOverride(const String & mapped_database, const String & table) +{ + if (auto database_ptr = DatabaseCatalog::instance().tryGetDatabase(mapped_database)) + { + auto create_query = database_ptr->getCreateDatabaseQuery(); + if (auto create_database_query = create_query->as()) + { + if (create_database_query->table_overrides) + { + return create_database_query->table_overrides->tryGetTableOverride(table); + } + } + } + return nullptr; +} + void ASTTableOverride::applyToCreateTableQuery(ASTCreateQuery * create_query) const { if (columns) diff --git a/src/Parsers/ASTTableOverrides.h b/src/Parsers/ASTTableOverrides.h index d2cab9f27c4..edbc35553b3 100644 --- a/src/Parsers/ASTTableOverrides.h +++ b/src/Parsers/ASTTableOverrides.h @@ -27,6 +27,8 @@ public: ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; void applyToCreateTableQuery(ASTCreateQuery * create_query) const; + + static ASTPtr tryGetTableOverride(const String & mapped_database, const String & table); }; /// List of table overrides, for example: diff --git 
a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 7cc71a63443..d706615b762 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -1,5 +1,6 @@ #include "PostgreSQLReplicationHandler.h" +#include #include #include #include @@ -279,7 +280,9 @@ ASTPtr PostgreSQLReplicationHandler::getCreateNestedTableQuery(StorageMaterializ auto table_structure = std::make_unique(fetchPostgreSQLTableStructure(tx, table_name, postgres_schema, true, true, true)); if (!table_structure) throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to get PostgreSQL table structure"); - return storage->getCreateNestedTableQuery(std::move(table_structure)); + + auto table_override = ASTTableOverride::tryGetTableOverride(current_database_name, table_name); + return storage->getCreateNestedTableQuery(std::move(table_structure), table_override->as()); } @@ -297,7 +300,8 @@ StoragePtr PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection & query_str = fmt::format("SELECT * FROM {}", quoted_name); LOG_DEBUG(log, "Loading PostgreSQL table {}.{}", postgres_database, quoted_name); - materialized_storage->createNestedIfNeeded(fetchTableStructure(*tx, table_name)); + auto table_override = ASTTableOverride::tryGetTableOverride(current_database_name, table_name); + materialized_storage->createNestedIfNeeded(fetchTableStructure(*tx, table_name), table_override->as()); auto nested_storage = materialized_storage->getNested(); auto insert = std::make_shared(); diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 591e10a88b9..a680792acad 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -181,18 +182,18 @@ StorageID StorageMaterializedPostgreSQL::getNestedStorageID() const } -void StorageMaterializedPostgreSQL::createNestedIfNeeded(PostgreSQLTableStructurePtr table_structure) +void StorageMaterializedPostgreSQL::createNestedIfNeeded(PostgreSQLTableStructurePtr table_structure, const ASTTableOverride * table_override) { if (tryGetNested()) return; - const auto ast_create = getCreateNestedTableQuery(std::move(table_structure)); - auto table_id = getStorageID(); - auto tmp_nested_table_id = StorageID(table_id.database_name, getNestedTableName()); - LOG_DEBUG(log, "Creating clickhouse table for postgresql table {}", table_id.getNameForLogs()); - try { + const auto ast_create = getCreateNestedTableQuery(std::move(table_structure), table_override); + auto table_id = getStorageID(); + auto tmp_nested_table_id = StorageID(table_id.database_name, getNestedTableName()); + LOG_DEBUG(log, "Creating clickhouse table for postgresql table {}", table_id.getNameForLogs()); + InterpreterCreateQuery interpreter(ast_create, nested_context); interpreter.execute(); @@ -200,10 +201,10 @@ void StorageMaterializedPostgreSQL::createNestedIfNeeded(PostgreSQLTableStructur /// Save storage_id with correct uuid. nested_table_id = nested_storage->getStorageID(); } - catch (Exception & e) + catch (...) 
{ - e.addMessage("while creating nested table: {}", tmp_nested_table_id.getNameForLogs()); tryLogCurrentException(__PRETTY_FUNCTION__); + throw; } } @@ -362,12 +363,31 @@ ASTPtr StorageMaterializedPostgreSQL::getColumnDeclaration(const DataTypePtr & d } +std::shared_ptr StorageMaterializedPostgreSQL::getColumnsExpressionList(const NamesAndTypesList & columns) const +{ + auto columns_expression_list = std::make_shared(); + for (const auto & [name, type] : columns) + { + const auto & column_declaration = std::make_shared(); + + column_declaration->name = name; + column_declaration->type = getColumnDeclaration(type); + + columns_expression_list->children.emplace_back(column_declaration); + } + return columns_expression_list; +} + + /// For single storage MaterializedPostgreSQL get columns and primary key columns from storage definition. /// For database engine MaterializedPostgreSQL get columns and primary key columns by fetching from PostgreSQL, also using the same /// transaction with snapshot, which is used for initial tables dump. -ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery(PostgreSQLTableStructurePtr table_structure) +ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery( + PostgreSQLTableStructurePtr table_structure, const ASTTableOverride * table_override) { auto create_table_query = std::make_shared(); + if (table_override) + table_override->applyToCreateTableQuery(create_table_query.get()); auto table_id = getStorageID(); create_table_query->setTable(getNestedTableName()); @@ -375,40 +395,86 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery(PostgreSQLTableS if (is_materialized_postgresql_database) create_table_query->uuid = table_id.uuid; + auto storage = std::make_shared(); + storage->set(storage->engine, makeASTFunction("ReplacingMergeTree", std::make_shared("_version"))); + auto columns_declare_list = std::make_shared(); - auto columns_expression_list = std::make_shared(); auto order_by_expression = std::make_shared(); auto metadata_snapshot = getInMemoryMetadataPtr(); - const auto & columns = metadata_snapshot->getColumns(); + + ConstraintsDescription constraints; NamesAndTypesList ordinary_columns_and_types; - if (!is_materialized_postgresql_database) + if (is_materialized_postgresql_database) { - ordinary_columns_and_types = columns.getOrdinary(); - } - else - { - if (!table_structure) + if (!table_structure && !table_override) { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "No table structure returned for table {}.{}", table_id.database_name, table_id.table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No table structure returned for table {}.{}", + table_id.database_name, table_id.table_name); } - if (!table_structure->columns) + if (!table_structure->columns && (!table_override || !table_override->columns)) { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "No columns returned for table {}.{}", table_id.database_name, table_id.table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No columns returned for table {}.{}", + table_id.database_name, table_id.table_name); } - ordinary_columns_and_types = *table_structure->columns; + bool has_order_by_override = table_override && table_override->storage && table_override->storage->order_by; + if (has_order_by_override && !table_structure->replica_identity_columns) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Having PRIMARY KEY OVERRIDE is allowed only if there is " + "replica identity index for PostgreSQL table. 
(table {}.{})", + table_id.database_name, table_id.table_name); + } if (!table_structure->primary_key_columns && !table_structure->replica_identity_columns) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Table {}.{} has no primary key and no replica identity index", table_id.database_name, table_id.table_name); + "Table {}.{} has no primary key and no replica identity index", + table_id.database_name, table_id.table_name); } + if (table_override && table_override->columns) + { + table_override->applyToCreateTableQuery(create_table_query.get()); + if (table_override->columns) + { + auto children = table_override->columns->children; + const auto & columns = children[0]->as(); + if (columns) + { + for (const auto & child : columns->children) + { + const auto * column_declaration = child->as(); + auto type = DataTypeFactory::instance().get(column_declaration->type); + ordinary_columns_and_types.emplace_back(NameAndTypePair(column_declaration->name, type)); + } + } + + columns_declare_list->set(columns_declare_list->columns, children[0]); + } + else + { + ordinary_columns_and_types = *table_structure->columns; + columns_declare_list->set(columns_declare_list->columns, getColumnsExpressionList(ordinary_columns_and_types)); + } + + auto columns = table_override->columns; + if (columns && columns->constraints) + constraints = ConstraintsDescription(columns->constraints->children); + } + else + { + ordinary_columns_and_types = *table_structure->columns; + columns_declare_list->set(columns_declare_list->columns, getColumnsExpressionList(ordinary_columns_and_types)); + } + + if (ordinary_columns_and_types.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Table {}.{} has no columns", table_id.database_name, table_id.table_name); + NamesAndTypesList merging_columns; if (table_structure->primary_key_columns) merging_columns = *table_structure->primary_key_columns; @@ -417,39 +483,28 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery(PostgreSQLTableS order_by_expression->name = "tuple"; order_by_expression->arguments = std::make_shared(); - for (const auto & column : merging_columns) order_by_expression->arguments->children.emplace_back(std::make_shared(column.name)); - } - for (const auto & [name, type] : ordinary_columns_and_types) + storage->set(storage->order_by, order_by_expression); + } + else { - const auto & column_declaration = std::make_shared(); + ordinary_columns_and_types = metadata_snapshot->getColumns().getOrdinary(); + columns_declare_list->set(columns_declare_list->columns, getColumnsExpressionList(ordinary_columns_and_types)); - column_declaration->name = name; - column_declaration->type = getColumnDeclaration(type); + auto primary_key_ast = metadata_snapshot->getPrimaryKeyAST(); + if (!primary_key_ast) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage MaterializedPostgreSQL must have primary key"); + storage->set(storage->order_by, primary_key_ast); - columns_expression_list->children.emplace_back(column_declaration); + constraints = metadata_snapshot->getConstraints(); } - columns_declare_list->set(columns_declare_list->columns, columns_expression_list); - columns_declare_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_sign", "Int8", 1)); columns_declare_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_version", "UInt64", 1)); - create_table_query->set(create_table_query->columns_list, columns_declare_list); - /// Not nullptr for single storage (because throws exception if not specified), nullptr otherwise. 
- auto primary_key_ast = getInMemoryMetadataPtr()->getPrimaryKeyAST(); - - auto storage = std::make_shared(); - storage->set(storage->engine, makeASTFunction("ReplacingMergeTree", std::make_shared("_version"))); - - if (primary_key_ast) - storage->set(storage->order_by, primary_key_ast); - else - storage->set(storage->order_by, order_by_expression); - create_table_query->set(create_table_query->storage, storage); /// Add columns _sign and _version, so that they can be accessed from nested ReplacingMergeTree table if needed. @@ -458,8 +513,7 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery(PostgreSQLTableS StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription(ordinary_columns_and_types)); - storage_metadata.setConstraints(metadata_snapshot->getConstraints()); - + storage_metadata.setConstraints(constraints); setInMemoryMetadata(storage_metadata); return create_table_query; diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index 10724fb9bf0..9e11f314738 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -99,7 +99,11 @@ public: /// only once - when nested table is successfully created and is never changed afterwards. bool hasNested() { return has_nested.load(); } - void createNestedIfNeeded(PostgreSQLTableStructurePtr table_structure); + void createNestedIfNeeded(PostgreSQLTableStructurePtr table_structure, const ASTTableOverride * table_override); + + ASTPtr getCreateNestedTableQuery(PostgreSQLTableStructurePtr table_structure, const ASTTableOverride * table_override); + + std::shared_ptr getColumnsExpressionList(const NamesAndTypesList & columns) const; StoragePtr getNested() const; @@ -120,8 +124,6 @@ public: bool supportsFinal() const override { return true; } - ASTPtr getCreateNestedTableQuery(PostgreSQLTableStructurePtr table_structure); - protected: StorageMaterializedPostgreSQL( const StorageID & table_id_, diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 99f2facbaf6..3bf494aa957 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -34,6 +34,10 @@ postgres_table_template_4 = """ CREATE TABLE IF NOT EXISTS "{}"."{}" ( key Integer NOT NULL, value Integer, PRIMARY KEY(key)) """ +postgres_table_template_5 = """ + CREATE TABLE IF NOT EXISTS "{}" ( + key Integer NOT NULL, value UUID, PRIMARY KEY(key)) + """ def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name='postgres_database', replication=False): if database == True: @@ -93,7 +97,7 @@ def drop_clickhouse_postgres_db(name='postgres_database'): def create_materialized_db(ip, port, materialized_database='test_database', postgres_database='postgres_database', - settings=[]): + settings=[], table_overrides=''): instance.query(f"DROP DATABASE IF EXISTS {materialized_database}") create_query = f"CREATE DATABASE {materialized_database} ENGINE = MaterializedPostgreSQL('{ip}:{port}', '{postgres_database}', 'postgres', 'mysecretpassword')" if len(settings) > 0: @@ -102,6 +106,7 @@ def create_materialized_db(ip, port, if i != 0: create_query += ', ' create_query += settings[i] + create_query += table_overrides instance.query(create_query) assert materialized_database in instance.query('SHOW DATABASES') @@ 
-560,6 +565,30 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): drop_materialized_db() +def test_table_override(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) + cursor = conn.cursor() + table_name = 'table_override' + materialized_database = 'test_database' + create_postgres_table(cursor, table_name, template=postgres_table_template_5); + instance.query(f"create table {table_name}(key Int32, value UUID) engine = PostgreSQL (postgres1, table={table_name})") + instance.query(f"insert into {table_name} select number, generateUUIDv4() from numbers(10)") + table_overrides = f" TABLE OVERRIDE {table_name} (COLUMNS (key Int32, value UUID))" + create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + settings=[f"materialized_postgresql_tables_list = '{table_name}'"], table_overrides=table_overrides) + assert_nested_table_is_created(table_name, materialized_database) + result = instance.query(f"show create table {materialized_database}.{table_name}") + print(result) + expected = "CREATE TABLE test_database.table_override\\n(\\n `key` Int32,\\n `value` UUID,\\n `_sign` Int8() MATERIALIZED 1,\\n `_version` UInt64() MATERIALIZED 1\\n)\\nENGINE = ReplacingMergeTree(_version)\\nORDER BY tuple(key)" + assert(result.strip() == expected) + time.sleep(5) + result = instance.query(f"select * from {materialized_database}.{table_name} order by key") + expected = instance.query(f"select * from {table_name} order by key") + assert(result == expected) + drop_materialized_db() + drop_postgres_table(cursor, table_name) + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") From ac3cb8c12b4f75fbd277a76b6450a7322d4c515d Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 15 Dec 2021 15:55:28 +0300 Subject: [PATCH 14/37] CacheDictionary dictionary source access race fix --- src/Dictionaries/CacheDictionary.cpp | 4 +-- src/Dictionaries/CacheDictionary.h | 6 ++--- src/Dictionaries/CassandraDictionarySource.h | 2 +- .../ClickHouseDictionarySource.cpp | 1 - src/Dictionaries/ClickHouseDictionarySource.h | 2 +- src/Dictionaries/DirectDictionary.h | 2 +- .../ExecutableDictionarySource.cpp | 2 +- .../ExecutablePoolDictionarySource.cpp | 2 +- src/Dictionaries/ExternalQueryBuilder.cpp | 15 ++++++++--- src/Dictionaries/FileDictionarySource.h | 2 +- src/Dictionaries/FlatDictionary.h | 2 +- src/Dictionaries/HTTPDictionarySource.cpp | 2 +- src/Dictionaries/HashedArrayDictionary.h | 2 +- src/Dictionaries/HashedDictionary.h | 2 +- src/Dictionaries/IDictionary.h | 25 ++++++++++--------- src/Dictionaries/IDictionarySource.h | 3 +-- src/Dictionaries/IPAddressDictionary.h | 2 +- src/Dictionaries/LibraryDictionarySource.cpp | 2 +- src/Dictionaries/MongoDBDictionarySource.h | 2 +- src/Dictionaries/MySQLDictionarySource.cpp | 2 +- src/Dictionaries/PolygonDictionary.h | 2 +- .../PostgreSQLDictionarySource.cpp | 2 +- src/Dictionaries/RangeHashedDictionary.h | 2 +- src/Dictionaries/RedisDictionarySource.h | 2 +- src/Dictionaries/XDBCDictionarySource.cpp | 2 +- src/Dictionaries/writeParenthesisedString.cpp | 12 --------- src/Dictionaries/writeParenthesisedString.h | 11 -------- src/Functions/FunctionsExternalDictionaries.h | 10 +++++--- 28 files changed, 56 insertions(+), 69 deletions(-) delete mode 100644 src/Dictionaries/writeParenthesisedString.cpp delete mode 100644 src/Dictionaries/writeParenthesisedString.h diff --git 
a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index 723457fba5b..5b9d5e37b47 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -110,12 +110,12 @@ std::exception_ptr CacheDictionary::getLastException() cons } template -const IDictionarySource * CacheDictionary::getSource() const +DictionarySourcePtr CacheDictionary::getSource() const { /// Mutex required here because of the getSourceAndUpdateIfNeeded() function /// which is used from another thread. std::lock_guard lock(source_mutex); - return source_ptr.get(); + return source_ptr; } template diff --git a/src/Dictionaries/CacheDictionary.h b/src/Dictionaries/CacheDictionary.h index d2c2ed9a212..de04e7e098b 100644 --- a/src/Dictionaries/CacheDictionary.h +++ b/src/Dictionaries/CacheDictionary.h @@ -104,7 +104,7 @@ public: allow_read_expired_keys); } - const IDictionarySource * getSource() const override; + DictionarySourcePtr getSource() const override; const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } @@ -172,7 +172,7 @@ private: /// MultiVersion is not used here because it works with constant pointers. /// For some reason almost all methods in IDictionarySource interface are /// not constant. - SharedDictionarySourcePtr getSourceAndUpdateIfNeeded() const + DictionarySourcePtr getSourceAndUpdateIfNeeded() const { std::lock_guard lock(source_mutex); if (error_count) @@ -190,7 +190,7 @@ private: /// Dictionary source should be used with mutex mutable std::mutex source_mutex; - mutable SharedDictionarySourcePtr source_ptr; + mutable DictionarySourcePtr source_ptr; CacheDictionaryStoragePtr cache_storage_ptr; mutable CacheDictionaryUpdateQueue update_queue; diff --git a/src/Dictionaries/CassandraDictionarySource.h b/src/Dictionaries/CassandraDictionarySource.h index 35419d3ea7d..76ad2316366 100644 --- a/src/Dictionaries/CassandraDictionarySource.h +++ b/src/Dictionaries/CassandraDictionarySource.h @@ -61,7 +61,7 @@ public: DictionarySourcePtr clone() const override { - return std::make_unique(dict_struct, configuration, sample_block); + return std::make_shared(dict_struct, configuration, sample_block); } Pipe loadIds(const std::vector & ids) override; diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 1ddcdd96454..6abd5f317e2 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -17,7 +17,6 @@ #include "DictionaryStructure.h" #include "ExternalQueryBuilder.h" #include "readInvalidateQuery.h" -#include "writeParenthesisedString.h" #include "DictionaryFactory.h" #include "DictionarySourceHelpers.h" diff --git a/src/Dictionaries/ClickHouseDictionarySource.h b/src/Dictionaries/ClickHouseDictionarySource.h index be09fa415fd..cdcc0ee824f 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.h +++ b/src/Dictionaries/ClickHouseDictionarySource.h @@ -60,7 +60,7 @@ public: bool hasUpdateField() const override; - DictionarySourcePtr clone() const override { return std::make_unique(*this); } + DictionarySourcePtr clone() const override { return std::make_shared(*this); } std::string toString() const override; diff --git a/src/Dictionaries/DirectDictionary.h b/src/Dictionaries/DirectDictionary.h index edf4c8d1d9a..4bf24e6ae98 100644 --- a/src/Dictionaries/DirectDictionary.h +++ b/src/Dictionaries/DirectDictionary.h @@ -58,7 +58,7 @@ public: return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone()); } 
- const IDictionarySource * getSource() const override { return source_ptr.get(); } + DictionarySourcePtr getSource() const override { return source_ptr; } const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index c09993c2a84..5816b942d36 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -158,7 +158,7 @@ bool ExecutableDictionarySource::hasUpdateField() const DictionarySourcePtr ExecutableDictionarySource::clone() const { - return std::make_unique(*this); + return std::make_shared(*this); } std::string ExecutableDictionarySource::toString() const diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index dce2ce94b93..01be40412b6 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -152,7 +152,7 @@ bool ExecutablePoolDictionarySource::hasUpdateField() const DictionarySourcePtr ExecutablePoolDictionarySource::clone() const { - return std::make_unique(*this); + return std::make_shared(*this); } std::string ExecutablePoolDictionarySource::toString() const diff --git a/src/Dictionaries/ExternalQueryBuilder.cpp b/src/Dictionaries/ExternalQueryBuilder.cpp index f513c7b2f61..1701f08fd67 100644 --- a/src/Dictionaries/ExternalQueryBuilder.cpp +++ b/src/Dictionaries/ExternalQueryBuilder.cpp @@ -1,14 +1,23 @@ #include "ExternalQueryBuilder.h" + +#include + #include #include #include -#include -#include "DictionaryStructure.h" -#include "writeParenthesisedString.h" +#include namespace DB { + +static inline void writeParenthesisedString(const String & s, WriteBuffer & buf) +{ + writeChar('(', buf); + writeString(s, buf); + writeChar(')', buf); +} + namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; diff --git a/src/Dictionaries/FileDictionarySource.h b/src/Dictionaries/FileDictionarySource.h index c8e37986b2f..8fe2d87d8b9 100644 --- a/src/Dictionaries/FileDictionarySource.h +++ b/src/Dictionaries/FileDictionarySource.h @@ -51,7 +51,7 @@ public: ///Not supported for FileDictionarySource bool hasUpdateField() const override { return false; } - DictionarySourcePtr clone() const override { return std::make_unique(*this); } + DictionarySourcePtr clone() const override { return std::make_shared(*this); } std::string toString() const override; diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h index 5c3a1d634d8..308cd72d55b 100644 --- a/src/Dictionaries/FlatDictionary.h +++ b/src/Dictionaries/FlatDictionary.h @@ -61,7 +61,7 @@ public: return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, configuration, update_field_loaded_block); } - const IDictionarySource * getSource() const override { return source_ptr.get(); } + DictionarySourcePtr getSource() const override { return source_ptr; } const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index aba6b40f206..308570644d1 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -207,7 +207,7 @@ bool HTTPDictionarySource::hasUpdateField() const DictionarySourcePtr HTTPDictionarySource::clone() const { - return std::make_unique(*this); + return std::make_shared(*this); } 
std::string HTTPDictionarySource::toString() const diff --git a/src/Dictionaries/HashedArrayDictionary.h b/src/Dictionaries/HashedArrayDictionary.h index ca5d7cb1bf6..0d07c43477a 100644 --- a/src/Dictionaries/HashedArrayDictionary.h +++ b/src/Dictionaries/HashedArrayDictionary.h @@ -71,7 +71,7 @@ public: return std::make_shared>(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block); } - const IDictionarySource * getSource() const override { return source_ptr.get(); } + DictionarySourcePtr getSource() const override { return source_ptr; } const DictionaryLifetime & getLifetime() const override { return configuration.lifetime; } diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 16be4e4c73e..6f63c5ec546 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -78,7 +78,7 @@ public: return std::make_shared>(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block); } - const IDictionarySource * getSource() const override { return source_ptr.get(); } + DictionarySourcePtr getSource() const override { return source_ptr; } const DictionaryLifetime & getLifetime() const override { return configuration.lifetime; } diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h index 66e35c8fa12..b1923306003 100644 --- a/src/Dictionaries/IDictionary.h +++ b/src/Dictionaries/IDictionary.h @@ -1,16 +1,16 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include - #include #include +#include +#include +#include +#include +#include +#include +#include + namespace DB { @@ -19,7 +19,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -struct IDictionary; +class IDictionary; using DictionaryPtr = std::unique_ptr; /** DictionaryKeyType provides IDictionary client information about @@ -47,8 +47,9 @@ enum class DictionarySpecialKeyType /** * Base class for Dictionaries implementation. 
*/ -struct IDictionary : public IExternalLoadable +class IDictionary : public IExternalLoadable { +public: explicit IDictionary(const StorageID & dictionary_id_) : dictionary_id(dictionary_id_) , full_name(dictionary_id.getInternalDictionaryName()) @@ -99,7 +100,7 @@ struct IDictionary : public IExternalLoadable virtual double getLoadFactor() const = 0; - virtual const IDictionarySource * getSource() const = 0; + virtual DictionarySourcePtr getSource() const = 0; virtual const DictionaryStructure & getStructure() const = 0; @@ -200,7 +201,7 @@ struct IDictionary : public IExternalLoadable bool isModified() const override { - const auto * source = getSource(); + const auto source = getSource(); return source && source->isModified(); } diff --git a/src/Dictionaries/IDictionarySource.h b/src/Dictionaries/IDictionarySource.h index 5071b69d2bf..128595b815f 100644 --- a/src/Dictionaries/IDictionarySource.h +++ b/src/Dictionaries/IDictionarySource.h @@ -10,8 +10,7 @@ namespace DB { class IDictionarySource; -using DictionarySourcePtr = std::unique_ptr; -using SharedDictionarySourcePtr = std::shared_ptr; +using DictionarySourcePtr = std::shared_ptr; /** Data-provider interface for external dictionaries, * abstracts out the data source (file, MySQL, ClickHouse, external program, network request et cetera) diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h index ed0d8692d21..9f604b5aeb8 100644 --- a/src/Dictionaries/IPAddressDictionary.h +++ b/src/Dictionaries/IPAddressDictionary.h @@ -56,7 +56,7 @@ public: return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty); } - const IDictionarySource * getSource() const override { return source_ptr.get(); } + DictionarySourcePtr getSource() const override { return source_ptr; } const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } diff --git a/src/Dictionaries/LibraryDictionarySource.cpp b/src/Dictionaries/LibraryDictionarySource.cpp index 42683fb884c..b79ee9be59a 100644 --- a/src/Dictionaries/LibraryDictionarySource.cpp +++ b/src/Dictionaries/LibraryDictionarySource.cpp @@ -129,7 +129,7 @@ Pipe LibraryDictionarySource::loadKeys(const Columns & key_columns, const std::v DictionarySourcePtr LibraryDictionarySource::clone() const { - return std::make_unique(*this); + return std::make_shared(*this); } diff --git a/src/Dictionaries/MongoDBDictionarySource.h b/src/Dictionaries/MongoDBDictionarySource.h index 3625deca9c6..85531f89902 100644 --- a/src/Dictionaries/MongoDBDictionarySource.h +++ b/src/Dictionaries/MongoDBDictionarySource.h @@ -65,7 +65,7 @@ public: ///Not yet supported bool hasUpdateField() const override { return false; } - DictionarySourcePtr clone() const override { return std::make_unique(*this); } + DictionarySourcePtr clone() const override { return std::make_shared(*this); } std::string toString() const override; diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index f6de6ca0cc1..18b4c512f2f 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -225,7 +225,7 @@ bool MySQLDictionarySource::hasUpdateField() const DictionarySourcePtr MySQLDictionarySource::clone() const { - return std::make_unique(*this); + return std::make_shared(*this); } std::string MySQLDictionarySource::toString() const diff --git a/src/Dictionaries/PolygonDictionary.h b/src/Dictionaries/PolygonDictionary.h index 346160c342f..762c136b8e0 100644 --- 
a/src/Dictionaries/PolygonDictionary.h +++ b/src/Dictionaries/PolygonDictionary.h @@ -87,7 +87,7 @@ public: double getLoadFactor() const override { return 1.0; } - const IDictionarySource * getSource() const override { return source_ptr.get(); } + DictionarySourcePtr getSource() const override { return source_ptr; } const DictionaryStructure & getStructure() const override { return dict_struct; } diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index c9fb8b86b77..0ac84b35048 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -161,7 +161,7 @@ bool PostgreSQLDictionarySource::supportsSelectiveLoad() const DictionarySourcePtr PostgreSQLDictionarySource::clone() const { - return std::make_unique(*this); + return std::make_shared(*this); } diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 1605e2bab81..fca72d5d7cc 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -67,7 +67,7 @@ public: return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, update_field_loaded_block); } - const IDictionarySource * getSource() const override { return source_ptr.get(); } + DictionarySourcePtr getSource() const override { return source_ptr; } const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } diff --git a/src/Dictionaries/RedisDictionarySource.h b/src/Dictionaries/RedisDictionarySource.h index 053094e2303..eff97dede0c 100644 --- a/src/Dictionaries/RedisDictionarySource.h +++ b/src/Dictionaries/RedisDictionarySource.h @@ -76,7 +76,7 @@ namespace ErrorCodes bool hasUpdateField() const override { return false; } - DictionarySourcePtr clone() const override { return std::make_unique(*this); } + DictionarySourcePtr clone() const override { return std::make_shared(*this); } std::string toString() const override; diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index f827c0cd8d0..89cf18d530b 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -162,7 +162,7 @@ bool XDBCDictionarySource::hasUpdateField() const DictionarySourcePtr XDBCDictionarySource::clone() const { - return std::make_unique(*this); + return std::make_shared(*this); } diff --git a/src/Dictionaries/writeParenthesisedString.cpp b/src/Dictionaries/writeParenthesisedString.cpp deleted file mode 100644 index 5e237aa1e6c..00000000000 --- a/src/Dictionaries/writeParenthesisedString.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "writeParenthesisedString.h" - -namespace DB -{ -void writeParenthesisedString(const String & s, WriteBuffer & buf) -{ - writeChar('(', buf); - writeString(s, buf); - writeChar(')', buf); -} - -} diff --git a/src/Dictionaries/writeParenthesisedString.h b/src/Dictionaries/writeParenthesisedString.h deleted file mode 100644 index ec61e944d38..00000000000 --- a/src/Dictionaries/writeParenthesisedString.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ -void writeParenthesisedString(const String & s, WriteBuffer & buf); - - -} diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index c52d54f30aa..71597f2b433 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -68,11 +68,12 @@ public: 
std::shared_ptr getDictionary(const String & dictionary_name) { - auto dict = getContext()->getExternalDictionariesLoader().getDictionary(dictionary_name, getContext()); + auto current_context = getContext(); + auto dict = current_context->getExternalDictionariesLoader().getDictionary(dictionary_name, current_context); if (!access_checked) { - getContext()->checkAccess(AccessType::dictGet, dict->getDatabaseOrNoDatabaseTag(), dict->getDictionaryID().getTableName()); + current_context->checkAccess(AccessType::dictGet, dict->getDatabaseOrNoDatabaseTag(), dict->getDictionaryID().getTableName()); access_checked = true; } @@ -106,8 +107,9 @@ public: if (!attr_name_col) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument of function dictGet must be a constant string"); - const auto dictionary_name = dict_name_col->getValue(); - const auto attribute_name = attr_name_col->getValue(); + const auto & dictionary_name = dict_name_col->getValue(); + const auto & attribute_name = attr_name_col->getValue(); + return getDictionary(dictionary_name)->isInjective(attribute_name); } From 7583c8007e2d0822572e07206d05da4ed538f430 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 16 Dec 2021 00:38:46 +0300 Subject: [PATCH 15/37] Some review fixes --- src/Common/getTableOverride.cpp | 27 +++++++++++++++++++ src/Common/getTableOverride.h | 8 ++++++ .../MySQL/InterpretersMySQLDDLQuery.cpp | 3 ++- src/Parsers/ASTTableOverrides.cpp | 18 ------------- src/Parsers/ASTTableOverrides.h | 2 -- .../PostgreSQLReplicationHandler.cpp | 11 ++++---- .../StorageMaterializedPostgreSQL.cpp | 13 ++++++--- .../test.py | 4 +-- 8 files changed, 55 insertions(+), 31 deletions(-) create mode 100644 src/Common/getTableOverride.cpp create mode 100644 src/Common/getTableOverride.h diff --git a/src/Common/getTableOverride.cpp b/src/Common/getTableOverride.cpp new file mode 100644 index 00000000000..fedcad85b3f --- /dev/null +++ b/src/Common/getTableOverride.cpp @@ -0,0 +1,27 @@ +#include "getTableOverride.h" + +#include +#include +#include +#include + +namespace DB +{ + +ASTPtr tryGetTableOverride(const String & mapped_database, const String & table) +{ + if (auto database_ptr = DatabaseCatalog::instance().tryGetDatabase(mapped_database)) + { + auto create_query = database_ptr->getCreateDatabaseQuery(); + if (auto create_database_query = create_query->as()) + { + if (create_database_query->table_overrides) + { + return create_database_query->table_overrides->tryGetTableOverride(table); + } + } + } + return nullptr; +} + +} diff --git a/src/Common/getTableOverride.h b/src/Common/getTableOverride.h new file mode 100644 index 00000000000..1a0a15e6fe2 --- /dev/null +++ b/src/Common/getTableOverride.h @@ -0,0 +1,8 @@ +#pragma once +#include +#include + +namespace DB +{ +ASTPtr tryGetTableOverride(const String & mapped_database, const String & table); +} diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 798283c6f50..e5c4a85dc27 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -519,7 +520,7 @@ ASTs InterpreterCreateImpl::getRewrittenQueries( rewritten_query->set(rewritten_query->storage, storage); rewritten_query->set(rewritten_query->columns_list, columns); - if (auto table_override = ASTTableOverride::tryGetTableOverride(mapped_to_database, create_query.table)) + if (auto table_override = 
tryGetTableOverride(mapped_to_database, create_query.table)) { auto * override_ast = table_override->as(); override_ast->applyToCreateTableQuery(rewritten_query.get()); diff --git a/src/Parsers/ASTTableOverrides.cpp b/src/Parsers/ASTTableOverrides.cpp index ee351ab3eb3..0270c2cad82 100644 --- a/src/Parsers/ASTTableOverrides.cpp +++ b/src/Parsers/ASTTableOverrides.cpp @@ -6,8 +6,6 @@ #include #include #include -#include -#include namespace DB { @@ -73,22 +71,6 @@ void ASTTableOverride::formatImpl(const FormatSettings & settings_, FormatState settings.ostr << nl_or_nothing << ')'; } -ASTPtr ASTTableOverride::tryGetTableOverride(const String & mapped_database, const String & table) -{ - if (auto database_ptr = DatabaseCatalog::instance().tryGetDatabase(mapped_database)) - { - auto create_query = database_ptr->getCreateDatabaseQuery(); - if (auto create_database_query = create_query->as()) - { - if (create_database_query->table_overrides) - { - return create_database_query->table_overrides->tryGetTableOverride(table); - } - } - } - return nullptr; -} - void ASTTableOverride::applyToCreateTableQuery(ASTCreateQuery * create_query) const { if (columns) diff --git a/src/Parsers/ASTTableOverrides.h b/src/Parsers/ASTTableOverrides.h index a34b3619b2e..7a84ed25121 100644 --- a/src/Parsers/ASTTableOverrides.h +++ b/src/Parsers/ASTTableOverrides.h @@ -27,8 +27,6 @@ public: ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; void applyToCreateTableQuery(ASTCreateQuery * create_query) const; - - static ASTPtr tryGetTableOverride(const String & mapped_database, const String & table); }; /// List of table overrides, for example: diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index d706615b762..dcd64314028 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -1,5 +1,7 @@ #include "PostgreSQLReplicationHandler.h" +#include +#include #include #include #include @@ -8,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -281,8 +282,8 @@ ASTPtr PostgreSQLReplicationHandler::getCreateNestedTableQuery(StorageMaterializ if (!table_structure) throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to get PostgreSQL table structure"); - auto table_override = ASTTableOverride::tryGetTableOverride(current_database_name, table_name); - return storage->getCreateNestedTableQuery(std::move(table_structure), table_override->as()); + auto table_override = tryGetTableOverride(current_database_name, table_name); + return storage->getCreateNestedTableQuery(std::move(table_structure), table_override ? table_override->as() : nullptr); } @@ -300,8 +301,8 @@ StoragePtr PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection & query_str = fmt::format("SELECT * FROM {}", quoted_name); LOG_DEBUG(log, "Loading PostgreSQL table {}.{}", postgres_database, quoted_name); - auto table_override = ASTTableOverride::tryGetTableOverride(current_database_name, table_name); - materialized_storage->createNestedIfNeeded(fetchTableStructure(*tx, table_name), table_override->as()); + auto table_override = tryGetTableOverride(current_database_name, table_name); + materialized_storage->createNestedIfNeeded(fetchTableStructure(*tx, table_name), table_override ? 
table_override->as() : nullptr); auto nested_storage = materialized_storage->getNested(); auto insert = std::make_shared(); diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index a680792acad..ae9ade32ef0 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -2,29 +2,37 @@ #if USE_LIBPQXX #include + #include #include #include + #include +#include + #include #include #include #include #include + #include #include + #include #include #include #include -#include + #include #include #include + #include #include #include -#include + +#include namespace DB @@ -438,7 +446,6 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery( if (table_override && table_override->columns) { - table_override->applyToCreateTableQuery(create_table_query.get()); if (table_override->columns) { auto children = table_override->columns->children; diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 3bf494aa957..c8b63d8e667 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -582,9 +582,9 @@ def test_table_override(started_cluster): expected = "CREATE TABLE test_database.table_override\\n(\\n `key` Int32,\\n `value` UUID,\\n `_sign` Int8() MATERIALIZED 1,\\n `_version` UInt64() MATERIALIZED 1\\n)\\nENGINE = ReplacingMergeTree(_version)\\nORDER BY tuple(key)" assert(result.strip() == expected) time.sleep(5) - result = instance.query(f"select * from {materialized_database}.{table_name} order by key") + query = f"select * from {materialized_database}.{table_name} order by key" expected = instance.query(f"select * from {table_name} order by key") - assert(result == expected) + assert_eq_with_retry(instance, query, expected) drop_materialized_db() drop_postgres_table(cursor, table_name) From f515f8d98827a319f2938fe07603af7f75de9b9d Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Wed, 15 Dec 2021 20:13:08 -0400 Subject: [PATCH 16/37] test for summap_nullable 0 --- tests/queries/0_stateless/01634_summap_nullable.reference | 2 ++ tests/queries/0_stateless/01634_summap_nullable.sql | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/01634_summap_nullable.reference create mode 100644 tests/queries/0_stateless/01634_summap_nullable.sql diff --git a/tests/queries/0_stateless/01634_summap_nullable.reference b/tests/queries/0_stateless/01634_summap_nullable.reference new file mode 100644 index 00000000000..babed7df00d --- /dev/null +++ b/tests/queries/0_stateless/01634_summap_nullable.reference @@ -0,0 +1,2 @@ +(['a'],[1]) +(['a','b'],[1,0]) diff --git a/tests/queries/0_stateless/01634_summap_nullable.sql b/tests/queries/0_stateless/01634_summap_nullable.sql new file mode 100644 index 00000000000..226da645e9f --- /dev/null +++ b/tests/queries/0_stateless/01634_summap_nullable.sql @@ -0,0 +1,2 @@ +SELECT sumMap(['a', 'b'], [1, NULL]); +SELECT sumMap(['a', 'b'], [1, toNullable(0)]); From c8a92c046f95216e68ed3ba66b8f5c1eaedd8a1d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 17 Dec 2021 20:36:37 +0300 Subject: [PATCH 17/37] Another try --- src/Interpreters/SelectQueryOptions.h | 9 +++++++++ src/Interpreters/TreeRewriter.cpp | 2 +- src/Storages/ProjectionsDescription.cpp | 6 ++++-- 3 files changed, 14 
insertions(+), 3 deletions(-) diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index 709ecdc239c..bc95a940c18 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -41,6 +41,9 @@ struct SelectQueryOptions /// It is needed because lazy normal projections require special planning in FetchColumns stage, such as adding WHERE transform. /// It is also used to avoid adding aggregating step when aggregate projection is chosen. bool is_projection_query = false; + /// This flag is needed for projection description. + /// Otherwise, keys for GROUP BY may be removed as constants. + bool ignore_ast_optimizations = false; bool ignore_alias = false; bool is_internal = false; bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select @@ -120,6 +123,12 @@ struct SelectQueryOptions return *this; } + SelectQueryOptions & ignoreASTOptimizationsAlias(bool value = true) + { + ignore_ast_optimizations = value; + return *this; + } + SelectQueryOptions & setInternal(bool value = false) { is_internal = value; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 639d38d36f1..48ab2822c82 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1117,7 +1117,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.rewrite_subqueries = PredicateExpressionsOptimizer(getContext(), tables_with_columns, settings).optimize(*select_query); /// Only apply AST optimization for initial queries. - if (getContext()->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY) + if (getContext()->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && !select_options.ignore_ast_optimizations) TreeOptimizer::apply(query, result, tables_with_columns, getContext()); /// array_join_alias_to_name, array_join_result_to_source. diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index f1a0372a07d..791583e2495 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -201,7 +201,7 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( select_expression_list->children.push_back(makeASTFunction("count")); select_query->setExpression(ASTProjectionSelectQuery::Expression::SELECT, std::move(select_expression_list)); - if (partition_columns) + if (partition_columns && !partition_columns->children.empty()) select_query->setExpression(ASTProjectionSelectQuery::Expression::GROUP_BY, partition_columns->clone()); result.definition_ast = select_query; @@ -211,7 +211,9 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( auto external_storage_holder = std::make_shared(query_context, columns, ConstraintsDescription{}); StoragePtr storage = external_storage_holder->getTable(); InterpreterSelectQuery select( - result.query_ast, query_context, storage, {}, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias()); + result.query_ast, query_context, storage, {}, + /// Here we ignore ast optimizations because otherwise aggregation keys may be removed from result header as constants. 
+ SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias().ignoreASTOptimizationsAlias()); result.required_columns = select.getRequiredColumns(); result.sample_block = select.getSampleBlock(); From e6ebb55c4e5d14adc4a41d818e6f03407947fd6b Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 17 Dec 2021 22:19:10 +0300 Subject: [PATCH 18/37] Move to Interpreters --- src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp | 2 +- src/{Common => Interpreters}/getTableOverride.cpp | 0 src/{Common => Interpreters}/getTableOverride.h | 0 src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp | 2 +- 4 files changed, 2 insertions(+), 2 deletions(-) rename src/{Common => Interpreters}/getTableOverride.cpp (100%) rename src/{Common => Interpreters}/getTableOverride.h (100%) diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index e5c4a85dc27..853d84a9695 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Common/getTableOverride.cpp b/src/Interpreters/getTableOverride.cpp similarity index 100% rename from src/Common/getTableOverride.cpp rename to src/Interpreters/getTableOverride.cpp diff --git a/src/Common/getTableOverride.h b/src/Interpreters/getTableOverride.h similarity index 100% rename from src/Common/getTableOverride.h rename to src/Interpreters/getTableOverride.h diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index dcd64314028..984a9cdd47a 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -1,12 +1,12 @@ #include "PostgreSQLReplicationHandler.h" #include -#include #include #include #include #include #include +#include #include #include #include From 708439b0364b46e89762f408e09df3e6101d0faa Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 16 Dec 2021 00:06:17 +0300 Subject: [PATCH 19/37] Support customized compression for input/output data in gRPC protocol. 
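Adds `compression_type` and `compression_level` to QueryInfo (plus `compression_type` to ExternalTable): the client compresses `input_data` and external table `data` itself, and the server compresses `output`, `totals` and `extremes` before sending them back. A minimal client-side sketch, assuming the generated clickhouse_grpc_pb2* modules and an insecure channel to the server's gRPC port (9100 in the integration tests below; host and variable names here are illustrative):

    import grpc
    import lz4.frame
    import clickhouse_grpc_pb2
    import clickhouse_grpc_pb2_grpc

    channel = grpc.insecure_channel("localhost:9100")
    stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel)
    # Ask the server to LZ4-compress the result; the client decompresses it.
    info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000)",
                                         compression_type="lz4")
    result = stub.ExecuteQuery(info)
    assert lz4.frame.decompress(result.output) == b"0\n" * 1000

Supported compression types and the recommended per-type compression levels are documented in clickhouse_grpc.proto.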
--- docker/test/integration/runner/Dockerfile | 1 + src/Server/GRPCServer.cpp | 109 +++++++++++++++---- src/Server/grpc_protos/clickhouse_grpc.proto | 23 ++++ tests/integration/test_grpc_protocol/test.py | 66 +++++++++++ 4 files changed, 179 insertions(+), 20 deletions(-) diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index a953a8a904a..eee974f2cc2 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -72,6 +72,7 @@ RUN python3 -m pip install \ grpcio-tools \ kafka-python \ kazoo \ + lz4 \ minio \ protobuf \ psycopg2-binary==2.8.6 \ diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 096194455b1..589bdd63f41 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -618,7 +619,11 @@ namespace ASTInsertQuery * insert_query = nullptr; String input_format; String input_data_delimiter; + PODArray output; String output_format; + CompressionMethod compression_method = CompressionMethod::None; + int compression_level = 0; + uint64_t interactive_delay = 100000; bool send_exception_with_stacktrace = true; bool input_function_is_used = false; @@ -635,8 +640,10 @@ namespace bool responder_finished = false; bool cancelled = false; - std::optional read_buffer; - std::optional write_buffer; + std::unique_ptr read_buffer; + std::unique_ptr write_buffer; + WriteBufferFromVector> * nested_write_buffer = nullptr; + WriteBuffer * compressing_write_buffer = nullptr; std::unique_ptr pipeline; std::unique_ptr pipeline_executor; std::shared_ptr output_format_processor; @@ -818,6 +825,10 @@ namespace if (output_format.empty()) output_format = query_context->getDefaultFormat(); + /// Choose compression. + compression_method = chooseCompressionMethod("", query_info.compression_type()); + compression_level = query_info.compression_level(); + /// Set callback to create and fill external tables query_context->setExternalTablesInitializer([this] (ContextPtr context) { @@ -891,7 +902,7 @@ namespace void Call::initializeBlockInputStream(const Block & header) { assert(!read_buffer); - read_buffer.emplace([this]() -> std::pair + read_buffer = std::make_unique([this]() -> std::pair { if (need_input_data_from_insert_query) { @@ -947,6 +958,8 @@ namespace return {nullptr, 0}; /// no more input data }); + read_buffer = wrapReadBufferWithCompressionMethod(std::move(read_buffer), compression_method); + assert(!pipeline); auto source = query_context->getInputFormat( input_format, *read_buffer, header, query_context->getSettings().max_insert_block_size); @@ -1030,7 +1043,10 @@ namespace /// The data will be written directly to the table. 
auto metadata_snapshot = storage->getInMemoryMetadataPtr(); auto sink = storage->write(ASTPtr(), metadata_snapshot, query_context); - ReadBufferFromMemory data(external_table.data().data(), external_table.data().size()); + + std::unique_ptr buf = std::make_unique(external_table.data().data(), external_table.data().size()); + buf = wrapReadBufferWithCompressionMethod(std::move(buf), chooseCompressionMethod("", external_table.compression_type())); + String format = external_table.format(); if (format.empty()) format = "TabSeparated"; @@ -1047,7 +1063,7 @@ namespace external_table_context->applySettingsChanges(settings_changes); } auto in = external_table_context->getInputFormat( - format, data, metadata_snapshot->getSampleBlock(), + format, *buf, metadata_snapshot->getSampleBlock(), external_table_context->getSettings().max_insert_block_size); QueryPipelineBuilder cur_pipeline; @@ -1101,7 +1117,18 @@ namespace if (io.pipeline.pulling()) header = io.pipeline.getHeader(); - write_buffer.emplace(*result.mutable_output()); + if (compression_method != CompressionMethod::None) + output.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data. + write_buffer = std::make_unique>>(output); + nested_write_buffer = static_cast> *>(write_buffer.get()); + if (compression_method != CompressionMethod::None) + { + write_buffer = wrapWriteBufferWithCompressionMethod(std::move(write_buffer), compression_method, compression_level); + compressing_write_buffer = write_buffer.get(); + } + + auto has_output = [&] { return (nested_write_buffer->position() != output.data()) || (compressing_write_buffer && compressing_write_buffer->offset()); }; + output_format_processor = query_context->getOutputFormat(output_format, *write_buffer, header); Stopwatch after_send_progress; @@ -1143,8 +1170,7 @@ namespace addLogsToResult(); - bool has_output = write_buffer->offset(); - if (has_output || result.has_progress() || result.logs_size()) + if (has_output() || result.has_progress() || result.logs_size()) sendResult(); throwIfFailedToSendResult(); @@ -1164,13 +1190,11 @@ namespace auto executor = std::make_shared(io.pipeline); auto callback = [&]() -> bool { - throwIfFailedToSendResult(); addProgressToResult(); addLogsToResult(); - bool has_output = write_buffer->offset(); - if (has_output || result.has_progress() || result.logs_size()) + if (has_output() || result.has_progress() || result.logs_size()) sendResult(); throwIfFailedToSendResult(); @@ -1260,6 +1284,8 @@ namespace /// immediately after it receives our final result, and it's prohibited to have /// two queries executed at the same time with the same query ID or session ID. io.process_list_entry.reset(); + if (query_context) + query_context->setProcessListElement(nullptr); if (session) session->releaseSessionID(); } @@ -1272,6 +1298,8 @@ namespace output_format_processor.reset(); read_buffer.reset(); write_buffer.reset(); + nested_write_buffer = nullptr; + compressing_write_buffer = nullptr; io = {}; query_scope.reset(); query_context.reset(); @@ -1390,10 +1418,17 @@ namespace if (!totals) return; - WriteBufferFromString buf{*result.mutable_totals()}; - auto format = query_context->getOutputFormat(output_format, buf, totals); + PODArray memory; + if (compression_method != CompressionMethod::None) + memory.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data. 
+ std::unique_ptr buf = std::make_unique>>(memory); + buf = wrapWriteBufferWithCompressionMethod(std::move(buf), compression_method, compression_level); + auto format = query_context->getOutputFormat(output_format, *buf, totals); format->write(materializeBlock(totals)); format->finalize(); + buf->finalize(); + + result.mutable_totals()->assign(memory.data(), memory.size()); } void Call::addExtremesToResult(const Block & extremes) @@ -1401,10 +1436,17 @@ namespace if (!extremes) return; - WriteBufferFromString buf{*result.mutable_extremes()}; - auto format = query_context->getOutputFormat(output_format, buf, extremes); + PODArray memory; + if (compression_method != CompressionMethod::None) + memory.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data. + std::unique_ptr buf = std::make_unique>>(memory); + buf = wrapWriteBufferWithCompressionMethod(std::move(buf), compression_method, compression_level); + auto format = query_context->getOutputFormat(output_format, *buf, extremes); format->write(materializeBlock(extremes)); format->finalize(); + buf->finalize(); + + result.mutable_extremes()->assign(memory.data(), memory.size()); } void Call::addProfileInfoToResult(const ProfileInfo & info) @@ -1475,6 +1517,38 @@ namespace if (!send_final_message && !isOutputStreaming(call_type)) return; + /// Copy output to `result.output`, with optional compressing. + if (write_buffer) + { + size_t output_size; + if (send_final_message) + { + if (compressing_write_buffer) + LOG_DEBUG(log, "Compressing final {} bytes", compressing_write_buffer->offset()); + write_buffer->finalize(); + output_size = output.size(); + } + else + { + if (compressing_write_buffer && compressing_write_buffer->offset()) + { + LOG_DEBUG(log, "Compressing {} bytes", compressing_write_buffer->offset()); + compressing_write_buffer->sync(); + } + output_size = nested_write_buffer->position() - output.data(); + } + + if (output_size) + { + result.mutable_output()->assign(output.data(), output_size); + nested_write_buffer->restart(); /// We're going to reuse the same buffer again for next block of data. + } + } + + if (!send_final_message && result.output().empty() && result.totals().empty() && result.extremes().empty() && !result.logs_size() + && !result.has_progress() && !result.has_stats() && !result.has_exception() && !result.cancelled()) + return; /// Nothing to send. + /// Wait for previous write to finish. /// (gRPC doesn't allow to start sending another result while the previous is still being sending.) if (sending_result.get()) @@ -1488,9 +1562,6 @@ namespace /// Start sending the result. LOG_DEBUG(log, "Sending {} result to the client: {}", (send_final_message ? "final" : "intermediate"), getResultDescription(result)); - if (write_buffer) - write_buffer->finalize(); - sending_result.set(true); auto callback = [this](bool ok) { @@ -1511,8 +1582,6 @@ namespace /// gRPC has already retrieved all data from `result`, so we don't have to keep it. result.Clear(); - if (write_buffer) - write_buffer->restart(); if (send_final_message) { diff --git a/src/Server/grpc_protos/clickhouse_grpc.proto b/src/Server/grpc_protos/clickhouse_grpc.proto index c6cafaf6e40..c86c74535c5 100644 --- a/src/Server/grpc_protos/clickhouse_grpc.proto +++ b/src/Server/grpc_protos/clickhouse_grpc.proto @@ -37,6 +37,10 @@ message ExternalTable { // Format of the data to insert to the external table. string format = 4; + // Compression type used to compress `data`. 
+ // Supported values: none, gzip(gz), deflate, brotli(br), lzma(xz), zstd(zst), lz4, bz2. + string compression_type = 6; + // Settings for executing that insertion, applied after QueryInfo.settings. map settings = 5; } @@ -101,6 +105,25 @@ message QueryInfo { /// Controls how a ClickHouse server will compress query execution results before sending back to the client. /// If not set the compression settings from the configuration file will be used. Compression result_compression = 17; + + // Compression type for `input_data`, `output_data`, `totals` and `extremes`. + // Supported compression types: none, gzip(gz), deflate, brotli(br), lzma(xz), zstd(zst), lz4, bz2. + // When used for `input_data` the client is responsible to compress data before putting it into `input_data`. + // When used for `output_data` or `totals` or `extremes` the client receives compressed data and should decompress it by itself. + // In the latter case consider to specify also `compression_level`. + string compression_type = 18; + + // Compression level. + // WARNING: If it's not specified the compression level is set to zero by default which might be not the best choice for some compression types (see below). + // The compression level should be in the following range (the higher the number, the better the compression): + // none: compression level isn't used + // gzip: 0..9; 0 means no compression, 6 is recommended by default (compression level -1 also means 6) + // brotli: 0..11 + // lzma: 0..9; 6 is recommended by default + // zstd: 1..22; 3 is recommended by default (compression level 0 also means 3) + // lz4: 0..16; values < 0 mean fast acceleration + // bz2: 1..9 + int32 compression_level = 19; } enum LogsLevel { diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index c892fc94712..e17ed0d9c8e 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -5,6 +5,8 @@ import time import grpc from helpers.cluster import ClickHouseCluster, run_and_check from threading import Thread +import gzip +import lz4.frame GRPC_PORT = 9100 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -365,3 +367,67 @@ def test_result_compression(): stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) result = stub.ExecuteQuery(query_info) assert result.output == (b'0\n')*1000000 + +def test_compressed_output(): + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000)", compression_type="lz4") + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + result = stub.ExecuteQuery(query_info) + assert lz4.frame.decompress(result.output) == (b'0\n')*1000 + +def test_compressed_output_streaming(): + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(100000)", compression_type="lz4") + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + d_context = lz4.frame.create_decompression_context() + data = b'' + for result in stub.ExecuteQueryWithStreamOutput(query_info): + d1, _, _ = lz4.frame.decompress_chunk(d_context, result.output) + data += d1 + assert data == (b'0\n')*100000 + +def test_compressed_output_gzip(): + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000)", compression_type="gzip", compression_level=6) + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + result = stub.ExecuteQuery(query_info) + assert gzip.decompress(result.output) == (b'0\n')*1000 + +def test_compressed_totals_and_extremes(): + query("CREATE TABLE 
t (x UInt8, y UInt8) ENGINE = Memory") + query("INSERT INTO t VALUES (1, 2), (2, 4), (3, 2), (3, 3), (3, 4)") + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT sum(x), y FROM t GROUP BY y WITH TOTALS", compression_type="lz4") + result = stub.ExecuteQuery(query_info) + assert lz4.frame.decompress(result.totals) == b'12\t0\n' + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT x, y FROM t", settings={"extremes": "1"}, compression_type="lz4") + result = stub.ExecuteQuery(query_info) + assert lz4.frame.decompress(result.extremes) == b'1\t2\n3\t4\n' + +def test_compressed_insert_query_streaming(): + query("CREATE TABLE t (a UInt8) ENGINE = Memory") + data = lz4.frame.compress(b'(1),(2),(3),(5),(4),(6),(7),(8),(9)') + sz1 = len(data) // 3 + sz2 = len(data) // 3 + d1 = data[:sz1] + d2 = data[sz1:sz1+sz2] + d3 = data[sz1+sz2:] + def send_query_info(): + yield clickhouse_grpc_pb2.QueryInfo(query="INSERT INTO t VALUES", input_data=d1, compression_type="lz4", next_query_info=True) + yield clickhouse_grpc_pb2.QueryInfo(input_data=d2, next_query_info=True) + yield clickhouse_grpc_pb2.QueryInfo(input_data=d3) + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + stub.ExecuteQueryWithStreamInput(send_query_info()) + assert query("SELECT a FROM t ORDER BY a") == "1\n2\n3\n4\n5\n6\n7\n8\n9\n" + +def test_compressed_external_table(): + columns = [clickhouse_grpc_pb2.NameAndType(name='UserID', type='UInt64'), clickhouse_grpc_pb2.NameAndType(name='UserName', type='String')] + d1 = lz4.frame.compress(b'1\tAlex\n2\tBen\n3\tCarl\n') + d2 = gzip.compress(b'4,Daniel\n5,Ethan\n') + ext1 = clickhouse_grpc_pb2.ExternalTable(name='ext1', columns=columns, data=d1, format='TabSeparated', compression_type="lz4") + ext2 = clickhouse_grpc_pb2.ExternalTable(name='ext2', columns=columns, data=d2, format='CSV', compression_type="gzip") + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT * FROM (SELECT * FROM ext1 UNION ALL SELECT * FROM ext2) ORDER BY UserID", external_tables=[ext1, ext2]) + result = stub.ExecuteQuery(query_info) + assert result.output == b"1\tAlex\n"\ + b"2\tBen\n"\ + b"3\tCarl\n"\ + b"4\tDaniel\n"\ + b"5\tEthan\n" From 02d6cea857cad223a15cb8de19173b56e54d26d6 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 16 Dec 2021 00:06:41 +0300 Subject: [PATCH 20/37] Lz4DeflatingWriteBuffer now supports changes in the nested buffer between calls. 
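The compressor used to cache the nested buffer's position and free capacity across calls, which breaks once the nested buffer can be restarted between writes (the gRPC server above now reuses a single output vector for consecutive result messages). `out_data` and `out_capacity` are therefore re-derived from `out->position()` and `out->buffer().end()` at the start of nextImpl(), after each compressed write, and in finalizeBefore(), instead of relying on values remembered from the previous call.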
--- src/IO/Lz4DeflatingWriteBuffer.cpp | 40 ++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/src/IO/Lz4DeflatingWriteBuffer.cpp b/src/IO/Lz4DeflatingWriteBuffer.cpp index 5d9c5d40e6f..da954b13df9 100644 --- a/src/IO/Lz4DeflatingWriteBuffer.cpp +++ b/src/IO/Lz4DeflatingWriteBuffer.cpp @@ -54,14 +54,19 @@ void Lz4DeflatingWriteBuffer::nextImpl() in_data = reinterpret_cast(working_buffer.begin()); in_capacity = offset(); + out_capacity = out->buffer().end() - out->position(); + out_data = reinterpret_cast(out->position()); + try { if (first_time) { - out->nextIfAtEnd(); - - out_data = reinterpret_cast(out->position()); - out_capacity = out->buffer().end() - out->position(); + if (out_capacity < LZ4F_HEADER_SIZE_MAX) + { + out->next(); + out_capacity = out->buffer().end() - out->position(); + out_data = reinterpret_cast(out->position()); + } /// write frame header and check for errors size_t header_size = LZ4F_compressBegin(ctx, out_data, out_capacity, &kPrefs); @@ -74,24 +79,29 @@ void Lz4DeflatingWriteBuffer::nextImpl() out_capacity -= header_size; out->position() = out->buffer().end() - out_capacity; + out_data = reinterpret_cast(out->position()); + first_time = false; } do { /// Ensure that there is enough space for compressed block of minimal size - if (out_capacity < LZ4F_compressBound(0, &kPrefs)) + size_t min_compressed_block_size = LZ4F_compressBound(1, &kPrefs); + if (out_capacity < min_compressed_block_size) { out->next(); out_capacity = out->buffer().end() - out->position(); + out_data = reinterpret_cast(out->position()); } - out_data = reinterpret_cast(out->position()); - /// LZ4F_compressUpdate compresses whole input buffer at once so we need to shink it manually size_t cur_buffer_size = in_capacity; - while (out_capacity < LZ4F_compressBound(cur_buffer_size, &kPrefs)) - cur_buffer_size /= 2; + if (out_capacity >= min_compressed_block_size) /// We cannot shrink the input buffer if it's already too small. + { + while (out_capacity < LZ4F_compressBound(cur_buffer_size, &kPrefs)) + cur_buffer_size /= 2; + } size_t compressed_size = LZ4F_compressUpdate(ctx, out_data, out_capacity, in_data, cur_buffer_size, nullptr); @@ -101,11 +111,12 @@ void Lz4DeflatingWriteBuffer::nextImpl() "LZ4 failed to encode stream. 
LZ4F version: {}", LZ4F_VERSION); - out_capacity -= compressed_size; in_capacity -= cur_buffer_size; - in_data = reinterpret_cast(working_buffer.end() - in_capacity); + + out_capacity -= compressed_size; out->position() = out->buffer().end() - out_capacity; + out_data = reinterpret_cast(out->position()); } while (in_capacity > 0); } @@ -120,14 +131,16 @@ void Lz4DeflatingWriteBuffer::finalizeBefore() { next(); + out_capacity = out->buffer().end() - out->position(); + out_data = reinterpret_cast(out->position()); + if (out_capacity < LZ4F_compressBound(0, &kPrefs)) { out->next(); out_capacity = out->buffer().end() - out->position(); + out_data = reinterpret_cast(out->position()); } - out_data = reinterpret_cast(out->position()); - /// compression end size_t end_size = LZ4F_compressEnd(ctx, out_data, out_capacity, nullptr); @@ -139,6 +152,7 @@ void Lz4DeflatingWriteBuffer::finalizeBefore() out_capacity -= end_size; out->position() = out->buffer().end() - out_capacity; + out_data = reinterpret_cast(out->position()); } void Lz4DeflatingWriteBuffer::finalizeAfter() From f06c37d20645a630f9f9829584c484169fbaa353 Mon Sep 17 00:00:00 2001 From: kreuzerkrieg Date: Sat, 18 Dec 2021 11:25:25 +0200 Subject: [PATCH 21/37] Stop reading incomplete stripes and skip rows. --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + .../Formats/Impl/ORCBlockInputFormat.cpp | 45 ++++++------------- .../Formats/Impl/ORCBlockInputFormat.h | 6 --- 5 files changed, 16 insertions(+), 38 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 00ab0b73807..1332d844ff3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -593,6 +593,7 @@ class IColumn; M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \ M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \ M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \ + M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \ M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \ M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \ \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 75b096de425..09e0876bb4f 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -114,6 +114,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.orc.import_nested = settings.input_format_orc_import_nested; + format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.seekable_read = settings.input_format_allow_seeks; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index a18a20bac7b..909b173007a 100644 --- a/src/Formats/FormatSettings.h +++ 
b/src/Formats/FormatSettings.h @@ -200,6 +200,7 @@ struct FormatSettings struct { bool import_nested = false; + int64_t row_batch_size = 100'000; } orc; /// For capnProto format we should determine how to diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 8768e2f5f14..c645595919e 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include "ArrowBufferedStreams.h" #include "ArrowColumnToCHColumn.h" #include @@ -38,37 +37,22 @@ Chunk ORCBlockInputFormat::generate() if (!file_reader) prepareReader(); + std::shared_ptr batch_reader; + arrow::Status reader_status = file_reader->NextStripeReader(format_settings.orc.row_batch_size, include_indices, &batch_reader); + if (!reader_status.ok()) + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Failed to create batch reader: {}", reader_status.ToString()); if (!batch_reader) - { - arrow::Status reader_status = file_reader->NextStripeReader( - DBMS_DEFAULT_BUFFER_SIZE, include_indices, &batch_reader); - if (!reader_status.ok()) - throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, - "Failed to create batch reader: {}", - reader_status.ToString()); - if (!batch_reader) - return res; - } - - std::shared_ptr batch_result; - arrow::Status batch_status = batch_reader->ReadNext(&batch_result); - if (!batch_status.ok()) - throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, - "Error while reading batch of ORC data: {}", - batch_status.ToString()); - - if (!batch_result || !batch_result->num_rows()) return res; - ArrowColumnToCHColumn::NameToColumnPtr name_to_column_ptr; - for (const auto & column_name : column_names) - { - arrow::ArrayVector vec = {batch_result->GetColumnByName(column_name)}; - std::shared_ptr arrow_column = std::make_shared(vec); - name_to_column_ptr[column_name] = arrow_column; - } - arrow_column_to_ch_column->arrowColumnsToCHChunk(res, name_to_column_ptr); - batch_reader.reset(); + std::shared_ptr table; + arrow::Status table_status = batch_reader->ReadAll(&table); + if (!table_status.ok()) + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading batch of ORC data: {}", table_status.ToString()); + + if (!table || !table->num_rows()) + return res; + + arrow_column_to_ch_column->arrowTableToCHChunk(res, table); return res; } @@ -79,7 +63,6 @@ void ORCBlockInputFormat::resetParser() file_reader.reset(); include_indices.clear(); - stripe_current = 0; } static size_t countIndicesForType(std::shared_ptr type) @@ -108,8 +91,6 @@ static size_t countIndicesForType(std::shared_ptr type) void ORCBlockInputFormat::prepareReader() { THROW_ARROW_NOT_OK(arrow::adapters::orc::ORCFileReader::Open(asArrowFile(*in, format_settings), arrow::default_memory_pool(), &file_reader)); - stripe_total = file_reader->NumberOfStripes(); - stripe_current = 0; std::shared_ptr schema; THROW_ARROW_NOT_OK(file_reader->ReadSchema(&schema)); diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h index 857ec7937b7..639aaee73bb 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -35,16 +35,10 @@ private: std::unique_ptr file_reader; - std::shared_ptr batch_reader; - std::unique_ptr arrow_column_to_ch_column; std::vector column_names; - int stripe_total = 0; - - int stripe_current = 0; - // indices of columns to 
read from ORC file std::vector include_indices; From a34c3511af06122e230eb9a77b716a33da81bc77 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 14 Dec 2021 10:25:30 +0300 Subject: [PATCH 22/37] tests: fix 02050_client_profile_events flakiness CI: https://s3.amazonaws.com/clickhouse-test-reports/32303/24751e7d45d94541be854c86ce46d65c2e0f66da/stateless_tests__thread__actions_.html --- tests/queries/0_stateless/02050_client_profile_events.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02050_client_profile_events.sh b/tests/queries/0_stateless/02050_client_profile_events.sh index 5c3887cf5fb..fb54ebfb7ff 100755 --- a/tests/queries/0_stateless/02050_client_profile_events.sh +++ b/tests/queries/0_stateless/02050_client_profile_events.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -10,6 +9,8 @@ $CLICKHOUSE_CLIENT -q 'select * from numbers(1e5) format Null' |& grep -c 'Selec # print only last $CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5) format Null' |& grep -o 'SelectedRows: .*$' # print everything -test "$($CLICKHOUSE_CLIENT --print-profile-events -q 'select * from numbers(1e9) format Null' |& grep -c 'SelectedRows')" -gt 1 && echo OK || echo FAIL +profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events -q 'select sleep(1) from numbers(2) format Null' |& grep -c 'SelectedRows')" +test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" # print each 100 ms -test "$($CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=100 -q 'select * from numbers(1e9) format Null' |& grep -c 'SelectedRows')" -gt 1 && echo OK || echo FAIL +profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events --profile-events-delay-ms=100 -q 'select sleep(1) from numbers(2) format Null' |& grep -c 'SelectedRows')" +test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" From b7f18e23338b2645995bd81a6dd1c45f1382129d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 14 Dec 2021 10:25:30 +0300 Subject: [PATCH 23/37] tests: add more information for 02050_client_profile_events in case of failure Sometimes [1] the test fails like this: 2021-12-10 03:41:19 --- /usr/share/clickhouse-test/queries/0_stateless/02050_client_profile_events.reference 2021-12-10 03:23:43.000000000 -0500 2021-12-10 03:41:19 +++ /tmp/clickhouse-test/0_stateless/02050_client_profile_events.617.stdout 2021-12-10 03:41:19.509611205 -0500 2021-12-10 03:41:19 @@ -1,4 +1,3 @@ 2021-12-10 03:41:19 0 2021-12-10 03:41:19 -SelectedRows: 131010 (increment) 2021-12-10 03:41:19 OK 2021-12-10 03:41:19 OK 2021-12-10 03:41:19 2021-12-10 03:41:19 2021-12-10 03:41:19 Database: test_73d5o0 [1]: https://s3.amazonaws.com/clickhouse-test-reports/32493/703213a6444f8014e3324df4b6e44d03fa351294/stateless_tests_flaky_check__address__actions_.html And I did not find anything strange in server logs (and there was 0 exceptions). 
--- .../queries/0_stateless/02050_client_profile_events.reference | 1 + tests/queries/0_stateless/02050_client_profile_events.sh | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02050_client_profile_events.reference b/tests/queries/0_stateless/02050_client_profile_events.reference index 00fc3b5d06a..29d68082647 100644 --- a/tests/queries/0_stateless/02050_client_profile_events.reference +++ b/tests/queries/0_stateless/02050_client_profile_events.reference @@ -1,4 +1,5 @@ 0 +100000 SelectedRows: 131010 (increment) OK OK diff --git a/tests/queries/0_stateless/02050_client_profile_events.sh b/tests/queries/0_stateless/02050_client_profile_events.sh index fb54ebfb7ff..f91396d35cd 100755 --- a/tests/queries/0_stateless/02050_client_profile_events.sh +++ b/tests/queries/0_stateless/02050_client_profile_events.sh @@ -6,8 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # do not print any ProfileEvents packets $CLICKHOUSE_CLIENT -q 'select * from numbers(1e5) format Null' |& grep -c 'SelectedRows' -# print only last -$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5) format Null' |& grep -o 'SelectedRows: .*$' +# print only last (and also number of rows to provide more info in case of failures) +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5)' 2> >(grep -o -e 'SelectedRows: .*$' -e Exception) 1> >(wc -l) # print everything profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events -q 'select sleep(1) from numbers(2) format Null' |& grep -c 'SelectedRows')" test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" From 2461cc5f9319831e7edfa098d53ccbba57eebde6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 14 Dec 2021 10:25:30 +0300 Subject: [PATCH 24/37] tests: add thread_id into 02050_client_profile_events --- tests/queries/0_stateless/02050_client_profile_events.reference | 2 +- tests/queries/0_stateless/02050_client_profile_events.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02050_client_profile_events.reference b/tests/queries/0_stateless/02050_client_profile_events.reference index 29d68082647..2451417ddf0 100644 --- a/tests/queries/0_stateless/02050_client_profile_events.reference +++ b/tests/queries/0_stateless/02050_client_profile_events.reference @@ -1,5 +1,5 @@ 0 100000 -SelectedRows: 131010 (increment) +[ 0 ] SelectedRows: 131010 (increment) OK OK diff --git a/tests/queries/0_stateless/02050_client_profile_events.sh b/tests/queries/0_stateless/02050_client_profile_events.sh index f91396d35cd..459e8505e22 100755 --- a/tests/queries/0_stateless/02050_client_profile_events.sh +++ b/tests/queries/0_stateless/02050_client_profile_events.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # do not print any ProfileEvents packets $CLICKHOUSE_CLIENT -q 'select * from numbers(1e5) format Null' |& grep -c 'SelectedRows' # print only last (and also number of rows to provide more info in case of failures) -$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5)' 2> >(grep -o -e 'SelectedRows: .*$' -e Exception) 1> >(wc -l) +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5)' 2> >(grep -o -e '\[ 0 \] SelectedRows: .*$' -e Exception) 1> >(wc -l) # print everything profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 
--print-profile-events -q 'select sleep(1) from numbers(2) format Null' |& grep -c 'SelectedRows')" test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" From 909ce68b5a195616c527d5d3e1ccdfe147e8f107 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 14 Dec 2021 10:25:30 +0300 Subject: [PATCH 25/37] Do not suppress exception in ThreadStatus::~ThreadStatus() --- src/Common/ThreadStatus.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index c976e4ca16a..ff69163958d 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -139,6 +139,7 @@ ThreadStatus::~ThreadStatus() { /// It's a minor tracked memory leak here (not the memory itself but it's counter). /// We've already allocated a little bit more than the limit and cannot track it in the thread memory tracker or its parent. + tryLogCurrentException(log); } if (thread_group) From 6aebc3e94ca1159431a957f4b49b1f2235849843 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 14 Dec 2021 10:25:30 +0300 Subject: [PATCH 26/37] Do not loose ProfileEvents in case of thread destroyed before v2: drop std::move and add copy ctor for ProfileEvents::Counter::Snapshot v2: remove std::move --- src/Common/ThreadStatus.cpp | 23 +++++++++++++++++++++++ src/Common/ThreadStatus.h | 11 +++++++++++ src/Server/TCPHandler.cpp | 19 +++++++++---------- 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index ff69163958d..411f725f2db 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -72,6 +72,24 @@ static thread_local bool has_alt_stack = false; #endif +std::vector ThreadGroupStatus::getProfileEventsCountersAndMemoryForThreads() +{ + std::lock_guard guard(mutex); + + /// It is OK to move it, since it is enough to report statistics for the thread at least once. 
+ auto stats = std::move(finished_threads_counters_memory); + for (auto * thread : threads) + { + stats.emplace_back(ProfileEventsCountersAndMemory{ + thread->performance_counters.getPartiallyAtomicSnapshot(), + thread->memory_tracker.get(), + thread->thread_id, + }); + } + + return stats; +} + ThreadStatus::ThreadStatus() : thread_id{getThreadId()} { @@ -145,6 +163,11 @@ ThreadStatus::~ThreadStatus() if (thread_group) { std::lock_guard guard(thread_group->mutex); + thread_group->finished_threads_counters_memory.emplace_back(ThreadGroupStatus::ProfileEventsCountersAndMemory{ + performance_counters.getPartiallyAtomicSnapshot(), + memory_tracker.get(), + thread_id, + }); thread_group->threads.erase(this); } diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 97ddda1ea30..f3920474111 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -61,6 +61,13 @@ using ThreadStatusPtr = ThreadStatus *; class ThreadGroupStatus { public: + struct ProfileEventsCountersAndMemory + { + ProfileEvents::Counters::Snapshot counters; + Int64 memory_usage; + UInt64 thread_id; + }; + mutable std::mutex mutex; ProfileEvents::Counters performance_counters{VariableContext::Process}; @@ -83,6 +90,10 @@ public: String query; UInt64 normalized_query_hash = 0; + + std::vector finished_threads_counters_memory; + + std::vector getProfileEventsCountersAndMemoryForThreads(); }; using ThreadGroupStatusPtr = std::shared_ptr; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 36d126559a7..3b1ce4cc846 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -947,28 +947,27 @@ void TCPHandler::sendProfileEvents() ThreadIdToCountersSnapshot new_snapshots; ProfileEventsSnapshot group_snapshot; { - std::lock_guard guard(thread_group->mutex); - snapshots.reserve(thread_group->threads.size()); - for (auto * thread : thread_group->threads) + auto stats = thread_group->getProfileEventsCountersAndMemoryForThreads(); + snapshots.reserve(stats.size()); + + for (auto & stat : stats) { - auto const thread_id = thread->thread_id; + auto const thread_id = stat.thread_id; if (thread_id == current_thread_id) continue; auto current_time = time(nullptr); - auto counters = thread->performance_counters.getPartiallyAtomicSnapshot(); - auto memory_usage = thread->memory_tracker.get(); auto previous_snapshot = last_sent_snapshots.find(thread_id); auto increment = previous_snapshot != last_sent_snapshots.end() - ? CountersIncrement(counters, previous_snapshot->second) - : CountersIncrement(counters); + ? CountersIncrement(stat.counters, previous_snapshot->second) + : CountersIncrement(stat.counters); snapshots.push_back(ProfileEventsSnapshot{ thread_id, std::move(increment), - memory_usage, + stat.memory_usage, current_time }); - new_snapshots[thread_id] = std::move(counters); + new_snapshots[thread_id] = std::move(stat.counters); } group_snapshot.thread_id = 0; From 1d25ec3e82fd6385dd2bc85b864c5996cdc0c82f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 14 Dec 2021 10:25:30 +0300 Subject: [PATCH 27/37] Merge ProfileEvents in case they were not printed That way with --profile-events-delay-ms=-1 you will always get totals. 
Plus, this will fix periodic failures, that can be reproduced by limitting CPU (5% is enough in my setup), i.e.: $ systemd-run --collect --unit ch -p CPUQuota=5% --user clickhouse-server $ while clickhouse-client --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers (1e5) format Null' |& tee /dev/stderr | fgrep 'SelectedRows: 131010 (increment)'; do :; done And as a bonus it will make 02050_client_profile_events deterministic. --- src/Client/ClientBase.cpp | 98 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 58bc239f003..e662bad1086 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -52,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -105,6 +108,99 @@ namespace ProfileEvents namespace DB { +static void incrementProfileEventsBlock(Block & dst, const Block & src) +{ + if (!dst) + { + dst = src; + return; + } + + assertBlocksHaveEqualStructure(src, dst, "ProfileEvents"); + + std::unordered_map name_pos; + for (size_t i = 0; i < dst.columns(); ++i) + name_pos[dst.getByPosition(i).name] = i; + + size_t dst_rows = dst.rows(); + MutableColumns mutable_columns = dst.mutateColumns(); + + auto & dst_column_host_name = typeid_cast(*mutable_columns[name_pos["host_name"]]); + auto & dst_array_current_time = typeid_cast(*mutable_columns[name_pos["current_time"]]).getData(); + auto & dst_array_thread_id = typeid_cast(*mutable_columns[name_pos["thread_id"]]).getData(); + auto & dst_array_type = typeid_cast(*mutable_columns[name_pos["type"]]).getData(); + auto & dst_column_name = typeid_cast(*mutable_columns[name_pos["name"]]); + auto & dst_array_value = typeid_cast(*mutable_columns[name_pos["value"]]).getData(); + + const auto & src_column_host_name = typeid_cast(*src.getByName("host_name").column); + const auto & src_array_current_time = typeid_cast(*src.getByName("current_time").column).getData(); + const auto & src_array_thread_id = typeid_cast(*src.getByName("thread_id").column).getData(); + const auto & src_column_name = typeid_cast(*src.getByName("name").column); + const auto & src_array_value = typeid_cast(*src.getByName("value").column).getData(); + + struct Id + { + StringRef name; + StringRef host_name; + UInt64 thread_id; + + bool operator<(const Id & rhs) const + { + return std::tie(name, host_name, thread_id) + < std::tie(rhs.name, rhs.host_name, rhs.thread_id); + } + }; + std::map rows_by_name; + for (size_t src_row = 0; src_row < src.rows(); ++src_row) + { + Id id{ + src_column_name.getDataAt(src_row), + src_column_host_name.getDataAt(src_row), + src_array_thread_id[src_row], + }; + rows_by_name[id] = src_row; + } + + /// Merge src into dst. 
+ for (size_t dst_row = 0; dst_row < dst_rows; ++dst_row) + { + Id id{ + dst_column_name.getDataAt(dst_row), + dst_column_host_name.getDataAt(dst_row), + dst_array_thread_id[dst_row], + }; + + if (auto it = rows_by_name.find(id); it != rows_by_name.end()) + { + size_t src_row = it->second; + dst_array_current_time[dst_row] = src_array_current_time[src_row]; + + switch (dst_array_type[dst_row]) + { + case ProfileEvents::Type::INCREMENT: + dst_array_value[dst_row] += src_array_value[src_row]; + break; + case ProfileEvents::Type::GAUGE: + dst_array_value[dst_row] = src_array_value[src_row]; + break; + } + + rows_by_name.erase(it); + } + } + + /// Copy rows from src that dst does not contains. + for (const auto & [id, pos] : rows_by_name) + { + for (size_t col = 0; col < src.columns(); ++col) + { + mutable_columns[col]->insert((*src.getByPosition(col).column)[pos]); + } + } + + dst.setColumns(std::move(mutable_columns)); +} + std::atomic_flag exit_on_signal = ATOMIC_FLAG_INIT; @@ -753,7 +849,7 @@ void ClientBase::onProfileEvents(Block & block) } else { - profile_events.last_block = block; + incrementProfileEventsBlock(profile_events.last_block, block); } } profile_events.watch.restart(); From 3edb2ca8065ecf98ada9c9e020bd4a1a9f86297a Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Mon, 20 Dec 2021 16:32:38 +0800 Subject: [PATCH 28/37] Update external-dicts-dict-polygon.md fix a typo `tne` -> `the` --- .../external-dictionaries/external-dicts-dict-polygon.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md index 5fedd5cf8ad..b49f384367d 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md @@ -41,7 +41,7 @@ Example of a polygon dictionary configuration: ``` -Tne corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md#create-dictionary-query): +The corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md#create-dictionary-query): ``` sql CREATE DICTIONARY polygon_dict_name ( key Array(Array(Array(Array(Float64)))), From 0e6b1b0ec08222128997acca658afffbfbed5aaa Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 20 Dec 2021 11:58:52 +0300 Subject: [PATCH 29/37] Sync release branches in master --- .github/workflows/release_branches.yml | 536 ++++++++++++++++++++++++- 1 file changed, 519 insertions(+), 17 deletions(-) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index bad444d9961..c35b18f8b14 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -407,7 +407,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestAsan: + FunctionalStatelessTestAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] steps: @@ -419,6 +419,8 @@ jobs: CHECK_NAME=Stateless tests (address, actions) REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 2 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -442,7 +444,44 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestTsan: + FunctionalStatelessTestAsan1: + needs: 
[BuilderDebAsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (address, actions) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 2 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] steps: @@ -454,6 +493,82 @@ jobs: CHECK_NAME=Stateless tests (thread, actions) REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (thread, actions) + REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (thread, actions) + REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -512,7 +627,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH 
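+  # Each numbered copy of a check (Asan0/Asan1, Tsan0/1/2, and so on) sets RUN_BY_HASH_NUM
+  # (0..RUN_BY_HASH_TOTAL-1) together with RUN_BY_HASH_TOTAL; functional_test_check.py is assumed
+  # to read these variables and run only that copy's slice of the test set.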
- FunctionalStatelessTestMsan: + FunctionalStatelessTestMsan0: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] steps: @@ -524,6 +639,8 @@ jobs: CHECK_NAME=Stateless tests (memory, actions) REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -547,7 +664,81 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug: + FunctionalStatelessTestMsan1: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (memory, actions) + REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestMsan2: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (memory, actions) + REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug0: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] steps: @@ -559,6 +750,82 @@ jobs: CHECK_NAME=Stateless tests (debug, actions) REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + 
FunctionalStatelessTestDebug1: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug, actions) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug2: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug, actions) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -975,8 +1242,8 @@ jobs: ############################################################################################# ############################# INTEGRATION TESTS ############################################# ############################################################################################# - IntegrationTestsAsan: - needs: [BuilderDebAsan, FunctionalStatelessTestAsan] + IntegrationTestsAsan0: + needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -986,6 +1253,8 @@ jobs: REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=Integration tests (asan, actions) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1009,8 +1278,80 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsTsan: - needs: [BuilderDebTsan, FunctionalStatelessTestTsan] + IntegrationTestsAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, actions) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + 
IntegrationTestsAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, actions) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan0: + needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -1020,6 +1361,8 @@ jobs: REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=Integration tests (thread, actions) REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1043,8 +1386,116 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsRelease: - needs: [BuilderDebRelease, FunctionalStatelessTestRelease] + IntegrationTestsTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (thread, actions) + REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (thread, actions) + REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() 
+ run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan3: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (thread, actions) + REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease0: + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -1054,6 +1505,44 @@ jobs: REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=Integration tests (release, actions) REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=2 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease1: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (release, actions) + REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1081,11 +1570,18 @@ jobs: needs: - DockerHubPush - BuilderReport - - FunctionalStatelessTestDebug + - FunctionalStatelessTestDebug0 + - FunctionalStatelessTestDebug1 + - FunctionalStatelessTestDebug2 - FunctionalStatelessTestRelease - - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan - - FunctionalStatelessTestMsan + - FunctionalStatelessTestAsan0 + - FunctionalStatelessTestAsan1 + - FunctionalStatelessTestTsan0 + - FunctionalStatelessTestTsan1 + - FunctionalStatelessTestTsan2 + - FunctionalStatelessTestMsan0 + - FunctionalStatelessTestMsan1 + - FunctionalStatelessTestMsan2 - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease @@ -1098,9 +1594,15 @@ jobs: - StressTestTsan - StressTestMsan - StressTestUBsan - - IntegrationTestsAsan - - IntegrationTestsRelease - - IntegrationTestsTsan + - IntegrationTestsAsan0 + - IntegrationTestsAsan1 + - IntegrationTestsAsan2 + - IntegrationTestsRelease0 + - IntegrationTestsRelease1 + 
- IntegrationTestsTsan0 + - IntegrationTestsTsan1 + - IntegrationTestsTsan2 + - IntegrationTestsTsan3 - CompatibilityCheck runs-on: [self-hosted, style-checker] steps: From 7785eac384e6a8cf2cfdb8756f5bdb8d94e0e660 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 20 Dec 2021 13:32:13 +0300 Subject: [PATCH 30/37] Loops remove postfix increment --- src/Functions/geoToH3.cpp | 2 +- src/Functions/h3EdgeAngle.cpp | 2 +- src/Functions/h3EdgeLengthM.cpp | 2 +- src/Functions/h3GetBaseCell.cpp | 2 +- src/Functions/h3GetFaces.cpp | 2 +- src/Functions/h3GetResolution.cpp | 2 +- src/Functions/h3HexAreaM2.cpp | 2 +- src/Functions/h3IndexesAreNeighbors.cpp | 2 +- src/Functions/h3IsPentagon.cpp | 2 +- src/Functions/h3IsResClassIII.cpp | 2 +- src/Functions/h3IsValid.cpp | 2 +- src/Functions/h3ToChildren.cpp | 2 +- src/Functions/h3ToParent.cpp | 2 +- src/Functions/h3kRing.cpp | 2 +- src/Functions/map.cpp | 4 ++-- 15 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/Functions/geoToH3.cpp b/src/Functions/geoToH3.cpp index 93865782c8e..18951d1a03f 100644 --- a/src/Functions/geoToH3.cpp +++ b/src/Functions/geoToH3.cpp @@ -76,7 +76,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const double lon = col_lon->getFloat64(row); const double lat = col_lat->getFloat64(row); diff --git a/src/Functions/h3EdgeAngle.cpp b/src/Functions/h3EdgeAngle.cpp index 68e44e38bb9..5d5ad6cd1d3 100644 --- a/src/Functions/h3EdgeAngle.cpp +++ b/src/Functions/h3EdgeAngle.cpp @@ -58,7 +58,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const int resolution = col_hindex->getUInt(row); if (resolution > MAX_H3_RES) diff --git a/src/Functions/h3EdgeLengthM.cpp b/src/Functions/h3EdgeLengthM.cpp index eb0aab029b7..3eef9be9345 100644 --- a/src/Functions/h3EdgeLengthM.cpp +++ b/src/Functions/h3EdgeLengthM.cpp @@ -63,7 +63,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 resolution = col_hindex->getUInt(row); if (resolution > MAX_H3_RES) diff --git a/src/Functions/h3GetBaseCell.cpp b/src/Functions/h3GetBaseCell.cpp index 1f635fda715..83978919f2c 100644 --- a/src/Functions/h3GetBaseCell.cpp +++ b/src/Functions/h3GetBaseCell.cpp @@ -55,7 +55,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 hindex = col_hindex->getUInt(row); diff --git a/src/Functions/h3GetFaces.cpp b/src/Functions/h3GetFaces.cpp index 5d82c16296c..e67ab15128f 100644 --- a/src/Functions/h3GetFaces.cpp +++ b/src/Functions/h3GetFaces.cpp @@ -64,7 +64,7 @@ public: auto current_offset = 0; std::vector faces; - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { int max_faces = maxFaceCount(data[row]); diff --git a/src/Functions/h3GetResolution.cpp b/src/Functions/h3GetResolution.cpp index cc4a3c7443d..02b634dac89 100644 --- a/src/Functions/h3GetResolution.cpp +++ b/src/Functions/h3GetResolution.cpp @@ -55,7 +55,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < 
input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 hindex = col_hindex->getUInt(row); diff --git a/src/Functions/h3HexAreaM2.cpp b/src/Functions/h3HexAreaM2.cpp index 6aa8fb31aab..96b301806a5 100644 --- a/src/Functions/h3HexAreaM2.cpp +++ b/src/Functions/h3HexAreaM2.cpp @@ -58,7 +58,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 resolution = col_hindex->getUInt(row); if (resolution > MAX_H3_RES) diff --git a/src/Functions/h3IndexesAreNeighbors.cpp b/src/Functions/h3IndexesAreNeighbors.cpp index f938f7fe784..27eaacad4d6 100644 --- a/src/Functions/h3IndexesAreNeighbors.cpp +++ b/src/Functions/h3IndexesAreNeighbors.cpp @@ -63,7 +63,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 hindex_origin = col_hindex_origin->getUInt(row); const UInt64 hindex_dest = col_hindex_dest->getUInt(row); diff --git a/src/Functions/h3IsPentagon.cpp b/src/Functions/h3IsPentagon.cpp index 039fea39f2a..a6726fe1656 100644 --- a/src/Functions/h3IsPentagon.cpp +++ b/src/Functions/h3IsPentagon.cpp @@ -56,7 +56,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0 ; row < input_rows_count ; row++) + for (size_t row = 0 ; row < input_rows_count ; ++row) { UInt8 res = isPentagon(data[row]); dst_data[row] = res; diff --git a/src/Functions/h3IsResClassIII.cpp b/src/Functions/h3IsResClassIII.cpp index f2f7ae445f2..c6b79d404a4 100644 --- a/src/Functions/h3IsResClassIII.cpp +++ b/src/Functions/h3IsResClassIII.cpp @@ -56,7 +56,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0 ; row < input_rows_count ; row++) + for (size_t row = 0 ; row < input_rows_count ; ++row) { UInt8 res = isResClassIII(data[row]); dst_data[row] = res; diff --git a/src/Functions/h3IsValid.cpp b/src/Functions/h3IsValid.cpp index 891d534375e..aa109eee6b4 100644 --- a/src/Functions/h3IsValid.cpp +++ b/src/Functions/h3IsValid.cpp @@ -55,7 +55,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 hindex = col_hindex->getUInt(row); diff --git a/src/Functions/h3ToChildren.cpp b/src/Functions/h3ToChildren.cpp index 5745838e9cb..56b3dd9a88c 100644 --- a/src/Functions/h3ToChildren.cpp +++ b/src/Functions/h3ToChildren.cpp @@ -76,7 +76,7 @@ public: std::vector hindex_vec; - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 parent_hindex = col_hindex->getUInt(row); const UInt8 child_resolution = col_resolution->getUInt(row); diff --git a/src/Functions/h3ToParent.cpp b/src/Functions/h3ToParent.cpp index 76ebea6daf6..fef1b16696f 100644 --- a/src/Functions/h3ToParent.cpp +++ b/src/Functions/h3ToParent.cpp @@ -66,7 +66,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 hindex = col_hindex->getUInt(row); const UInt8 resolution = col_resolution->getUInt(row); diff --git a/src/Functions/h3kRing.cpp b/src/Functions/h3kRing.cpp index 1bcb3e1ab6c..9fc6312daa4 
100644 --- a/src/Functions/h3kRing.cpp +++ b/src/Functions/h3kRing.cpp @@ -73,7 +73,7 @@ public: std::vector hindex_vec; - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const H3Index origin_hindex = col_hindex->getUInt(row); const int k = col_k->getInt(row); diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 03a9da404c2..0dda46e16d9 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -310,7 +310,7 @@ public: FunctionLike func_like; - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { size_t element_start_row = row != 0 ? column_array.getOffsets()[row-1] : 0; size_t elem_size = column_array.getOffsets()[row]- element_start_row; @@ -457,7 +457,7 @@ public: IColumn::Offset current_offset = 0; - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { size_t element_start_row = row != 0 ? nested_column.getOffsets()[row-1] : 0; size_t element_size = nested_column.getOffsets()[row]- element_start_row; From 3feab5a975c639ca9a63166016b9a80044eb000a Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 20 Dec 2021 13:42:31 +0300 Subject: [PATCH 31/37] Containers iteration fix erase --- src/Core/Block.cpp | 2 +- src/Disks/DiskMemory.cpp | 2 +- src/Interpreters/TreeRewriter.cpp | 4 ++-- src/Interpreters/getTableExpressions.cpp | 2 +- src/Storages/MergeTree/ActiveDataPartSet.cpp | 4 ++-- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 1bebe5f5efc..1d23325d473 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -209,7 +209,7 @@ void Block::eraseImpl(size_t position) for (auto it = index_by_name.begin(); it != index_by_name.end();) { if (it->second == position) - index_by_name.erase(it++); + it = index_by_name.erase(it); else { if (it->second > position) diff --git a/src/Disks/DiskMemory.cpp b/src/Disks/DiskMemory.cpp index ea8bf719de6..834ed3e0c65 100644 --- a/src/Disks/DiskMemory.cpp +++ b/src/Disks/DiskMemory.cpp @@ -253,7 +253,7 @@ void DiskMemory::clearDirectory(const String & path) throw Exception( "Failed to clear directory '" + path + "'. 
" + iter->first + " is a directory", ErrorCodes::CANNOT_DELETE_DIRECTORY); - files.erase(iter++); + iter = files.erase(iter); } } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 44b0c760d8d..9fc16bd9757 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -957,7 +957,7 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select unknown_required_source_columns.erase(column_name); if (!required.count(column_name)) - source_columns.erase(it++); + it = source_columns.erase(it); else ++it; } @@ -973,7 +973,7 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select if (column) { source_columns.push_back(*column); - unknown_required_source_columns.erase(it++); + it = unknown_required_source_columns.erase(it); } else ++it; diff --git a/src/Interpreters/getTableExpressions.cpp b/src/Interpreters/getTableExpressions.cpp index d82c7fc1332..830f0ea4411 100644 --- a/src/Interpreters/getTableExpressions.cpp +++ b/src/Interpreters/getTableExpressions.cpp @@ -16,7 +16,7 @@ NameSet removeDuplicateColumns(NamesAndTypesList & columns) if (names.emplace(it->name).second) ++it; else - columns.erase(it++); + it = columns.erase(it); } return names; } diff --git a/src/Storages/MergeTree/ActiveDataPartSet.cpp b/src/Storages/MergeTree/ActiveDataPartSet.cpp index 0f6cd8050ca..b21910158ad 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -49,7 +49,7 @@ bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) if (out_replaced_parts) out_replaced_parts->push_back(it->second); - part_info_to_name.erase(it++); + it = part_info_to_name.erase(it); } if (out_replaced_parts) @@ -61,7 +61,7 @@ bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) assert(part_info != it->first); if (out_replaced_parts) out_replaced_parts->push_back(it->second); - part_info_to_name.erase(it++); + it = part_info_to_name.erase(it); } if (it != part_info_to_name.end() && !part_info.isDisjoint(it->first)) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index cc9a142c65c..b3da3d47684 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1033,7 +1033,7 @@ void ReplicatedMergeTreeQueue::removePartProducingOpsInRange( min_unprocessed_insert_time_changed, max_processed_insert_time_changed, lock); (*it)->removed_by_other_entry = true; - queue.erase(it++); + it = queue.erase(it); ++removed_entries; } else From 6e15ff7d31b20df0bd4d1937b877f3a867e3acfa Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 20 Dec 2021 13:48:15 +0300 Subject: [PATCH 32/37] Fix envs --- .github/workflows/release_branches.yml | 44 +++++++++++++------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index c35b18f8b14..7a898c79b1b 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -419,8 +419,8 @@ jobs: CHECK_NAME=Stateless tests (address, actions) REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 2 + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -456,8 +456,8 @@ jobs: CHECK_NAME=Stateless tests (address, actions) 
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 2 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -493,8 +493,8 @@ jobs: CHECK_NAME=Stateless tests (thread, actions) REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT=10800 - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 3 + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -530,8 +530,8 @@ jobs: CHECK_NAME=Stateless tests (thread, actions) REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT=10800 - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 3 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -567,8 +567,8 @@ jobs: CHECK_NAME=Stateless tests (thread, actions) REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT=10800 - RUN_BY_HASH_NUM: 2 - RUN_BY_HASH_TOTAL: 3 + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -639,8 +639,8 @@ jobs: CHECK_NAME=Stateless tests (memory, actions) REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT=10800 - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 3 + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -676,8 +676,8 @@ jobs: CHECK_NAME=Stateless tests (memory, actions) REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT=10800 - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 3 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -713,8 +713,8 @@ jobs: CHECK_NAME=Stateless tests (memory, actions) REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT=10800 - RUN_BY_HASH_NUM: 2 - RUN_BY_HASH_TOTAL: 3 + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -750,8 +750,8 @@ jobs: CHECK_NAME=Stateless tests (debug, actions) REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 3 + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -787,8 +787,8 @@ jobs: CHECK_NAME=Stateless tests (debug, actions) REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 3 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -824,8 +824,8 @@ jobs: CHECK_NAME=Stateless tests (debug, actions) REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 - RUN_BY_HASH_NUM: 2 - RUN_BY_HASH_TOTAL: 3 + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports uses: actions/download-artifact@v2 From 20e4ce3314acf3d21bb10d7a86cc84b658ab744e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 20 Dec 2021 14:49:05 +0300 Subject: [PATCH 33/37] Short circuit evaluation function throwIf support --- src/Columns/ColumnFunction.h | 7 ++++++- src/Columns/MaskOperations.cpp | 2 +- src/Columns/MaskOperations.h | 2 +- src/Functions/FunctionsLogical.cpp | 2 +- src/Functions/if.cpp | 2 +- src/Functions/multiIf.cpp | 2 +- src/Functions/throwIf.cpp | 15 +++++++++++---- .../02152_short_circuit_throw_if.reference | 2 ++ .../0_stateless/02152_short_circuit_throw_if.sql | 2 ++ 9 files changed, 26 insertions(+), 10 deletions(-) create mode 
100644 tests/queries/0_stateless/02152_short_circuit_throw_if.reference create mode 100644 tests/queries/0_stateless/02152_short_circuit_throw_if.sql diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 8e39551676c..2592dc01f98 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -24,7 +24,12 @@ class ColumnFunction final : public COWHelper private: friend class COWHelper; - ColumnFunction(size_t size, FunctionBasePtr function_, const ColumnsWithTypeAndName & columns_to_capture, bool is_short_circuit_argument_ = false, bool is_function_compiled_ = false); + ColumnFunction( + size_t size, + FunctionBasePtr function_, + const ColumnsWithTypeAndName & columns_to_capture, + bool is_short_circuit_argument_ = false, + bool is_function_compiled_ = false); public: const char * getFamilyName() const override { return "Function"; } diff --git a/src/Columns/MaskOperations.cpp b/src/Columns/MaskOperations.cpp index 9499185da30..1641bdf5a4c 100644 --- a/src/Columns/MaskOperations.cpp +++ b/src/Columns/MaskOperations.cpp @@ -293,7 +293,7 @@ void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty) column.column = column_function->getResultType()->createColumn(); } -int checkShirtCircuitArguments(const ColumnsWithTypeAndName & arguments) +int checkShortCircuitArguments(const ColumnsWithTypeAndName & arguments) { int last_short_circuit_argument_index = -1; for (size_t i = 0; i != arguments.size(); ++i) diff --git a/src/Columns/MaskOperations.h b/src/Columns/MaskOperations.h index bd6c5e8fe2c..e43b4588258 100644 --- a/src/Columns/MaskOperations.h +++ b/src/Columns/MaskOperations.h @@ -66,7 +66,7 @@ void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty = false); /// Check if arguments contain lazy executed argument. If contain, return index of the last one, /// otherwise return -1. -int checkShirtCircuitArguments(const ColumnsWithTypeAndName & arguments); +int checkShortCircuitArguments(const ColumnsWithTypeAndName & arguments); void copyMask(const PaddedPODArray & from, PaddedPODArray & to); diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index f427deced3a..87a2ecd4c57 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -609,7 +609,7 @@ ColumnPtr FunctionAnyArityLogical::executeImpl( ColumnsWithTypeAndName arguments = std::move(args); /// Special implementation for short-circuit arguments. 
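+    /// (Short-circuit arguments are captured as lazily-executed ColumnFunction columns and are
+    /// materialized only for the rows that still need them; see ColumnFunction and MaskOperations
+    /// in this patch series.)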
- if (checkShirtCircuitArguments(arguments) != -1) + if (checkShortCircuitArguments(arguments) != -1) return executeShortCircuit(arguments, result_type); ColumnRawPtrs args_in; diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 953aff3568e..6841098ebcf 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -969,7 +969,7 @@ private: static void executeShortCircuitArguments(ColumnsWithTypeAndName & arguments) { - int last_short_circuit_argument_index = checkShirtCircuitArguments(arguments); + int last_short_circuit_argument_index = checkShortCircuitArguments(arguments); if (last_short_circuit_argument_index == -1) return; diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 3e5242d5f9b..070a7c2f05e 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -262,7 +262,7 @@ public: private: static void executeShortCircuitArguments(ColumnsWithTypeAndName & arguments) { - int last_short_circuit_argument_index = checkShirtCircuitArguments(arguments); + int last_short_circuit_argument_index = checkShortCircuitArguments(arguments); if (last_short_circuit_argument_index < 0) return; diff --git a/src/Functions/throwIf.cpp b/src/Functions/throwIf.cpp index d499f1f492f..2af6b7eede8 100644 --- a/src/Functions/throwIf.cpp +++ b/src/Functions/throwIf.cpp @@ -63,11 +63,15 @@ public: return std::make_shared(); } - bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForConstants() const override { return false; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + bool isSuitableForConstantFolding() const override { return false; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { + if (input_rows_count == 0) + return result_type->createColumn(); + std::optional custom_message; if (arguments.size() == 2) { @@ -77,7 +81,10 @@ public: custom_message = msg_column->getValue(); } - const auto * in = arguments.front().column.get(); + auto first_argument_column = arguments.front().column; + auto first_argument_column_non_const = first_argument_column->convertToFullColumnIfConst(); + + const auto * in = first_argument_column_non_const.get(); ColumnPtr res; if (!((res = execute(in, custom_message)) @@ -106,7 +113,7 @@ public: ErrorCodes::FUNCTION_THROW_IF_VALUE_IS_NON_ZERO}; /// We return non constant to avoid constant folding. 
- return ColumnUInt8::create(in_data.size(), 0); + return ColumnUInt8::create(in_data.size(), 0); } return nullptr; diff --git a/tests/queries/0_stateless/02152_short_circuit_throw_if.reference b/tests/queries/0_stateless/02152_short_circuit_throw_if.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/02152_short_circuit_throw_if.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/02152_short_circuit_throw_if.sql b/tests/queries/0_stateless/02152_short_circuit_throw_if.sql new file mode 100644 index 00000000000..3fdc3cc48c8 --- /dev/null +++ b/tests/queries/0_stateless/02152_short_circuit_throw_if.sql @@ -0,0 +1,2 @@ +SELECT if(1, 0, throwIf(1, 'Executing FALSE branch')); +SELECT if(empty(''), 0, throwIf(1, 'Executing FALSE branch')); From 51477adf1bf85c92ebbadb6bc650f3407836ff8a Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 20 Dec 2021 15:55:07 +0300 Subject: [PATCH 34/37] Updated additional cases --- src/Access/Authentication.cpp | 2 +- src/Access/LDAPClient.cpp | 4 ++-- .../AggregateFunctionForEach.h | 2 +- .../AggregateFunctionHistogram.h | 2 +- src/AggregateFunctions/AggregateFunctionIf.cpp | 2 +- src/Common/Dwarf.cpp | 2 +- src/Common/TraceCollector.cpp | 2 +- src/Common/parseRemoteDescription.cpp | 2 +- src/Core/MySQL/Authentication.cpp | 4 ++-- src/Core/MySQL/MySQLGtid.cpp | 6 +++--- src/Core/MySQL/MySQLReplication.cpp | 16 ++++++++-------- src/Core/MySQL/PacketsProtocolText.cpp | 4 ++-- src/Dictionaries/PolygonDictionaryUtils.cpp | 2 +- src/Disks/S3/DiskS3.cpp | 2 +- src/Functions/CRC.cpp | 2 +- src/Functions/FunctionMathUnary.h | 2 +- src/Functions/FunctionsLogical.h | 4 ++-- src/Functions/array/mapOp.cpp | 4 ++-- src/Functions/formatString.h | 2 +- src/Functions/h3GetFaces.cpp | 2 +- src/Functions/isIPAddressContainedIn.cpp | 6 +++--- src/Functions/map.cpp | 2 +- src/Functions/pointInPolygon.cpp | 2 +- src/Functions/polygonArea.cpp | 2 +- src/Functions/polygonConvexHull.cpp | 2 +- src/Functions/polygonPerimeter.cpp | 2 +- src/Functions/polygonsDistance.cpp | 2 +- src/Functions/polygonsEquals.cpp | 2 +- src/Functions/polygonsSymDifference.cpp | 2 +- src/Functions/polygonsUnion.cpp | 2 +- src/Functions/polygonsWithin.cpp | 2 +- src/Functions/readWkt.cpp | 2 +- src/Functions/svg.cpp | 2 +- src/Functions/wkt.cpp | 2 +- src/IO/AIO.cpp | 2 +- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- .../Formats/Impl/AvroRowInputFormat.cpp | 10 +++++----- .../Formats/Impl/MySQLOutputFormat.cpp | 4 ++-- .../Formats/Impl/PostgreSQLOutputFormat.cpp | 2 +- .../Transforms/AggregatingTransform.cpp | 2 +- src/Processors/Transforms/WindowTransform.cpp | 4 ++-- .../tests/gtest_blocks_size_merging_streams.cpp | 2 +- .../tests/gtest_check_sorted_stream.cpp | 6 +++--- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- .../MergeTree/MergeTreeIndexFullText.cpp | 2 +- src/Storages/RocksDB/EmbeddedRocksDBSink.cpp | 2 +- src/Storages/StorageBuffer.cpp | 2 +- 48 files changed, 72 insertions(+), 72 deletions(-) diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 794c0a0d5d5..6bc9aeec4c2 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -54,7 +54,7 @@ namespace const Poco::SHA1Engine::Digest & digest = engine.digest(); Poco::SHA1Engine::Digest calculated_password_sha1(sha1_size); - for (size_t i = 0; i < sha1_size; i++) + for (size_t i = 0; i < sha1_size; ++i) calculated_password_sha1[i] = scrambled_password[i] ^ 
digest[i]; auto calculated_password_double_sha1 = Util::encodeSHA1(calculated_password_sha1); diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index c666520c069..49d01074f6a 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -448,7 +448,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) vals = nullptr; }); - for (std::size_t i = 0; vals[i]; i++) + for (size_t i = 0; vals[i]; ++i) { if (vals[i]->bv_val && vals[i]->bv_len > 0) result.emplace(vals[i]->bv_val, vals[i]->bv_len); @@ -473,7 +473,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) referrals = nullptr; }); - for (std::size_t i = 0; referrals[i]; i++) + for (size_t i = 0; referrals[i]; ++i) { LOG_WARNING(&Poco::Logger::get("LDAPClient"), "Received reference during LDAP search but not following it: {}", referrals[i]); } diff --git a/src/AggregateFunctions/AggregateFunctionForEach.h b/src/AggregateFunctions/AggregateFunctionForEach.h index 0de6272d23e..064b7b00c86 100644 --- a/src/AggregateFunctions/AggregateFunctionForEach.h +++ b/src/AggregateFunctions/AggregateFunctionForEach.h @@ -90,7 +90,7 @@ private: throw; } - for (i = 0; i < old_size; i++) + for (i = 0; i < old_size; ++i) { nested_func->merge(&new_state[i * nested_size_of_data], &old_state[i * nested_size_of_data], diff --git a/src/AggregateFunctions/AggregateFunctionHistogram.h b/src/AggregateFunctions/AggregateFunctionHistogram.h index 665e505aa4e..b858c6b628c 100644 --- a/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -271,7 +271,7 @@ public: { lower_bound = std::min(lower_bound, other.lower_bound); upper_bound = std::max(upper_bound, other.upper_bound); - for (size_t i = 0; i < other.size; i++) + for (size_t i = 0; i < other.size; ++i) add(other.points[i].mean, other.points[i].weight, max_bins); } diff --git a/src/AggregateFunctions/AggregateFunctionIf.cpp b/src/AggregateFunctions/AggregateFunctionIf.cpp index 4ac6a2dce21..d752900c018 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.cpp +++ b/src/AggregateFunctions/AggregateFunctionIf.cpp @@ -56,7 +56,7 @@ static bool ALWAYS_INLINE inline is_all_zeros(const UInt8 * flags, size_t size) i += 8; } - for (; i < size; i++) + for (; i < size; ++i) if (flags[i]) return false; diff --git a/src/Common/Dwarf.cpp b/src/Common/Dwarf.cpp index 18e9315d5c3..a85bbe818b5 100644 --- a/src/Common/Dwarf.cpp +++ b/src/Common/Dwarf.cpp @@ -838,7 +838,7 @@ bool Dwarf::findLocation( // The next inlined subroutine's call file and call line is the current // caller's location. 
- for (size_t i = 0; i < num_found - 1; i++) + for (size_t i = 0; i < num_found - 1; ++i) { call_locations[i].file = call_locations[i + 1].file; call_locations[i].line = call_locations[i + 1].line; diff --git a/src/Common/TraceCollector.cpp b/src/Common/TraceCollector.cpp index d84202449d1..523251fa2a2 100644 --- a/src/Common/TraceCollector.cpp +++ b/src/Common/TraceCollector.cpp @@ -153,7 +153,7 @@ void TraceCollector::run() Array trace; trace.reserve(trace_size); - for (size_t i = 0; i < trace_size; i++) + for (size_t i = 0; i < trace_size; ++i) { uintptr_t addr = 0; readPODBinary(addr, in); diff --git a/src/Common/parseRemoteDescription.cpp b/src/Common/parseRemoteDescription.cpp index 7c8053037ea..fa5d3a8fbd5 100644 --- a/src/Common/parseRemoteDescription.cpp +++ b/src/Common/parseRemoteDescription.cpp @@ -41,7 +41,7 @@ static void append(std::vector & to, const std::vector & what, s static bool parseNumber(const String & description, size_t l, size_t r, size_t & res) { res = 0; - for (size_t pos = l; pos < r; pos ++) + for (size_t pos = l; pos < r; ++pos) { if (!isNumericASCII(description[pos])) return false; diff --git a/src/Core/MySQL/Authentication.cpp b/src/Core/MySQL/Authentication.cpp index 4dd20ff585e..0492211c51f 100644 --- a/src/Core/MySQL/Authentication.cpp +++ b/src/Core/MySQL/Authentication.cpp @@ -71,7 +71,7 @@ Native41::Native41(const String & password_, const String & scramble_) const Poco::SHA1Engine::Digest & digest = engine3.digest(); scramble.resize(SCRAMBLE_LENGTH); - for (size_t i = 0; i < SCRAMBLE_LENGTH; i++) + for (size_t i = 0; i < SCRAMBLE_LENGTH; ++i) scramble[i] = static_cast(password_sha1[i] ^ digest[i]); } @@ -191,7 +191,7 @@ void Sha256Password::authenticate( } password.resize(plaintext_size); - for (int i = 0; i < plaintext_size; i++) + for (int i = 0; i < plaintext_size; ++i) { password[i] = plaintext[i] ^ static_cast(scramble[i % SCRAMBLE_LENGTH]); } diff --git a/src/Core/MySQL/MySQLGtid.cpp b/src/Core/MySQL/MySQLGtid.cpp index a441bccb076..bfd0bd02b45 100644 --- a/src/Core/MySQL/MySQLGtid.cpp +++ b/src/Core/MySQL/MySQLGtid.cpp @@ -41,7 +41,7 @@ void GTIDSets::parse(const String gtid_format) GTIDSet set; set.uuid = DB::parse(server_ids[0]); - for (size_t k = 1; k < server_ids.size(); k++) + for (size_t k = 1; k < server_ids.size(); ++k) { std::vector inters; boost::split(inters, server_ids[k], [](char c) { return c == '-'; }); @@ -74,7 +74,7 @@ void GTIDSets::update(const GTID & other) { if (set.uuid == other.uuid) { - for (auto i = 0U; i < set.intervals.size(); i++) + for (auto i = 0U; i < set.intervals.size(); ++i) { auto & current = set.intervals[i]; @@ -134,7 +134,7 @@ String GTIDSets::toString() const { WriteBufferFromOwnString buffer; - for (size_t i = 0; i < sets.size(); i++) + for (size_t i = 0; i < sets.size(); ++i) { GTIDSet set = sets[i]; writeUUIDText(set.uuid, buffer); diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index f734154f4ba..663d246b0d3 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -159,7 +159,7 @@ namespace MySQLReplication payload.ignore(1); column_count = readLengthEncodedNumber(payload); - for (auto i = 0U; i < column_count; i++) + for (auto i = 0U; i < column_count; ++i) { UInt8 v = 0x00; payload.readStrict(reinterpret_cast(&v), 1); @@ -188,7 +188,7 @@ namespace MySQLReplication { auto pos = 0; column_meta.reserve(column_count); - for (auto i = 0U; i < column_count; i++) + for (auto i = 0U; i < column_count; ++i) { UInt16 typ = 
column_type[i]; switch (typ) @@ -255,7 +255,7 @@ namespace MySQLReplication out << "Table Len: " << std::to_string(this->table_len) << '\n'; out << "Table: " << this->table << '\n'; out << "Column Count: " << this->column_count << '\n'; - for (auto i = 0U; i < column_count; i++) + for (UInt32 i = 0; i < column_count; ++i) { out << "Column Type [" << i << "]: " << std::to_string(column_type[i]) << ", Meta: " << column_meta[i] << '\n'; } @@ -312,7 +312,7 @@ namespace MySQLReplication UInt32 null_index = 0; UInt32 re_count = 0; - for (auto i = 0U; i < number_columns; i++) + for (UInt32 i = 0; i < number_columns; ++i) { if (bitmap[i]) re_count++; @@ -321,7 +321,7 @@ namespace MySQLReplication boost::dynamic_bitset<> columns_null_set; readBitmap(payload, columns_null_set, re_count); - for (auto i = 0U; i < number_columns; i++) + for (UInt32 i = 0; i < number_columns; ++i) { UInt32 field_len = 0; @@ -523,7 +523,7 @@ namespace MySQLReplication res += (val ^ (mask & compressed_integer_align_numbers[compressed_integers])); } - for (auto k = 0U; k < uncompressed_integers; k++) + for (size_t k = 0; k < uncompressed_integers; ++k) { UInt32 val = 0; readBigEndianStrict(payload, reinterpret_cast(&val), 4); @@ -536,7 +536,7 @@ namespace MySQLReplication size_t uncompressed_decimals = scale / digits_per_integer; size_t compressed_decimals = scale - (uncompressed_decimals * digits_per_integer); - for (auto k = 0U; k < uncompressed_decimals; k++) + for (size_t k = 0; k < uncompressed_decimals; ++k) { UInt32 val = 0; readBigEndianStrict(payload, reinterpret_cast(&val), 4); @@ -669,7 +669,7 @@ namespace MySQLReplication header.dump(out); out << "Schema: " << this->schema << '\n'; out << "Table: " << this->table << '\n'; - for (auto i = 0U; i < rows.size(); i++) + for (size_t i = 0; i < rows.size(); ++i) { out << "Row[" << i << "]: " << applyVisitor(to_string, rows[i]) << '\n'; } diff --git a/src/Core/MySQL/PacketsProtocolText.cpp b/src/Core/MySQL/PacketsProtocolText.cpp index 0494a146c47..728e8061e87 100644 --- a/src/Core/MySQL/PacketsProtocolText.cpp +++ b/src/Core/MySQL/PacketsProtocolText.cpp @@ -15,7 +15,7 @@ namespace ProtocolText ResultSetRow::ResultSetRow(const Serializations & serializations, const Columns & columns_, int row_num_) : columns(columns_), row_num(row_num_) { - for (size_t i = 0; i < columns.size(); i++) + for (size_t i = 0; i < columns.size(); ++i) { if (columns[i]->isNullAt(row_num)) { @@ -39,7 +39,7 @@ size_t ResultSetRow::getPayloadSize() const void ResultSetRow::writePayloadImpl(WriteBuffer & buffer) const { - for (size_t i = 0; i < columns.size(); i++) + for (size_t i = 0; i < columns.size(); ++i) { if (columns[i]->isNullAt(row_num)) buffer.write(serialized[i].data(), 1); diff --git a/src/Dictionaries/PolygonDictionaryUtils.cpp b/src/Dictionaries/PolygonDictionaryUtils.cpp index fced18a6f88..15267481c0b 100644 --- a/src/Dictionaries/PolygonDictionaryUtils.cpp +++ b/src/Dictionaries/PolygonDictionaryUtils.cpp @@ -151,7 +151,7 @@ void SlabsPolygonIndex::indexBuild(const std::vector & polygons) } } - for (size_t i = 0; i != all_edges.size(); i++) + for (size_t i = 0; i != all_edges.size(); ++i) { size_t l = edge_left[i]; size_t r = edge_right[i]; diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 0e2f75505fa..29f40a52bd5 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -355,7 +355,7 @@ void DiskS3::findLastRevision() /// Construct revision number from high to low bits. 
String revision; revision.reserve(64); - for (int bit = 0; bit < 64; bit++) + for (int bit = 0; bit < 64; ++bit) { auto revision_prefix = revision + "1"; diff --git a/src/Functions/CRC.cpp b/src/Functions/CRC.cpp index 00aa631c85b..abcf137f2e7 100644 --- a/src/Functions/CRC.cpp +++ b/src/Functions/CRC.cpp @@ -33,7 +33,7 @@ struct CRCImpl static CRCBase base(polynomial); T crc = 0; - for (size_t i = 0; i < size; i++) + for (size_t i = 0; i < size; ++i) crc = base.tab[(crc ^ buf[i]) & 0xff] ^ (crc >> 8); return crc; } diff --git a/src/Functions/FunctionMathUnary.h b/src/Functions/FunctionMathUnary.h index 2d39daac366..d9ca162ba16 100644 --- a/src/Functions/FunctionMathUnary.h +++ b/src/Functions/FunctionMathUnary.h @@ -94,7 +94,7 @@ private: Impl::execute(src_remaining, dst_remaining); if constexpr (is_big_int_v || std::is_same_v) - for (size_t i = 0; i < rows_remaining; i++) + for (size_t i = 0; i < rows_remaining; ++i) dst_data[rows_size + i] = dst_remaining[i]; else memcpy(&dst_data[rows_size], dst_remaining, rows_remaining * sizeof(ReturnType)); diff --git a/src/Functions/FunctionsLogical.h b/src/Functions/FunctionsLogical.h index 3ddf7ea84eb..7d4f5489e86 100644 --- a/src/Functions/FunctionsLogical.h +++ b/src/Functions/FunctionsLogical.h @@ -185,7 +185,7 @@ public: if constexpr (!Impl::isSaturable()) { auto * result = nativeBoolCast(b, types[0], values[0]); - for (size_t i = 1; i < types.size(); i++) + for (size_t i = 1; i < types.size(); ++i) result = Impl::apply(b, result, nativeBoolCast(b, types[i], values[i])); return b.CreateSelect(result, b.getInt8(1), b.getInt8(0)); } @@ -194,7 +194,7 @@ public: auto * stop = llvm::BasicBlock::Create(next->getContext(), "", next->getParent()); b.SetInsertPoint(stop); auto * phi = b.CreatePHI(b.getInt8Ty(), values.size()); - for (size_t i = 0; i < types.size(); i++) + for (size_t i = 0; i < types.size(); ++i) { b.SetInsertPoint(next); auto * value = values[i]; diff --git a/src/Functions/array/mapOp.cpp b/src/Functions/array/mapOp.cpp index a5913105146..b928254e454 100644 --- a/src/Functions/array/mapOp.cpp +++ b/src/Functions/array/mapOp.cpp @@ -204,7 +204,7 @@ private: std::map summing_map; - for (size_t i = 0; i < row_count; i++) + for (size_t i = 0; i < row_count; ++i) { [[maybe_unused]] bool first = true; for (auto & arg : args) @@ -222,7 +222,7 @@ private: } Field temp_val; - for (size_t j = 0; j < len; j++) + for (size_t j = 0; j < len; ++j) { KeyType key; if constexpr (std::is_same::value) diff --git a/src/Functions/formatString.h b/src/Functions/formatString.h index c72e7db9579..419ecf1c773 100644 --- a/src/Functions/formatString.h +++ b/src/Functions/formatString.h @@ -42,7 +42,7 @@ struct FormatImpl static void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res) { res = 0; - for (UInt64 pos = l; pos < r; pos++) + for (UInt64 pos = l; pos < r; ++pos) { if (!isNumericASCII(description[pos])) throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS); diff --git a/src/Functions/h3GetFaces.cpp b/src/Functions/h3GetFaces.cpp index e67ab15128f..c0300e7212b 100644 --- a/src/Functions/h3GetFaces.cpp +++ b/src/Functions/h3GetFaces.cpp @@ -73,7 +73,7 @@ public: // function name h3GetFaces (v3.x) changed to getIcosahedronFaces (v4.0.0). 
getIcosahedronFaces(data[row], faces.data()); - for (int i = 0; i < max_faces; i++) + for (int i = 0; i < max_faces; ++i) { // valid icosahedron faces are represented by integers 0-19 if (faces[i] >= 0 && faces[i] <= 19) diff --git a/src/Functions/isIPAddressContainedIn.cpp b/src/Functions/isIPAddressContainedIn.cpp index 048fa04adb1..3d2a38ef4c0 100644 --- a/src/Functions/isIPAddressContainedIn.cpp +++ b/src/Functions/isIPAddressContainedIn.cpp @@ -210,7 +210,7 @@ namespace DB ColumnUInt8::MutablePtr col_res = ColumnUInt8::create(input_rows_count); ColumnUInt8::Container & vec_res = col_res->getData(); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { const auto cidr = parseIPWithCIDR(col_cidr.getDataAt(i)); vec_res[i] = isAddressInRange(addr, cidr) ? 1 : 0; @@ -227,7 +227,7 @@ namespace DB ColumnUInt8::MutablePtr col_res = ColumnUInt8::create(input_rows_count); ColumnUInt8::Container & vec_res = col_res->getData(); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { const auto addr = IPAddressVariant(col_addr.getDataAt(i)); vec_res[i] = isAddressInRange(addr, cidr) ? 1 : 0; @@ -241,7 +241,7 @@ namespace DB ColumnUInt8::MutablePtr col_res = ColumnUInt8::create(input_rows_count); ColumnUInt8::Container & vec_res = col_res->getData(); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { const auto addr = IPAddressVariant(col_addr.getDataAt(i)); const auto cidr = parseIPWithCIDR(col_cidr.getDataAt(i)); diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 0dda46e16d9..4e242c4348b 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -492,7 +492,7 @@ public: auto res = func_like.executeImpl(new_arguments, result_type, input_rows_count); const auto & container = checkAndGetColumn(res.get())->getData(); - for (size_t row_num = 0; row_num < element_size; row_num++) + for (size_t row_num = 0; row_num < element_size; ++row_num) { if (container[row_num] == 1) { diff --git a/src/Functions/pointInPolygon.cpp b/src/Functions/pointInPolygon.cpp index 03e46541cdf..c3a9c411cbc 100644 --- a/src/Functions/pointInPolygon.cpp +++ b/src/Functions/pointInPolygon.cpp @@ -139,7 +139,7 @@ public: } else { - for (size_t i = 1; i < arguments.size(); i++) + for (size_t i = 1; i < arguments.size(); ++i) { const auto * array = checkAndGetDataType(arguments[i].get()); if (array == nullptr) diff --git a/src/Functions/polygonArea.cpp b/src/Functions/polygonArea.cpp index 2e38d6c74b9..c4c573490f6 100644 --- a/src/Functions/polygonArea.cpp +++ b/src/Functions/polygonArea.cpp @@ -78,7 +78,7 @@ public: { auto geometries = Converter::convert(arguments[0].column->convertToFullColumnIfConst()); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) res_data.emplace_back(boost::geometry::area(geometries[i])); } } diff --git a/src/Functions/polygonConvexHull.cpp b/src/Functions/polygonConvexHull.cpp index 887a12b8b6a..e8756f11bba 100644 --- a/src/Functions/polygonConvexHull.cpp +++ b/src/Functions/polygonConvexHull.cpp @@ -75,7 +75,7 @@ public: { auto geometries = Converter::convert(arguments[0].column->convertToFullColumnIfConst()); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { Polygon convex_hull{}; boost::geometry::convex_hull(geometries[i], convex_hull); diff --git a/src/Functions/polygonPerimeter.cpp b/src/Functions/polygonPerimeter.cpp index 8291020197a..eedb91a1622 100644 
--- a/src/Functions/polygonPerimeter.cpp +++ b/src/Functions/polygonPerimeter.cpp @@ -77,7 +77,7 @@ public: { auto geometries = Converter::convert(arguments[0].column->convertToFullColumnIfConst()); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) res_data.emplace_back(boost::geometry::perimeter(geometries[i])); } } diff --git a/src/Functions/polygonsDistance.cpp b/src/Functions/polygonsDistance.cpp index 8dd88e1c3bd..51c0198b465 100644 --- a/src/Functions/polygonsDistance.cpp +++ b/src/Functions/polygonsDistance.cpp @@ -83,7 +83,7 @@ public: auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); auto second = RightConverter::convert(arguments[1].column->convertToFullColumnIfConst()); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { boost::geometry::correct(first[i]); boost::geometry::correct(second[i]); diff --git a/src/Functions/polygonsEquals.cpp b/src/Functions/polygonsEquals.cpp index da1db43229b..5c572a16d0e 100644 --- a/src/Functions/polygonsEquals.cpp +++ b/src/Functions/polygonsEquals.cpp @@ -82,7 +82,7 @@ public: auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); auto second = RightConverter::convert(arguments[1].column->convertToFullColumnIfConst()); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { boost::geometry::correct(first[i]); boost::geometry::correct(second[i]); diff --git a/src/Functions/polygonsSymDifference.cpp b/src/Functions/polygonsSymDifference.cpp index 8ef0142072a..4f718760124 100644 --- a/src/Functions/polygonsSymDifference.cpp +++ b/src/Functions/polygonsSymDifference.cpp @@ -81,7 +81,7 @@ public: auto second = RightConverter::convert(arguments[1].column->convertToFullColumnIfConst()); /// NOLINTNEXTLINE(clang-analyzer-core.uninitialized.Assign) - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { boost::geometry::correct(first[i]); boost::geometry::correct(second[i]); diff --git a/src/Functions/polygonsUnion.cpp b/src/Functions/polygonsUnion.cpp index 770aa14ac52..e0c6f208c91 100644 --- a/src/Functions/polygonsUnion.cpp +++ b/src/Functions/polygonsUnion.cpp @@ -82,7 +82,7 @@ public: /// We are not interested in some pitfalls in third-party libraries /// NOLINTNEXTLINE(clang-analyzer-core.uninitialized.Assign) - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { /// Orient the polygons correctly. 
boost::geometry::correct(first[i]); diff --git a/src/Functions/polygonsWithin.cpp b/src/Functions/polygonsWithin.cpp index 66e5b4e6e17..0412c9a656d 100644 --- a/src/Functions/polygonsWithin.cpp +++ b/src/Functions/polygonsWithin.cpp @@ -85,7 +85,7 @@ public: auto second = RightConverter::convert(arguments[1].column->convertToFullColumnIfConst()); /// NOLINTNEXTLINE(clang-analyzer-core.uninitialized.Assign) - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { boost::geometry::correct(first[i]); boost::geometry::correct(second[i]); diff --git a/src/Functions/readWkt.cpp b/src/Functions/readWkt.cpp index c3ae6516e0f..b8d0d20acb3 100644 --- a/src/Functions/readWkt.cpp +++ b/src/Functions/readWkt.cpp @@ -55,7 +55,7 @@ public: Serializer serializer; Geometry geometry; - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { const auto & str = column_string->getDataAt(i).toString(); boost::geometry::read_wkt(str, geometry); diff --git a/src/Functions/svg.cpp b/src/Functions/svg.cpp index b3a89c0393c..e1d48ffc061 100644 --- a/src/Functions/svg.cpp +++ b/src/Functions/svg.cpp @@ -79,7 +79,7 @@ public: auto figures = Converter::convert(arguments[0].column->convertToFullColumnIfConst()); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { std::stringstream str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM boost::geometry::correct(figures[i]); diff --git a/src/Functions/wkt.cpp b/src/Functions/wkt.cpp index 8fbb8f59d33..732441eeef2 100644 --- a/src/Functions/wkt.cpp +++ b/src/Functions/wkt.cpp @@ -49,7 +49,7 @@ public: auto figures = Converter::convert(arguments[0].column->convertToFullColumnIfConst()); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { std::stringstream str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM str << boost::geometry::wkt(figures[i]); diff --git a/src/IO/AIO.cpp b/src/IO/AIO.cpp index 777d9bbbc7f..97e5a470463 100644 --- a/src/IO/AIO.cpp +++ b/src/IO/AIO.cpp @@ -95,7 +95,7 @@ int io_destroy(int ctx) int io_submit(int ctx, long nr, struct iocb * iocbpp[]) { - for (long i = 0; i < nr; i++) + for (long i = 0; i < nr; ++i) { struct aiocb * iocb = &iocbpp[i]->aio; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index f71675910d8..abf1ae5472b 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1973,7 +1973,7 @@ std::string ExpressionAnalysisResult::dump() const if (!selected_columns.empty()) { ss << "selected_columns "; - for (size_t i = 0; i < selected_columns.size(); i++) + for (size_t i = 0; i < selected_columns.size(); ++i) { if (i > 0) { diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 26a20f73d24..8677cf59d79 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -375,7 +375,7 @@ BlockIO InterpreterInsertQuery::execute() pipeline = interpreter_watch.buildQueryPipeline(); } - for (size_t i = 0; i < out_streams_size; i++) + for (size_t i = 0; i < out_streams_size; ++i) { auto out = buildChainImpl(table, metadata_snapshot, query_sample_block, nullptr, nullptr); out_chains.emplace_back(std::move(out)); diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 1f806d47c45..ba0d9490618 100644 --- 
a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -280,7 +280,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node for (size_t n = decoder.arrayStart(); n != 0; n = decoder.arrayNext()) { total += n; - for (size_t i = 0; i < n; i++) + for (size_t i = 0; i < n; ++i) { nested_deserialize(nested_column, decoder); } @@ -344,7 +344,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node if (target.isString()) { std::vector symbols; - for (size_t i = 0; i < root_node->names(); i++) + for (size_t i = 0; i < root_node->names(); ++i) { symbols.push_back(root_node->nameAt(i)); } @@ -359,7 +359,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node { const auto & enum_type = dynamic_cast(*target_type); Row symbol_mapping; - for (size_t i = 0; i < root_node->names(); i++) + for (size_t i = 0; i < root_node->names(); ++i) { symbol_mapping.push_back(enum_type.castToValue(root_node->nameAt(i))); } @@ -443,7 +443,7 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) case avro::AVRO_UNION: { std::vector union_skip_fns; - for (size_t i = 0; i < root_node->leaves(); i++) + for (size_t i = 0; i < root_node->leaves(); ++i) { union_skip_fns.push_back(createSkipFn(root_node->leafAt(i))); } @@ -476,7 +476,7 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) case avro::AVRO_RECORD: { std::vector field_skip_fns; - for (size_t i = 0; i < root_node->leaves(); i++) + for (size_t i = 0; i < root_node->leaves(); ++i) { field_skip_fns.push_back(createSkipFn(root_node->leafAt(i))); } diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp index 5033176ca4b..74070252ebb 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp @@ -48,7 +48,7 @@ void MySQLOutputFormat::writePrefix() { packet_endpoint->sendPacket(LengthEncodedNumber(header.columns())); - for (size_t i = 0; i < header.columns(); i++) + for (size_t i = 0; i < header.columns(); ++i) { const auto & column_name = header.getColumnsWithTypeAndName()[i].name; packet_endpoint->sendPacket(getColumnDefinition(column_name, data_types[i]->getTypeId())); @@ -63,7 +63,7 @@ void MySQLOutputFormat::writePrefix() void MySQLOutputFormat::consume(Chunk chunk) { - for (size_t i = 0; i < chunk.getNumRows(); i++) + for (size_t i = 0; i < chunk.getNumRows(); ++i) { ProtocolText::ResultSetRow row_packet(serializations, chunk.getColumns(), i); packet_endpoint->sendPacket(row_packet); diff --git a/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp b/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp index f46488fd0a8..0450051daf8 100644 --- a/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp @@ -21,7 +21,7 @@ void PostgreSQLOutputFormat::writePrefix() std::vector columns; columns.reserve(header.columns()); - for (size_t i = 0; i < header.columns(); i++) + for (size_t i = 0; i < header.columns(); ++i) { const auto & column_name = header.getColumnsWithTypeAndName()[i].name; columns.emplace_back(column_name, data_types[i]->getTypeId()); diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 8357a997960..5b58530f3d5 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ 
b/src/Processors/Transforms/AggregatingTransform.cpp @@ -329,7 +329,7 @@ private: if (num_threads > first->aggregates_pools.size()) { Arenas & first_pool = first->aggregates_pools; - for (size_t j = first_pool.size(); j < num_threads; j++) + for (size_t j = first_pool.size(); j < num_threads; ++j) first_pool.emplace_back(std::make_shared()); } diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 7a3bb25d2c6..0da7541556b 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -385,7 +385,7 @@ void WindowTransform::advancePartitionEnd() // prev_frame_start, partition_end); size_t i = 0; - for (; i < partition_by_columns; i++) + for (; i < partition_by_columns; ++i) { const auto * reference_column = inputAt(prev_frame_start)[partition_by_indices[i]].get(); @@ -667,7 +667,7 @@ bool WindowTransform::arePeers(const RowNumber & x, const RowNumber & y) const } size_t i = 0; - for (; i < n; i++) + for (; i < n; ++i) { const auto * column_x = inputAt(x)[order_by_indices[i]].get(); const auto * column_y = inputAt(y)[order_by_indices[i]].get(); diff --git a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp index fb10601216e..e19d2c7114b 100644 --- a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp +++ b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp @@ -16,7 +16,7 @@ static Block getBlockWithSize(const std::vector & columns, size_t r ColumnsWithTypeAndName cols; size_t size_of_row_in_bytes = columns.size() * sizeof(UInt64); - for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; i++) + for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; ++i) { auto column = ColumnUInt64::create(rows, 0); for (size_t j = 0; j < rows; ++j) diff --git a/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp index 751f7ef8635..7b30958f0c4 100644 --- a/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp +++ b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp @@ -29,7 +29,7 @@ static Block getSortedBlockWithSize( { ColumnsWithTypeAndName cols; size_t size_of_row_in_bytes = columns.size() * sizeof(UInt64); - for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; i++) + for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; ++i) { auto column = ColumnUInt64::create(rows, 0); for (size_t j = 0; j < rows; ++j) @@ -47,7 +47,7 @@ static Block getUnSortedBlockWithSize(const std::vector & columns, { ColumnsWithTypeAndName cols; size_t size_of_row_in_bytes = columns.size() * sizeof(UInt64); - for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; i++) + for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; ++i) { auto column = ColumnUInt64::create(rows, 0); for (size_t j = 0; j < rows; ++j) @@ -71,7 +71,7 @@ static Block getEqualValuesBlockWithSize( { ColumnsWithTypeAndName cols; size_t size_of_row_in_bytes = columns.size() * sizeof(UInt64); - for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; i++) + for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; ++i) { auto column = ColumnUInt64::create(rows, 0); for (size_t j = 0; j < rows; ++j) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index f49c31cba0c..83328594363 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ 
-1357,7 +1357,7 @@ String IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool else if (parent_part) full_relative_path /= parent_part->relative_path; - for (int try_no = 0; try_no < 10; try_no++) + for (int try_no = 0; try_no < 10; ++try_no) { res = (prefix.empty() ? "" : prefix + "_") + name + (try_no ? "_try" + DB::toString(try_no) : ""); diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index a8820b3f6d4..9332f4fd442 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -112,7 +112,7 @@ void MergeTreeIndexAggregatorFullText::update(const Block & block, size_t * pos, size_t element_start_row = column_offsets[current_position - 1]; size_t elements_size = column_offsets[current_position] - element_start_row; - for (size_t row_num = 0; row_num < elements_size; row_num++) + for (size_t row_num = 0; row_num < elements_size; ++row_num) { auto ref = column_key.getDataAt(element_start_row + row_num); token_extractor->stringPaddedToBloomFilter(ref.data, ref.size, granule->bloom_filters[col]); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index 1c918c15775..b42f2214d88 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -38,7 +38,7 @@ void EmbeddedRocksDBSink::consume(Chunk chunk) rocksdb::WriteBatch batch; rocksdb::Status status; - for (size_t i = 0; i < rows; i++) + for (size_t i = 0; i < rows; ++i) { wb_key.restart(); wb_value.restart(); diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 6417aa9f72c..f5526781f41 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -1108,7 +1108,7 @@ void registerStorageBuffer(StorageFactory & factory) // After we evaluated all expressions, check that all arguments are // literals. 
- for (size_t i = 0; i < engine_args.size(); i++) + for (size_t i = 0; i < engine_args.size(); ++i) { if (!typeid_cast(engine_args[i].get())) { From 60d4295d8a1f599f338e325934dd89771f651e6b Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 20 Dec 2021 16:00:43 +0300 Subject: [PATCH 35/37] FunctionThrowIf added comment --- src/Functions/throwIf.cpp | 45 +++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/src/Functions/throwIf.cpp b/src/Functions/throwIf.cpp index 2af6b7eede8..d2af6781764 100644 --- a/src/Functions/throwIf.cpp +++ b/src/Functions/throwIf.cpp @@ -48,16 +48,21 @@ public: const size_t number_of_arguments = arguments.size(); if (number_of_arguments < 1 || number_of_arguments > 2) - throw Exception{"Number of arguments for function " + getName() + " doesn't match: passed " - + toString(number_of_arguments) + ", should be 1 or 2", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2", + getName(), + toString(number_of_arguments)); if (!isNativeNumber(arguments[0])) - throw Exception{"Argument for function " + getName() + " must be number", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument for function {} must be number", + getName()); if (number_of_arguments > 1 && !isString(arguments[1])) - throw Exception{"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}", + arguments[1]->getName(), + getName()); return std::make_shared(); @@ -65,6 +70,10 @@ public: bool useDefaultImplementationForConstants() const override { return false; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + /** Prevent constant folding for FunctionThrowIf because for short circuit evaluation + * it is unsafe to evaluate this function during DAG analysis. 
+ */ bool isSuitableForConstantFolding() const override { return false; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override @@ -75,16 +84,17 @@ public: std::optional custom_message; if (arguments.size() == 2) { - const auto * msg_column = checkAndGetColumnConst(arguments[1].column.get()); - if (!msg_column) - throw Exception{"Second argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_COLUMN}; - custom_message = msg_column->getValue(); + const auto * message_column = checkAndGetColumnConst(arguments[1].column.get()); + if (!message_column) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Second argument for function {} must be constant String", + getName()); + + custom_message = message_column->getValue(); } auto first_argument_column = arguments.front().column; - auto first_argument_column_non_const = first_argument_column->convertToFullColumnIfConst(); - - const auto * in = first_argument_column_non_const.get(); + const auto * in = first_argument_column.get(); ColumnPtr res; if (!((res = execute(in, custom_message)) @@ -97,7 +107,9 @@ public: || (res = execute(in, custom_message)) || (res = execute(in, custom_message)) || (res = execute(in, custom_message)))) + { throw Exception{"Illegal column " + in->getName() + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; + } return res; } @@ -105,7 +117,12 @@ public: template ColumnPtr execute(const IColumn * in_untyped, const std::optional & message) const { - if (const auto in = checkAndGetColumn>(in_untyped)) + const auto * in = checkAndGetColumn>(in_untyped); + + if (!in) + in = checkAndGetColumnConstData>(in_untyped); + + if (in) { const auto & in_data = in->getData(); if (!memoryIsZero(in_data.data(), in_data.size() * sizeof(in_data[0]))) From 02b6ad52efc760c04e4ce0eb25e8b31d4a7163d4 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 20 Dec 2021 16:55:15 +0300 Subject: [PATCH 36/37] FunctionThrowIf fixed exception --- src/Functions/throwIf.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Functions/throwIf.cpp b/src/Functions/throwIf.cpp index d2af6781764..7533e30c9b9 100644 --- a/src/Functions/throwIf.cpp +++ b/src/Functions/throwIf.cpp @@ -126,8 +126,10 @@ public: { const auto & in_data = in->getData(); if (!memoryIsZero(in_data.data(), in_data.size() * sizeof(in_data[0]))) - throw Exception{message.value_or("Value passed to '" + getName() + "' function is non zero"), - ErrorCodes::FUNCTION_THROW_IF_VALUE_IS_NON_ZERO}; + { + throw Exception(ErrorCodes::FUNCTION_THROW_IF_VALUE_IS_NON_ZERO, + message.value_or("Value passed to '" + getName() + "' function is non zero")); + } /// We return non constant to avoid constant folding. return ColumnUInt8::create(in_data.size(), 0); From 7dea7b7f760ddfacd79c36bac5336b37ee45620e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 20 Dec 2021 18:18:54 +0300 Subject: [PATCH 37/37] Decrease log level for some s3 messages. 
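Routine per-request messages in the remote-disk and S3 code paths (object reads in DiskS3, DiskHDFS and DiskAzureBlobStorage, cache hits in DiskCacheWrapper, metadata removal and space reservation in IDiskRemote, request/response tracing in ReadBufferFromS3 and PocoHTTPClient) are demoted from DEBUG/TRACE to the more verbose TEST level, and the AWS SDK's own Debug/Trace output is mapped to PRIO_TEST as well. At the usual debug or trace server log level these lines no longer flood the log; they are still emitted when the level is raised to test, as is done for instance in functional tests.

The gating idea, as a small standalone sketch (illustrative names only, with an assumed level ordering in which Test is the most verbose; this is not the actual ClickHouse Logger/LOG_* implementation):

    #include <iostream>
    #include <string>

    /// Larger values are more verbose; Test sits past Trace.
    enum class Level { Error, Warning, Information, Debug, Trace, Test };

    /// Emit a message only if it is not more verbose than the configured threshold.
    void log(Level threshold, Level message_level, const std::string & text)
    {
        if (message_level <= threshold)
            std::cout << text << '\n';
    }

    int main()
    {
        const std::string msg = "Read S3 object. Bucket: b, Key: k, Offset: 0";

        log(Level::Trace, Level::Debug, msg); /// old behaviour: visible at trace level
        log(Level::Trace, Level::Test,  msg); /// new behaviour: suppressed at trace level
        log(Level::Test,  Level::Test,  msg); /// visible again once the level is raised to test
        return 0;
    }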
--- .../AzureBlobStorage/DiskAzureBlobStorage.cpp | 2 +- src/Disks/DiskCacheWrapper.cpp | 10 +++++----- src/Disks/HDFS/DiskHDFS.cpp | 2 +- src/Disks/IDiskRemote.cpp | 6 +++--- src/Disks/S3/DiskS3.cpp | 2 +- src/IO/ReadBufferFromS3.cpp | 4 ++-- src/IO/S3/PocoHTTPClient.cpp | 14 +++++++------- src/IO/S3Common.cpp | 4 ++-- 8 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp index 7883bc0b537..0b65f09338c 100644 --- a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp @@ -67,7 +67,7 @@ std::unique_ptr DiskAzureBlobStorage::readFile( auto settings = current_settings.get(); auto metadata = readMeta(path); - LOG_TRACE(log, "Read from file by path: {}", backQuote(metadata_disk->getPath() + path)); + LOG_TEST(log, "Read from file by path: {}", backQuote(metadata_disk->getPath() + path)); bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index e1e901f0d45..b09487c17bc 100644 --- a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -91,7 +91,7 @@ DiskCacheWrapper::readFile( if (!cache_file_predicate(path)) return DiskDecorator::readFile(path, settings, size); - LOG_DEBUG(log, "Read file {} from cache", backQuote(path)); + LOG_TEST(log, "Read file {} from cache", backQuote(path)); if (cache_disk->exists(path)) return cache_disk->readFile(path, settings, size); @@ -105,11 +105,11 @@ DiskCacheWrapper::readFile( { /// This thread will responsible for file downloading to cache. metadata->status = DOWNLOADING; - LOG_DEBUG(log, "File {} doesn't exist in cache. Will download it", backQuote(path)); + LOG_TEST(log, "File {} doesn't exist in cache. Will download it", backQuote(path)); } else if (metadata->status == DOWNLOADING) { - LOG_DEBUG(log, "Waiting for file {} download to cache", backQuote(path)); + LOG_TEST(log, "Waiting for file {} download to cache", backQuote(path)); metadata->condition.wait(lock, [metadata] { return metadata->status == DOWNLOADED || metadata->status == ERROR; }); } } @@ -134,7 +134,7 @@ DiskCacheWrapper::readFile( } cache_disk->moveFile(tmp_path, path); - LOG_DEBUG(log, "File {} downloaded to cache", backQuote(path)); + LOG_TEST(log, "File {} downloaded to cache", backQuote(path)); } catch (...) { @@ -163,7 +163,7 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode if (!cache_file_predicate(path)) return DiskDecorator::writeFile(path, buf_size, mode); - LOG_DEBUG(log, "Write file {} to cache", backQuote(path)); + LOG_TRACE(log, "Write file {} to cache", backQuote(path)); auto dir_path = directoryPath(path); if (!cache_disk->exists(dir_path)) diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index 4d4a438f93b..41c407c10ee 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -75,7 +75,7 @@ std::unique_ptr DiskHDFS::readFile(const String & path, { auto metadata = readMeta(path); - LOG_TRACE(log, + LOG_TEST(log, "Read from file by path: {}. 
Existing HDFS objects: {}", backQuote(metadata_disk->getPath() + path), metadata.remote_fs_objects.size()); diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index e920e6fd5b9..848726f957d 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -177,7 +177,7 @@ IDiskRemote::Metadata IDiskRemote::createMeta(const String & path) const void IDiskRemote::removeMeta(const String & path, RemoteFSPathKeeperPtr fs_paths_keeper) { - LOG_DEBUG(log, "Remove file by path: {}", backQuote(metadata_disk->getPath() + path)); + LOG_TRACE(log, "Remove file by path: {}", backQuote(metadata_disk->getPath() + path)); if (!metadata_disk->isFile(path)) throw Exception(ErrorCodes::CANNOT_DELETE_DIRECTORY, "Path '{}' is a directory", path); @@ -464,7 +464,7 @@ bool IDiskRemote::tryReserve(UInt64 bytes) std::lock_guard lock(reservation_mutex); if (bytes == 0) { - LOG_DEBUG(log, "Reserving 0 bytes on remote_fs disk {}", backQuote(name)); + LOG_TRACE(log, "Reserving 0 bytes on remote_fs disk {}", backQuote(name)); ++reservation_count; return true; } @@ -473,7 +473,7 @@ bool IDiskRemote::tryReserve(UInt64 bytes) UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); if (unreserved_space >= bytes) { - LOG_DEBUG(log, "Reserving {} on disk {}, having unreserved {}.", + LOG_TRACE(log, "Reserving {} on disk {}, having unreserved {}.", ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); ++reservation_count; reserved_bytes += bytes; diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 0e2f75505fa..97d23d9e407 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -219,7 +219,7 @@ std::unique_ptr DiskS3::readFile(const String & path, co auto settings = current_settings.get(); auto metadata = readMeta(path); - LOG_TRACE(log, "Read from file by path: {}. Existing S3 objects: {}", + LOG_TEST(log, "Read from file by path: {}. Existing S3 objects: {}", backQuote(metadata_disk->getPath() + path), metadata.remote_fs_objects.size()); bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 53d2067780e..30484b14021 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -235,12 +235,12 @@ std::unique_ptr ReadBufferFromS3::initialize() throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1); req.SetRange(fmt::format("bytes={}-{}", offset, read_until_position - 1)); - LOG_DEBUG(log, "Read S3 object. Bucket: {}, Key: {}, Range: {}-{}", bucket, key, offset, read_until_position - 1); + LOG_TEST(log, "Read S3 object. Bucket: {}, Key: {}, Range: {}-{}", bucket, key, offset, read_until_position - 1); } else { req.SetRange(fmt::format("bytes={}-", offset)); - LOG_DEBUG(log, "Read S3 object. Bucket: {}, Key: {}, Offset: {}", bucket, key, offset); + LOG_TEST(log, "Read S3 object. 
Bucket: {}, Key: {}, Offset: {}", bucket, key, offset); } Aws::S3::Model::GetObjectOutcome outcome = client_ptr->GetObject(req); diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 68bdbc9cf86..25b03d66097 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -119,7 +119,7 @@ void PocoHTTPClient::makeRequestInternal( Poco::Logger * log = &Poco::Logger::get("AWSClient"); auto uri = request.GetUri().GetURIString(); - LOG_DEBUG(log, "Make request to: {}", uri); + LOG_TEST(log, "Make request to: {}", uri); enum class S3MetricType { @@ -251,7 +251,7 @@ void PocoHTTPClient::makeRequestInternal( if (request.GetContentBody()) { - LOG_TRACE(log, "Writing request body."); + LOG_TEST(log, "Writing request body."); if (attempt > 0) /// rewind content body buffer. { @@ -259,24 +259,24 @@ void PocoHTTPClient::makeRequestInternal( request.GetContentBody()->seekg(0); } auto size = Poco::StreamCopier::copyStream(*request.GetContentBody(), request_body_stream); - LOG_DEBUG(log, "Written {} bytes to request body", size); + LOG_TEST(log, "Written {} bytes to request body", size); } - LOG_TRACE(log, "Receiving response..."); + LOG_TEST(log, "Receiving response..."); auto & response_body_stream = session->receiveResponse(poco_response); watch.stop(); ProfileEvents::increment(select_metric(S3MetricType::Microseconds), watch.elapsedMicroseconds()); int status_code = static_cast(poco_response.getStatus()); - LOG_DEBUG(log, "Response status: {}, {}", status_code, poco_response.getReason()); + LOG_TEST(log, "Response status: {}, {}", status_code, poco_response.getReason()); if (poco_response.getStatus() == Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT) { auto location = poco_response.get("location"); remote_host_filter.checkURL(Poco::URI(location)); uri = location; - LOG_DEBUG(log, "Redirecting request to new location: {}", location); + LOG_TEST(log, "Redirecting request to new location: {}", location); ProfileEvents::increment(select_metric(S3MetricType::Redirects)); @@ -292,7 +292,7 @@ void PocoHTTPClient::makeRequestInternal( response->AddHeader(header_name, header_value); headers_ss << header_name << ": " << header_value << "; "; } - LOG_DEBUG(log, "Received headers: {}", headers_ss.str()); + LOG_TEST(log, "Received headers: {}", headers_ss.str()); if (status_code == 429 || status_code == 503) { // API throttling diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 41b2b1f059a..432dc443300 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -51,8 +51,8 @@ const std::pair & convertLogLevel(Aws::U {Aws::Utils::Logging::LogLevel::Error, {DB::LogsLevel::error, Poco::Message::PRIO_ERROR}}, {Aws::Utils::Logging::LogLevel::Warn, {DB::LogsLevel::warning, Poco::Message::PRIO_WARNING}}, {Aws::Utils::Logging::LogLevel::Info, {DB::LogsLevel::information, Poco::Message::PRIO_INFORMATION}}, - {Aws::Utils::Logging::LogLevel::Debug, {DB::LogsLevel::debug, Poco::Message::PRIO_DEBUG}}, - {Aws::Utils::Logging::LogLevel::Trace, {DB::LogsLevel::trace, Poco::Message::PRIO_TRACE}}, + {Aws::Utils::Logging::LogLevel::Debug, {DB::LogsLevel::debug, Poco::Message::PRIO_TEST}}, + {Aws::Utils::Logging::LogLevel::Trace, {DB::LogsLevel::trace, Poco::Message::PRIO_TEST}}, }; return mapping.at(log_level); }
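
The convertLogLevel change above keeps the same lookup shape: a reference is returned from mapping.at(log_level), which throws for an unmapped SDK level, and the Debug and Trace entries now point at the test priority. A minimal self-contained sketch of that pattern, with simplified stand-ins for the real types (the actual code uses Aws::Utils::Logging::LogLevel, DB::LogsLevel and Poco::Message::Priority and returns a pair of server log level and Poco priority; the function-local static table here is part of the sketch, not taken from the patch):

    #include <iostream>
    #include <map>

    enum class SdkLevel { Fatal, Error, Warn, Info, Debug, Trace };
    enum class ServerLevel { Fatal, Error, Warning, Information, Debug, Trace, Test };

    /// Built once on first call; later calls only perform the lookup.
    const ServerLevel & convert(SdkLevel level)
    {
        static const std::map<SdkLevel, ServerLevel> mapping =
        {
            {SdkLevel::Fatal, ServerLevel::Fatal},
            {SdkLevel::Error, ServerLevel::Error},
            {SdkLevel::Warn,  ServerLevel::Warning},
            {SdkLevel::Info,  ServerLevel::Information},
            {SdkLevel::Debug, ServerLevel::Test},   /// demoted, as in the patch
            {SdkLevel::Trace, ServerLevel::Test},   /// demoted, as in the patch
        };
        return mapping.at(level);                   /// throws std::out_of_range if unmapped
    }

    int main()
    {
        std::cout << static_cast<int>(convert(SdkLevel::Debug)) << '\n'; /// prints 6 (Test)
        return 0;
    }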