diff --git a/CHANGELOG.md b/CHANGELOG.md index babb5ebca8d..718aa751cc2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -### ClickHouse release v21.10, 2021-10-08 +### ClickHouse release v21.10, 2021-10-14 #### Backward Incompatible Change @@ -110,6 +110,7 @@ * Fix the issue that, in case of a sophisticated query with column aliases identical to the names of expressions, a bad cast may happen. This fixes [#25447](https://github.com/ClickHouse/ClickHouse/issues/25447). This fixes [#26914](https://github.com/ClickHouse/ClickHouse/issues/26914). This fix may introduce backward incompatibility: if there are different expressions with identical names, an exception will be thrown. It may break some rare cases when `enable_optimize_predicate_expression` is set. [#26639](https://github.com/ClickHouse/ClickHouse/pull/26639) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Now, a scalar subquery always returns a `Nullable` result if its type can be `Nullable`. This is needed because in case of an empty subquery its result should be `Null`. Previously, it was possible to get an error about incompatible types (type deduction does not execute the scalar subquery, and it could use a non-nullable type). A scalar subquery with an empty result which can't be converted to `Nullable` (like `Array` or `Tuple`) now throws an error. Fixes [#25411](https://github.com/ClickHouse/ClickHouse/issues/25411). [#26423](https://github.com/ClickHouse/ClickHouse/pull/26423) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Introduce syntax for here documents. Example `SELECT $doc$ VALUE $doc$`. [#26671](https://github.com/ClickHouse/ClickHouse/pull/26671) ([Maksim Kita](https://github.com/kitaisreal)). This change is backward incompatible if a query contains identifiers that contain `$` [#28768](https://github.com/ClickHouse/ClickHouse/issues/28768). +* Now indices can handle Nullable types, including `isNull` and `isNotNull`. [#12433](https://github.com/ClickHouse/ClickHouse/pull/12433) and [#12455](https://github.com/ClickHouse/ClickHouse/pull/12455) ([Amos Bird](https://github.com/amosbird)) and [#27250](https://github.com/ClickHouse/ClickHouse/pull/27250) ([Azat Khuzhin](https://github.com/azat)). However, this required an on-disk format change: a new server can read old data, but an old server cannot read data written by a new one. Also, if you have `MINMAX` data skipping indices, you may get a `Data after mutation/merge is not byte-identical` error, since the new index uses the `.idx2` extension instead of `.idx`. Therefore, do not delay updating all existing replicas in this case; otherwise, if an old replica (<21.9) downloads data from a replica running 21.9+, it will not be able to apply the index for the downloaded part. #### New Feature @@ -179,7 +180,6 @@ * Add setting `log_formatted_queries` to log an additional formatted query into `system.query_log`. It's useful for normalized query analysis because functions like `normalizeQuery` and `normalizeQueryKeepNames` don't parse/format queries in order to achieve better performance. [#27380](https://github.com/ClickHouse/ClickHouse/pull/27380) ([Amos Bird](https://github.com/amosbird)). * Add two settings `max_hyperscan_regexp_length` and `max_hyperscan_regexp_total_length` to prevent huge regexps from being used in hyperscan-related functions, such as `multiMatchAny`. [#27378](https://github.com/ClickHouse/ClickHouse/pull/27378) ([Amos Bird](https://github.com/amosbird)). * Memory consumed by bitmap aggregate functions is now taken into account for memory limits.
This closes [#26555](https://github.com/ClickHouse/ClickHouse/issues/26555). [#27252](https://github.com/ClickHouse/ClickHouse/pull/27252) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add new index data skipping minmax index format for proper Nullable support. [#27250](https://github.com/ClickHouse/ClickHouse/pull/27250) ([Azat Khuzhin](https://github.com/azat)). * Add 10 seconds cache for S3 proxy resolver. [#27216](https://github.com/ClickHouse/ClickHouse/pull/27216) ([ianton-ru](https://github.com/ianton-ru)). * Split global mutex into individual regexp construction. This helps avoid huge regexp construction blocking other related threads. [#27211](https://github.com/ClickHouse/ClickHouse/pull/27211) ([Amos Bird](https://github.com/amosbird)). * Support schema for PostgreSQL database engine. Closes [#27166](https://github.com/ClickHouse/ClickHouse/issues/27166). [#27198](https://github.com/ClickHouse/ClickHouse/pull/27198) ([Kseniia Sumarokova](https://github.com/kssenii)). @@ -234,7 +234,6 @@ * Fix multiple block insertion into distributed table with `insert_distributed_one_random_shard = 1`. This is a marginal feature. Mark as improvement. [#23140](https://github.com/ClickHouse/ClickHouse/pull/23140) ([Amos Bird](https://github.com/amosbird)). * Support `LowCardinality` and `FixedString` keys/values for `Map` type. [#21543](https://github.com/ClickHouse/ClickHouse/pull/21543) ([hexiaoting](https://github.com/hexiaoting)). * Enable reloading of local disk config. [#19526](https://github.com/ClickHouse/ClickHouse/pull/19526) ([taiyang-li](https://github.com/taiyang-li)). -* Now KeyConditions can correctly skip nullable keys, including `isNull` and `isNotNull`. https://github.com/ClickHouse/ClickHouse/pull/12433. [#12455](https://github.com/ClickHouse/ClickHouse/pull/12455) ([Amos Bird](https://github.com/amosbird)). #### Bug Fix diff --git a/CMakeLists.txt b/CMakeLists.txt index 66df4d3124a..685b2c25a0d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -336,6 +336,10 @@ if (COMPILER_GCC OR COMPILER_CLANG) set(COMPILER_FLAGS "${COMPILER_FLAGS} -falign-functions=32") endif () +if (COMPILER_GCC) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcoroutines") +endif () + # Compiler-specific coverage flags e.g. 
-fcoverage-mapping for gcc option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF) diff --git a/contrib/cctz b/contrib/cctz index c0f1bcb97fd..9edd0861d83 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit c0f1bcb97fd2782f7c3f972fadd5aad5affac4b8 +Subproject commit 9edd0861d8328b2ae77e8fb5f4d7dcd1cf33b42b diff --git a/contrib/libcxx b/contrib/libcxx index 2fa892f69ac..61e60294b1d 160000 --- a/contrib/libcxx +++ b/contrib/libcxx @@ -1 +1 @@ -Subproject commit 2fa892f69acbaa40f8a18c6484854a6183a34482 +Subproject commit 61e60294b1de01483caa9f5d00f437c99b674de6 diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 798910fb952..f50c65bb9f2 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -67,7 +67,7 @@ RUN apt-get update \ unixodbc \ --yes --no-install-recommends -RUN pip3 install numpy scipy pandas Jinja2 +RUN pip3 install numpy scipy pandas Jinja2 clickhouse_driver # This symlink required by gcc to find lld compiler RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index 6444e745c47..13353bc2960 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -27,7 +27,7 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -RUN pip3 install Jinja2 +RUN pip3 install Jinja2 pandas clickhouse_driver COPY * / diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 3c3fcd42fde..15590902b68 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -125,25 +125,9 @@ function fuzz # interferes with gdb export CLICKHOUSE_WATCHDOG_ENABLE=0 - # NOTE: that $! cannot be used to obtain the server pid, since it will be - # the pid of the bash, due to piping the output of clickhouse-server to - # tail - PID_FILE=clickhouse-server.pid - clickhouse-server --pidfile=$PID_FILE --config-file db/config.xml -- --path db 2>&1 | tail -100000 > server.log & - - server_pid=-1 - for _ in {1..60}; do - if [ -s $PID_FILE ]; then - server_pid=$(cat $PID_FILE) - break - fi - sleep 1 - done - - if [ $server_pid = -1 ]; then - echo "Server did not started" >&2 - exit 1 - fi + # NOTE: we use process substitution here to preserve $! as the pid of clickhouse-server + clickhouse-server --config-file db/config.xml -- --path db > >(tail -100000 > server.log) 2>&1 & + server_pid=$!
kill -0 $server_pid diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 7de8c061673..a5733d11dd2 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -34,7 +34,7 @@ RUN apt-get update -y \ postgresql-client \ sqlite3 -RUN pip3 install numpy scipy pandas Jinja2 +RUN pip3 install numpy scipy pandas Jinja2 clickhouse_driver RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 33cdb9db57a..64cc0c9c7b7 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -10,7 +10,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ python3-pip \ pylint \ yamllint \ - && pip3 install codespell + && pip3 install codespell pandas clickhouse_driver COPY run.sh / COPY process_style_check_result.py / diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 72ebe33292f..20bf9a10986 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -102,6 +102,7 @@ toc_title: Adopters | Raiffeisenbank | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) | | Rambler | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | | Retell | Speech synthesis | Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) | +| Rollbar | Software Development | Main Product | — | — | [Official Website](https://www.rollbar.com) | | Rspamd | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) | | RuSIEM | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) | | S7 Airlines | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index de7a1835038..f78fbc8a2bc 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3749,3 +3749,38 @@ Exception: Total regexp lengths too large. **See Also** - [max_hyperscan_regexp_length](#max-hyperscan-regexp-length) + +## enable_positional_arguments {#enable-positional-arguments} + +Enables or disables support for positional arguments in the [GROUP BY](../../sql-reference/statements/select/group-by.md), [LIMIT BY](../../sql-reference/statements/select/limit-by.md), and [ORDER BY](../../sql-reference/statements/select/order-by.md) clauses. When you want to use column numbers instead of column names in these clauses, set `enable_positional_arguments = 1`. + +Possible values: + +- 0 — Positional arguments aren't supported. +- 1 — Positional arguments are supported: column numbers can be used instead of column names. + +Default value: `0`.
+ +**Example** + +Query: + +```sql +CREATE TABLE positional_arguments(one Int, two Int, three Int) ENGINE=Memory(); + +INSERT INTO positional_arguments VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20); + +SET enable_positional_arguments = 1; + +SELECT * FROM positional_arguments ORDER BY 2,3; +``` + +Result: + +```text +┌─one─┬─two─┬─three─┐ +│ 30 │ 10 │ 20 │ +│ 20 │ 20 │ 10 │ +│ 10 │ 20 │ 30 │ +└─────┴─────┴───────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/statements/select/group-by.md b/docs/en/sql-reference/statements/select/group-by.md index 7c2d3a20f43..26dd51d806d 100644 --- a/docs/en/sql-reference/statements/select/group-by.md +++ b/docs/en/sql-reference/statements/select/group-by.md @@ -10,6 +10,8 @@ toc_title: GROUP BY - All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both. - Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually this significantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct. +When you want to group data in the table by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). + !!! note "Note" There’s an additional way to run aggregation over a table. If a query contains table columns only inside aggregate functions, the `GROUP BY` clause can be omitted, and aggregation by an empty set of keys is assumed. Such queries always return exactly one row. diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index b3cc7555d91..0427764475a 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -144,7 +144,7 @@ Extreme values are calculated for rows before `LIMIT`, but after `LIMIT BY`. How You can use synonyms (`AS` aliases) in any part of a query. -The `GROUP BY` and `ORDER BY` clauses do not support positional arguments. This contradicts MySQL, but conforms to standard SQL. For example, `GROUP BY 1, 2` will be interpreted as grouping by constants (i.e. aggregation of all rows into one). +The `GROUP BY`, `ORDER BY`, and `LIMIT BY` clauses support positional arguments. To enable this, switch on the [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments) setting. Then, for example, `ORDER BY 1,2` will sort rows by the first and then by the second column. ## Implementation Details {#implementation-details} diff --git a/docs/en/sql-reference/statements/select/limit-by.md b/docs/en/sql-reference/statements/select/limit-by.md index 34645b68b03..e1ca58cdec8 100644 --- a/docs/en/sql-reference/statements/select/limit-by.md +++ b/docs/en/sql-reference/statements/select/limit-by.md @@ -16,6 +16,9 @@ During query processing, ClickHouse selects data ordered by sorting key. The sor !!!
note "Note" `LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query. +If you want to use column numbers instead of column names in the `LIMIT BY` clause, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). + + ## Examples {#examples} Sample table: diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index 030f04d5e83..ee6893812cc 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -4,7 +4,9 @@ toc_title: ORDER BY # ORDER BY Clause {#select-order-by} -The `ORDER BY` clause contains a list of expressions, which can each be attributed with `DESC` (descending) or `ASC` (ascending) modifier which determine the sorting direction. If the direction is not specified, `ASC` is assumed, so it’s usually omitted. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase` +The `ORDER BY` clause contains a list of expressions, which can each be attributed with `DESC` (descending) or `ASC` (ascending) modifier which determine the sorting direction. If the direction is not specified, `ASC` is assumed, so it’s usually omitted. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase`. + +If you want to sort by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). Rows that have identical values for the list of sorting expressions are output in an arbitrary order, which can also be non-deterministic (different each time). If the ORDER BY clause is omitted, the order of the rows is also undefined, and may be non-deterministic as well. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index e639c0a0df2..500485aea2f 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -3538,3 +3538,38 @@ Exception: Total regexp lengths too large. **См. также** - [max_hyperscan_regexp_length](#max-hyperscan-regexp-length) + +## enable_positional_arguments {#enable-positional-arguments} + +Включает и отключает поддержку позиционных аргументов для [GROUP BY](../../sql-reference/statements/select/group-by.md), [LIMIT BY](../../sql-reference/statements/select/limit-by.md), [ORDER BY](../../sql-reference/statements/select/order-by.md). Если вы хотите использовать номера столбцов вместо названий в выражениях этих операторов, установите `enable_positional_arguments = 1`. + +Возможные значения: + +- 0 — Позиционные аргументы не поддерживаются. +- 1 — Позиционные аргументы поддерживаются: можно использовать номера столбцов вместо названий столбцов. + +Значение по умолчанию: `0`. 
+ +**Пример** + +Запрос: + +```sql +CREATE TABLE positional_arguments(one Int, two Int, three Int) ENGINE=Memory(); + +INSERT INTO positional_arguments VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20); + +SET enable_positional_arguments = 1; + +SELECT * FROM positional_arguments ORDER BY 2,3; +``` + +Результат: + +```text +┌─one─┬─two─┬─three─┐ +│ 30 │ 10 │ 20 │ +│ 20 │ 20 │ 10 │ +│ 10 │ 20 │ 30 │ +└─────┴─────┴───────┘ +``` \ No newline at end of file diff --git a/docs/ru/sql-reference/statements/select/group-by.md b/docs/ru/sql-reference/statements/select/group-by.md index 2f0cabd14fb..8bc1b765ad3 100644 --- a/docs/ru/sql-reference/statements/select/group-by.md +++ b/docs/ru/sql-reference/statements/select/group-by.md @@ -10,6 +10,8 @@ toc_title: GROUP BY - Все выражения в секциях [SELECT](index.md), [HAVING](having.md), и [ORDER BY](order-by.md) статьи **должны** быть вычисленными на основе ключевых выражений **или** на [агрегатных функций](../../../sql-reference/aggregate-functions/index.md) над неключевыми выражениями (включая столбцы). Другими словами, каждый столбец, выбранный из таблицы, должен использоваться либо в ключевом выражении, либо внутри агрегатной функции, но не в обоих. - В результате агрегирования `SELECT` запрос будет содержать столько строк, сколько было уникальных значений ключа группировки в исходной таблице. Обычно агрегация значительно уменьшает количество строк, часто на порядки, но не обязательно: количество строк остается неизменным, если все исходные значения ключа группировки ценности были различны. +Если вы хотите для группировки данных в таблице указывать номера столбцов, а не названия, включите настройку [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). + !!! note "Примечание" Есть ещё один способ запустить агрегацию по таблице. Если запрос содержит столбцы исходной таблицы только внутри агрегатных функций, то `GROUP BY` секцию можно опустить, и предполагается агрегирование по пустому набору ключей. Такие запросы всегда возвращают ровно одну строку. diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md index c2820bc7be4..ffaae74b1d9 100644 --- a/docs/ru/sql-reference/statements/select/index.md +++ b/docs/ru/sql-reference/statements/select/index.md @@ -140,8 +140,7 @@ Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of Вы можете использовать синонимы (алиасы `AS`) в любом месте запроса. -В секциях `GROUP BY`, `ORDER BY`, в отличие от диалекта MySQL, и в соответствии со стандартным SQL, не поддерживаются позиционные аргументы. -Например, если вы напишите `GROUP BY 1, 2` - то это будет воспринято, как группировка по константам (то есть, агрегация всех строк в одну). +В секциях `GROUP BY`, `ORDER BY` и `LIMIT BY` можно использовать не названия столбцов, а номера. Для этого нужно включить настройку [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). Тогда, например, в запросе с `ORDER BY 1,2` будет выполнена сортировка сначала по первому, а затем по второму столбцу. 
## Детали реализации {#implementation-details} diff --git a/docs/ru/sql-reference/statements/select/limit-by.md b/docs/ru/sql-reference/statements/select/limit-by.md index fba81c023b5..861d88dcafb 100644 --- a/docs/ru/sql-reference/statements/select/limit-by.md +++ b/docs/ru/sql-reference/statements/select/limit-by.md @@ -15,6 +15,8 @@ ClickHouse поддерживает следующий синтаксис: `LIMIT BY` не связана с секцией `LIMIT`. Их можно использовать в одном запросе. +Если вы хотите использовать в секции `LIMIT BY` номера столбцов вместо названий, включите настройку [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). + ## Примеры Образец таблицы: diff --git a/docs/ru/sql-reference/statements/select/order-by.md b/docs/ru/sql-reference/statements/select/order-by.md index d7d2e9c7574..190a46dacc9 100644 --- a/docs/ru/sql-reference/statements/select/order-by.md +++ b/docs/ru/sql-reference/statements/select/order-by.md @@ -4,7 +4,9 @@ toc_title: ORDER BY # Секция ORDER BY {#select-order-by} -Секция `ORDER BY` содержит список выражений, к каждому из которых также может быть приписано `DESC` или `ASC` (направление сортировки). Если ничего не приписано - это аналогично приписыванию `ASC`. `ASC` - сортировка по возрастанию, `DESC` - сортировка по убыванию. Обозначение направления сортировки действует на одно выражение, а не на весь список. Пример: `ORDER BY Visits DESC, SearchPhrase` +Секция `ORDER BY` содержит список выражений, к каждому из которых также может быть приписано `DESC` или `ASC` (направление сортировки). Если ничего не приписано - это аналогично приписыванию `ASC`. `ASC` - сортировка по возрастанию, `DESC` - сортировка по убыванию. Обозначение направления сортировки действует на одно выражение, а не на весь список. Пример: `ORDER BY Visits DESC, SearchPhrase`. + +Если вы хотите для сортировки данных указывать номера столбцов, а не названия, включите настройку [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). Строки, для которых список выражений, по которым производится сортировка, принимает одинаковые значения, выводятся в произвольном порядке, который может быть также недетерминированным (каждый раз разным). Если секция ORDER BY отсутствует, то, аналогично, порядок, в котором идут строки, не определён, и может быть недетерминированным. 
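The documentation examples above only demonstrate positional arguments with `ORDER BY`; the same setting also covers `GROUP BY` and `LIMIT BY`. Below is a minimal illustrative sketch reusing the `positional_arguments` table from the settings example, under the assumption that positions refer to columns of the `SELECT` list (result rows omitted):

```sql
SET enable_positional_arguments = 1;

-- Group by position 1, i.e. the first selected column (`one`).
SELECT one, count() FROM positional_arguments GROUP BY 1;

-- Keep at most one row per distinct value of position 2 (`two`).
SELECT * FROM positional_arguments ORDER BY 2, 3 LIMIT 1 BY 2;
```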
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index cd5d72cfba4..4ed5b114082 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1159,7 +1159,6 @@ if (ThreadFuzzer::instance().isEffective()) UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0); if (total_memory_profiler_step) { - total_memory_tracker.setOrRaiseProfilerLimit(total_memory_profiler_step); total_memory_tracker.setProfilerStep(total_memory_profiler_step); } diff --git a/programs/server/config.xml b/programs/server/config.xml index ba4c680d765..e38a6daeaed 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -62,6 +62,27 @@ --> + + + + diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index cde5a5f9977..b68df11fd60 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -426,10 +426,8 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) catch (Exception & e) { if (!is_interactive) - { e.addMessage("(in query: {})", full_query); - throw; - } + throw; } if (have_error) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index aa9f89e47b5..ec267e44a99 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -144,7 +144,7 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - fprintf(stderr, "erased\n"); + std::cerr << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -153,12 +153,12 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - fprintf(stderr, "inserted (pos %zd)\n", pos); + std::cerr << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - fprintf(stderr, "inserted (0)\n"); + std::cerr << "inserted (0)\n"; } } @@ -278,7 +278,7 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) } else { - fprintf(stderr, "no random col!\n"); + std::cerr << "No random column.\n"; } } @@ -312,13 +312,9 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) : impl->children.begin() + fuzz_rand() % impl->children.size(); auto col = getRandomColumnLike(); if (col) - { impl->children.insert(pos, col); - } else - { - fprintf(stderr, "no random col!\n"); - } + std::cerr << "No random column.\n"; } // We don't have to recurse here to fuzz the children, this is handled by diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index ce39ab0994c..733ecaa979a 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -248,31 +248,23 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast(filt_pos)), zero16)); mask = ~mask; - if (0 == mask) - { - /// Nothing is inserted. 
- data_pos += chars_per_simd_elements; - } - else if (0xFFFF == mask) + if (0xFFFF == mask) { res->chars.insert(data_pos, data_pos + chars_per_simd_elements); - data_pos += chars_per_simd_elements; } else { size_t res_chars_size = res->chars.size(); - for (size_t i = 0; i < SIMD_BYTES; ++i) + while (mask) { - if (filt_pos[i]) - { - res->chars.resize(res_chars_size + n); - memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos, n); - res_chars_size += n; - } - data_pos += n; + size_t index = __builtin_ctz(mask); + res->chars.resize(res_chars_size + n); + memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos + index * n, n); + res_chars_size += n; + mask = mask & (mask - 1); } } - + data_pos += chars_per_simd_elements; filt_pos += SIMD_BYTES; } #endif diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 7f3cdaeec7f..a769cd93037 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -327,19 +327,18 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast(filt_pos)), zero16)); mask = ~mask; - if (0 == mask) - { - /// Nothing is inserted. - } - else if (0xFFFF == mask) + if (0xFFFF == mask) { res_data.insert(data_pos, data_pos + SIMD_BYTES); } else { - for (size_t i = 0; i < SIMD_BYTES; ++i) - if (filt_pos[i]) - res_data.push_back(data_pos[i]); + while (mask) + { + size_t index = __builtin_ctz(mask); + res_data.push_back(data_pos[index]); + mask = mask & (mask - 1); + } } filt_pos += SIMD_BYTES; diff --git a/src/Columns/ColumnsCommon.cpp b/src/Columns/ColumnsCommon.cpp index 41933ed08ed..a4d7de34382 100644 --- a/src/Columns/ColumnsCommon.cpp +++ b/src/Columns/ColumnsCommon.cpp @@ -241,11 +241,7 @@ namespace zero_vec)); mask = ~mask; - if (mask == 0) - { - /// SIMD_BYTES consecutive rows do not pass the filter - } - else if (mask == 0xffff) + if (mask == 0xffff) { /// SIMD_BYTES consecutive rows pass the filter const auto first = offsets_pos == offsets_begin; @@ -262,9 +258,12 @@ namespace } else { - for (size_t i = 0; i < SIMD_BYTES; ++i) - if (filt_pos[i]) - copy_array(offsets_pos + i); + while (mask) + { + size_t index = __builtin_ctz(mask); + copy_array(offsets_pos + index); + mask = mask & (mask - 1); + } } filt_pos += SIMD_BYTES; diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 205771a5f6c..013005442be 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -200,11 +200,13 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) } + bool allocation_traced = false; if (unlikely(current_profiler_limit && will_be > current_profiler_limit)) { BlockerInThread untrack_lock(VariableContext::Global); DB::TraceCollector::collect(DB::TraceType::Memory, StackTrace(), size); setOrRaiseProfilerLimit((will_be + profiler_step - 1) / profiler_step * profiler_step); + allocation_traced = true; } std::bernoulli_distribution sample(sample_probability); @@ -212,6 +214,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) { BlockerInThread untrack_lock(VariableContext::Global); DB::TraceCollector::collect(DB::TraceType::MemorySample, StackTrace(), size); + allocation_traced = true; } if (unlikely(current_hard_limit && will_be > current_hard_limit) && memoryTrackerCanThrow(level, false) && throw_if_memory_exceeded) @@ -230,17 +233,24 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) 
formatReadableSizeWithBinarySuffix(current_hard_limit)); } + bool peak_updated; if (throw_if_memory_exceeded) { /// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc BlockerInThread untrack_lock(VariableContext::Global); bool log_memory_usage = true; - updatePeak(will_be, log_memory_usage); + peak_updated = updatePeak(will_be, log_memory_usage); } else { bool log_memory_usage = false; - updatePeak(will_be, log_memory_usage); + peak_updated = updatePeak(will_be, log_memory_usage); + } + + if (peak_updated && allocation_traced) + { + BlockerInThread untrack_lock(VariableContext::Global); + DB::TraceCollector::collect(DB::TraceType::MemoryPeak, StackTrace(), will_be); } if (auto * loaded_next = parent.load(std::memory_order_relaxed)) @@ -259,7 +269,7 @@ void MemoryTracker::allocNoThrow(Int64 size) allocImpl(size, throw_if_memory_exceeded); } -void MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage) +bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage) { auto peak_old = peak.load(std::memory_order_relaxed); if (will_be > peak_old) /// Races doesn't matter. Could rewrite with CAS, but not worth. @@ -269,7 +279,10 @@ void MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage) if (log_memory_usage && (level == VariableContext::Process || level == VariableContext::Global) && will_be / log_peak_memory_usage_every > peak_old / log_peak_memory_usage_every) logMemoryUsage(will_be); + + return true; } + return false; } diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 36560fec334..b860c611be2 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -58,9 +58,11 @@ private: /// This description will be used as prefix into log messages (if isn't nullptr) std::atomic description_ptr = nullptr; - void updatePeak(Int64 will_be, bool log_memory_usage); + bool updatePeak(Int64 will_be, bool log_memory_usage); void logMemoryUsage(Int64 current) const; + void setOrRaiseProfilerLimit(Int64 value); + public: explicit MemoryTracker(VariableContext level_ = VariableContext::Thread); explicit MemoryTracker(MemoryTracker * parent_, VariableContext level_ = VariableContext::Thread); @@ -106,7 +108,6 @@ public: * Otherwise, set limit to new value, if new value is greater than previous limit. */ void setOrRaiseHardLimit(Int64 value); - void setOrRaiseProfilerLimit(Int64 value); void setFaultProbability(double value) { @@ -121,6 +122,7 @@ public: void setProfilerStep(Int64 value) { profiler_step = value; + setOrRaiseProfilerLimit(value); } /// next should be changed only once: from nullptr to some value. diff --git a/src/Common/TraceCollector.h b/src/Common/TraceCollector.h index 86e9d659d0a..d3bbc74726e 100644 --- a/src/Common/TraceCollector.h +++ b/src/Common/TraceCollector.h @@ -20,7 +20,8 @@ enum class TraceType : uint8_t Real, CPU, Memory, - MemorySample + MemorySample, + MemoryPeak, }; class TraceCollector diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index d79a94169b2..cf607a3d70e 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -289,7 +289,7 @@ ZooKeeper::~ZooKeeper() { try { - finalize(false, false); + finalize(false, false, "destructor called"); if (send_thread.joinable()) send_thread.join(); @@ -299,7 +299,7 @@ ZooKeeper::~ZooKeeper() } catch (...) 
{ - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log); } } @@ -317,6 +317,7 @@ ZooKeeper::ZooKeeper( session_timeout(session_timeout_), operation_timeout(std::min(operation_timeout_, session_timeout_)) { + log = &Poco::Logger::get("ZooKeeperClient"); std::atomic_store(&zk_log, std::move(zk_log_)); if (!root_path.empty()) @@ -450,6 +451,10 @@ void ZooKeeper::connect( message << fail_reasons.str() << "\n"; throw Exception(message.str(), Error::ZCONNECTIONLOSS); } + else + { + LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}", socket.peerAddress().toString(), session_id); + } } @@ -604,8 +609,8 @@ void ZooKeeper::sendThread() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); - finalize(true, false); + tryLogCurrentException(log); + finalize(true, false, "exception in sendThread"); } } @@ -663,8 +668,8 @@ void ZooKeeper::receiveThread() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); - finalize(false, true); + tryLogCurrentException(log); + finalize(false, true, "exception in receiveThread"); } } @@ -799,7 +804,7 @@ void ZooKeeper::receiveEvent() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log); /// Unrecoverable. Don't leave incorrect state in memory. if (!response) @@ -819,7 +824,7 @@ void ZooKeeper::receiveEvent() catch (...) { /// Throw initial exception, not exception from callback. - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log); } throw; @@ -832,10 +837,15 @@ void ZooKeeper::receiveEvent() } -void ZooKeeper::finalize(bool error_send, bool error_receive) +void ZooKeeper::finalize(bool error_send, bool error_receive, const String & reason) { /// If some thread (send/receive) already finalizing session don't try to do it - if (finalization_started.exchange(true)) + bool already_started = finalization_started.exchange(true); + + LOG_TEST(log, "Finalizing session {}: finalization_started={}, queue_closed={}, reason={}", + session_id, already_started, requests_queue.isClosed(), reason); + + if (already_started) return; auto expire_session_if_not_expired = [&] @@ -860,7 +870,7 @@ void ZooKeeper::finalize(bool error_send, bool error_receive) /// This happens for example, when "Cannot push request to queue within operation timeout". /// Just mark session expired in case of error on close request, otherwise sendThread may not stop. expire_session_if_not_expired(); - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log); } /// Send thread will exit after sending close request or on expired flag @@ -879,7 +889,7 @@ void ZooKeeper::finalize(bool error_send, bool error_receive) catch (...) { /// We must continue to execute all callbacks, because the user is waiting for them. - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log); } if (!error_receive && receive_thread.joinable()) @@ -908,7 +918,7 @@ void ZooKeeper::finalize(bool error_send, bool error_receive) catch (...) { /// We must continue to all other callbacks, because the user is waiting for them. - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log); } } } @@ -939,7 +949,7 @@ void ZooKeeper::finalize(bool error_send, bool error_receive) } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log); } } } @@ -967,7 +977,7 @@ void ZooKeeper::finalize(bool error_send, bool error_receive) } catch (...) 
{ - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log); } } } @@ -983,14 +993,14 @@ void ZooKeeper::finalize(bool error_send, bool error_receive) } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log); } } } } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log); } } @@ -1028,7 +1038,7 @@ void ZooKeeper::pushRequest(RequestInfo && info) } catch (...) { - finalize(false, false); + finalize(false, false, getCurrentExceptionMessage(false, false, false)); throw; } diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index ce37ca7b650..53908e5b0c7 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -187,7 +187,7 @@ public: /// it will do read in another session, that read may not see the /// already performed write. - void finalize() override { finalize(false, false); } + void finalize() override { finalize(false, false, "unknown"); } void setZooKeeperLog(std::shared_ptr zk_log_); @@ -240,6 +240,8 @@ private: ThreadFromGlobalPool send_thread; ThreadFromGlobalPool receive_thread; + Poco::Logger * log; + void connect( const Nodes & node, Poco::Timespan connection_timeout); @@ -257,7 +259,7 @@ private: void close(); /// Call all remaining callbacks and watches, passing errors to them. - void finalize(bool error_send, bool error_receive); + void finalize(bool error_send, bool error_receive, const String & reason); template void write(const T &); diff --git a/src/Core/examples/CMakeLists.txt b/src/Core/examples/CMakeLists.txt index 6b07dfbbfa6..c8846eb1743 100644 --- a/src/Core/examples/CMakeLists.txt +++ b/src/Core/examples/CMakeLists.txt @@ -13,3 +13,6 @@ target_link_libraries (mysql_protocol PRIVATE dbms) if(USE_SSL) target_include_directories (mysql_protocol SYSTEM PRIVATE ${OPENSSL_INCLUDE_DIR}) endif() + +add_executable (coro coro.cpp) +target_link_libraries (coro PRIVATE clickhouse_common_io) diff --git a/src/Core/examples/coro.cpp b/src/Core/examples/coro.cpp new file mode 100644 index 00000000000..0f152d8090a --- /dev/null +++ b/src/Core/examples/coro.cpp @@ -0,0 +1,189 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#if defined(__clang__) +#include + +namespace std +{ + using namespace experimental::coroutines_v1; +} + +#else +#include +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant" +#endif + + +template +struct suspend_value // NOLINT(readability-identifier-naming) +{ + constexpr bool await_ready() const noexcept { return true; } // NOLINT(readability-identifier-naming) + constexpr void await_suspend(std::coroutine_handle<>) const noexcept {} // NOLINT(readability-identifier-naming) + constexpr T await_resume() const noexcept // NOLINT(readability-identifier-naming) + { + std::cout << " ret " << val << std::endl; + return val; + } + + T val; +}; + +template +struct Task +{ + struct promise_type // NOLINT(readability-identifier-naming) + { + using coro_handle = std::coroutine_handle; + auto get_return_object() { return coro_handle::from_promise(*this); } // NOLINT(readability-identifier-naming) + auto initial_suspend() { return std::suspend_never(); } // NOLINT(readability-identifier-naming) + auto final_suspend() noexcept { return suspend_value{*r->value}; } // NOLINT(readability-identifier-naming) + //void return_void() {} + void return_value(T value_) { r->value = value_; } // NOLINT(readability-identifier-naming) 
+ void unhandled_exception() // NOLINT(readability-identifier-naming) + { + DB::tryLogCurrentException("Logger"); + r->exception = std::current_exception(); // NOLINT(bugprone-throw-keyword-missing) + } + + explicit promise_type(std::string tag_) : tag(tag_) {} + ~promise_type() { std::cout << "~promise_type " << tag << std::endl; } + std::string tag; + coro_handle next; + Task * r = nullptr; + }; + + using coro_handle = std::coroutine_handle; + + bool await_ready() const noexcept { return false; } // NOLINT(readability-identifier-naming) + void await_suspend(coro_handle g) noexcept // NOLINT(readability-identifier-naming) + { + std::cout << " await_suspend " << my.promise().tag << std::endl; + std::cout << " g tag " << g.promise().tag << std::endl; + g.promise().next = my; + } + T await_resume() noexcept // NOLINT(readability-identifier-naming) + { + std::cout << " await_res " << my.promise().tag << std::endl; + return *value; + } + + Task(coro_handle handle) : my(handle), tag(handle.promise().tag) // NOLINT(google-explicit-constructor) + { + assert(handle); + my.promise().r = this; + std::cout << " Task " << tag << std::endl; + } + Task(Task &) = delete; + Task(Task &&rhs) : my(rhs.my), tag(rhs.tag) + { + rhs.my = {}; + std::cout << " Task&& " << tag << std::endl; + } + static bool resumeImpl(Task *r) + { + if (r->value) + return false; + + auto & next = r->my.promise().next; + + if (next) + { + if (resumeImpl(next.promise().r)) + return true; + next = {}; + } + + if (!r->value) + { + r->my.resume(); + if (r->exception) + std::rethrow_exception(r->exception); + } + return !r->value; + } + + bool resume() + { + return resumeImpl(this); + } + + T res() + { + return *value; + } + + ~Task() + { + std::cout << " ~Task " << tag << std::endl; + } + +private: + coro_handle my; + std::string tag; + std::optional value; + std::exception_ptr exception; +}; + +Task boo([[maybe_unused]] std::string tag) +{ + std::cout << "x" << std::endl; + co_await std::suspend_always(); + std::cout << StackTrace().toString(); + std::cout << "y" << std::endl; + co_return 1; +} + +Task bar([[maybe_unused]] std::string tag) +{ + std::cout << "a" << std::endl; + int res1 = co_await boo("boo1"); + std::cout << "b " << res1 << std::endl; + int res2 = co_await boo("boo2"); + if (res2 == 1) + throw DB::Exception(1, "hello"); + std::cout << "c " << res2 << std::endl; + co_return res1 + res2; // 1 + 1 = 2 +} + +Task foo([[maybe_unused]] std::string tag) +{ + std::cout << "Hello" << std::endl; + auto res1 = co_await bar("bar1"); + std::cout << "Coro " << res1 << std::endl; + auto res2 = co_await bar("bar2"); + std::cout << "World " << res2 << std::endl; + co_return res1 * res2; // 2 * 2 = 4 +} + +int main() +{ + Poco::AutoPtr app_channel(new Poco::ConsoleChannel(std::cerr)); + Poco::Logger::root().setChannel(app_channel); + Poco::Logger::root().setLevel("trace"); + + LOG_INFO(&Poco::Logger::get(""), "Starting"); + + try + { + auto t = foo("foo"); + std::cout << ".. started" << std::endl; + while (t.resume()) + std::cout << ".. yielded" << std::endl; + std::cout << ".. 
done: " << t.res() << std::endl; + } + catch (DB::Exception & e) + { + std::cout << "Got exception " << e.what() << std::endl; + std::cout << e.getStackTraceString() << std::endl; + } +} diff --git a/src/DataStreams/ColumnGathererStream.cpp b/src/DataStreams/ColumnGathererStream.cpp index 9018870f3a4..9b2fac79bb0 100644 --- a/src/DataStreams/ColumnGathererStream.cpp +++ b/src/DataStreams/ColumnGathererStream.cpp @@ -11,104 +11,157 @@ namespace DB namespace ErrorCodes { - extern const int INCOMPATIBLE_COLUMNS; extern const int INCORRECT_NUMBER_OF_COLUMNS; extern const int EMPTY_DATA_PASSED; extern const int RECEIVED_EMPTY_DATA; } ColumnGathererStream::ColumnGathererStream( - const String & column_name_, const BlockInputStreams & source_streams, ReadBuffer & row_sources_buf_, - size_t block_preferred_size_) - : column_name(column_name_), sources(source_streams.size()), row_sources_buf(row_sources_buf_) - , block_preferred_size(block_preferred_size_), log(&Poco::Logger::get("ColumnGathererStream")) + size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_) + : sources(num_inputs), row_sources_buf(row_sources_buf_) + , block_preferred_size(block_preferred_size_) { - if (source_streams.empty()) + if (num_inputs == 0) throw Exception("There are no streams to gather", ErrorCodes::EMPTY_DATA_PASSED); +} - children.assign(source_streams.begin(), source_streams.end()); - - for (size_t i = 0; i < children.size(); ++i) +void ColumnGathererStream::initialize(Inputs inputs) +{ + for (size_t i = 0; i < inputs.size(); ++i) { - const Block & header = children[i]->getHeader(); - - /// Sometimes MergeTreeReader injects additional column with partitioning key - if (header.columns() > 2) - throw Exception( - "Block should have 1 or 2 columns, but contains " + toString(header.columns()), - ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); - - if (i == 0) + if (inputs[i].chunk) { - column.name = column_name; - column.type = header.getByName(column_name).type; - column.column = column.type->createColumn(); + sources[i].update(inputs[i].chunk.detachColumns().at(0)); + if (!result_column) + result_column = sources[i].column->cloneEmpty(); } - else if (header.getByName(column_name).column->getName() != column.column->getName()) - throw Exception("Column types don't match", ErrorCodes::INCOMPATIBLE_COLUMNS); } } - -Block ColumnGathererStream::readImpl() +IMergingAlgorithm::Status ColumnGathererStream::merge() { + /// Nothing to read after initialize. + if (!result_column) + return Status(Chunk(), true); + + if (source_to_fully_copy) /// Was set on a previous iteration + { + Chunk res; + res.addColumn(source_to_fully_copy->column); + merged_rows += source_to_fully_copy->size; + source_to_fully_copy->pos = source_to_fully_copy->size; + source_to_fully_copy = nullptr; + return Status(std::move(res)); + } + /// Special case: single source and there are no skipped rows - if (children.size() == 1 && row_sources_buf.eof() && !source_to_fully_copy) - return children[0]->read(); + /// Note: looks like this should never happen because row_sources_buf cannot just skip row info. 
+ if (sources.size() == 1 && row_sources_buf.eof()) + { + if (sources.front().pos < sources.front().size) + { + next_required_source = 0; + Chunk res; + merged_rows += sources.front().column->size(); + merged_bytes += sources.front().column->allocatedBytes(); + res.addColumn(std::move(sources.front().column)); + sources.front().pos = sources.front().size = 0; + return Status(std::move(res)); + } - if (!source_to_fully_copy && row_sources_buf.eof()) - return Block(); + if (next_required_source == -1) + return Status(Chunk(), true); - MutableColumnPtr output_column = column.column->cloneEmpty(); - output_block = Block{column.cloneEmpty()}; - /// Surprisingly this call may directly change output_block, bypassing + next_required_source = 0; + return Status(next_required_source); + } + + if (next_required_source != -1 && sources[next_required_source].size == 0) + throw Exception("Cannot fetch required block. Source " + toString(next_required_source), ErrorCodes::RECEIVED_EMPTY_DATA); + + /// Surprisingly this call may directly change some internal state of ColumnGathererStream. /// output_column. See ColumnGathererStream::gather. - output_column->gather(*this); - if (!output_column->empty()) - output_block.getByPosition(0).column = std::move(output_column); + result_column->gather(*this); - return output_block; + if (next_required_source != -1) + return Status(next_required_source); + + if (source_to_fully_copy && result_column->empty()) + { + Chunk res; + merged_rows += source_to_fully_copy->column->size(); + merged_bytes += source_to_fully_copy->column->allocatedBytes(); + res.addColumn(source_to_fully_copy->column); + source_to_fully_copy->pos = source_to_fully_copy->size; + source_to_fully_copy = nullptr; + return Status(std::move(res)); + } + + auto col = result_column->cloneEmpty(); + result_column.swap(col); + + Chunk res; + merged_rows += col->size(); + merged_bytes += col->allocatedBytes(); + res.addColumn(std::move(col)); + return Status(std::move(res), row_sources_buf.eof() && !source_to_fully_copy); } -void ColumnGathererStream::fetchNewBlock(Source & source, size_t source_num) +void ColumnGathererStream::consume(Input & input, size_t source_num) { - try - { - source.block = children[source_num]->read(); - source.update(column_name); - } - catch (Exception & e) - { - e.addMessage("Cannot fetch required block. Stream " + children[source_num]->getName() + ", part " + toString(source_num)); - throw; - } + auto & source = sources[source_num]; + if (input.chunk) + source.update(input.chunk.getColumns().at(0)); if (0 == source.size) { - throw Exception("Fetched block is empty. Stream " + children[source_num]->getName() + ", part " + toString(source_num), + throw Exception("Fetched block is empty. 
Source " + toString(source_num), ErrorCodes::RECEIVED_EMPTY_DATA); } } - -void ColumnGathererStream::readSuffixImpl() +ColumnGathererTransform::ColumnGathererTransform( + const Block & header, + size_t num_inputs, + ReadBuffer & row_sources_buf_, + size_t block_preferred_size_) + : IMergingTransform( + num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, row_sources_buf_, block_preferred_size_) + , log(&Poco::Logger::get("ColumnGathererStream")) { - const BlockStreamProfileInfo & profile_info = getProfileInfo(); + if (header.columns() != 1) + throw Exception( + "Header should have 1 column, but contains " + toString(header.columns()), + ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); +} +void ColumnGathererTransform::work() +{ + Stopwatch stopwatch; + IMergingTransform::work(); + elapsed_ns += stopwatch.elapsedNanoseconds(); +} + +void ColumnGathererTransform::onFinish() +{ + auto merged_rows = algorithm.getMergedRows(); + auto merged_bytes = algorithm.getMergedRows(); /// Don't print info for small parts (< 10M rows) - if (profile_info.rows < 10000000) + if (merged_rows < 10000000) return; - double seconds = profile_info.total_stopwatch.elapsedSeconds(); + double seconds = static_cast(elapsed_ns) / 1000000000ULL; + const auto & column_name = getOutputPort().getHeader().getByPosition(0).name; if (!seconds) LOG_DEBUG(log, "Gathered column {} ({} bytes/elem.) in 0 sec.", - column_name, static_cast(profile_info.bytes) / profile_info.rows); + column_name, static_cast(merged_bytes) / merged_rows); else LOG_DEBUG(log, "Gathered column {} ({} bytes/elem.) in {} sec., {} rows/sec., {}/sec.", - column_name, static_cast(profile_info.bytes) / profile_info.rows, seconds, - profile_info.rows / seconds, ReadableSize(profile_info.bytes / seconds)); + column_name, static_cast(merged_bytes) / merged_rows, seconds, + merged_rows / seconds, ReadableSize(merged_bytes / seconds)); } } diff --git a/src/DataStreams/ColumnGathererStream.h b/src/DataStreams/ColumnGathererStream.h index 05665ab3f42..2d013e596ce 100644 --- a/src/DataStreams/ColumnGathererStream.h +++ b/src/DataStreams/ColumnGathererStream.h @@ -1,8 +1,9 @@ #pragma once -#include #include #include +#include +#include namespace Poco { class Logger; } @@ -53,77 +54,91 @@ using MergedRowSources = PODArray; * Stream mask maps row number to index of source stream. * Streams should contain exactly one column. 
*/ -class ColumnGathererStream : public IBlockInputStream +class ColumnGathererStream final : public IMergingAlgorithm { public: - ColumnGathererStream( - const String & column_name_, const BlockInputStreams & source_streams, ReadBuffer & row_sources_buf_, - size_t block_preferred_size_ = DEFAULT_BLOCK_SIZE); + ColumnGathererStream(size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_ = DEFAULT_BLOCK_SIZE); - String getName() const override { return "ColumnGatherer"; } - - Block readImpl() override; - - void readSuffixImpl() override; - - Block getHeader() const override { return children.at(0)->getHeader(); } + void initialize(Inputs inputs) override; + void consume(Input & input, size_t source_num) override; + Status merge() override; /// for use in implementations of IColumn::gather() template void gather(Column & column_res); + UInt64 getMergedRows() const { return merged_rows; } + UInt64 getMergedBytes() const { return merged_bytes; } + private: /// Cache required fields struct Source { - const IColumn * column = nullptr; + ColumnPtr column; size_t pos = 0; size_t size = 0; - Block block; - void update(const String & name) + void update(ColumnPtr column_) { - column = block.getByName(name).column.get(); - size = block.rows(); + column = std::move(column_); + size = column->size(); pos = 0; } }; - void fetchNewBlock(Source & source, size_t source_num); - - String column_name; - ColumnWithTypeAndName column; + MutableColumnPtr result_column; std::vector sources; ReadBuffer & row_sources_buf; - size_t block_preferred_size; + const size_t block_preferred_size; Source * source_to_fully_copy = nullptr; - Block output_block; + + ssize_t next_required_source = -1; + size_t cur_block_preferred_size = 0; + + UInt64 merged_rows = 0; + UInt64 merged_bytes = 0; +}; + +class ColumnGathererTransform final : public IMergingTransform +{ +public: + ColumnGathererTransform( + const Block & header, + size_t num_inputs, + ReadBuffer & row_sources_buf_, + size_t block_preferred_size_ = DEFAULT_BLOCK_SIZE); + + String getName() const override { return "ColumnGathererTransform"; } + + void work() override; + +protected: + void onFinish() override; + UInt64 elapsed_ns = 0; Poco::Logger * log; }; + template void ColumnGathererStream::gather(Column & column_res) { - if (source_to_fully_copy) /// Was set on a previous iteration - { - output_block.getByPosition(0).column = source_to_fully_copy->block.getByName(column_name).column; - source_to_fully_copy->pos = source_to_fully_copy->size; - source_to_fully_copy = nullptr; - return; - } - row_sources_buf.nextIfAtEnd(); RowSourcePart * row_source_pos = reinterpret_cast(row_sources_buf.position()); RowSourcePart * row_sources_end = reinterpret_cast(row_sources_buf.buffer().end()); - size_t cur_block_preferred_size = std::min(static_cast(row_sources_end - row_source_pos), block_preferred_size); - column_res.reserve(cur_block_preferred_size); + if (next_required_source == -1) + { + /// Start new column. 
+ cur_block_preferred_size = std::min(static_cast(row_sources_end - row_source_pos), block_preferred_size); + column_res.reserve(cur_block_preferred_size); + } - size_t cur_size = 0; + size_t cur_size = column_res.size(); + next_required_source = -1; while (row_source_pos < row_sources_end && cur_size < cur_block_preferred_size) { @@ -131,13 +146,15 @@ void ColumnGathererStream::gather(Column & column_res) size_t source_num = row_source.getSourceNum(); Source & source = sources[source_num]; bool source_skip = row_source.getSkipFlag(); - ++row_source_pos; if (source.pos >= source.size) /// Fetch new block from source_num part { - fetchNewBlock(source, source_num); + next_required_source = source_num; + return; } + ++row_source_pos; + /// Consecutive optimization. TODO: precompute lengths size_t len = 1; size_t max_len = std::min(static_cast(row_sources_end - row_source_pos), source.size - source.pos); // interval should be in the same block @@ -156,14 +173,7 @@ void ColumnGathererStream::gather(Column & column_res) { /// If current block already contains data, return it. /// Whole column from current source will be returned on next read() iteration. - if (cur_size > 0) - { - source_to_fully_copy = &source; - return; - } - - output_block.getByPosition(0).column = source.block.getByName(column_name).column; - source.pos += len; + source_to_fully_copy = &source; return; } else if (len == 1) diff --git a/src/DataStreams/DistinctSortedBlockInputStream.cpp b/src/DataStreams/DistinctSortedBlockInputStream.cpp index eab706924c1..47421941b45 100644 --- a/src/DataStreams/DistinctSortedBlockInputStream.cpp +++ b/src/DataStreams/DistinctSortedBlockInputStream.cpp @@ -8,40 +8,28 @@ namespace ErrorCodes extern const int SET_SIZE_LIMIT_EXCEEDED; } -DistinctSortedBlockInputStream::DistinctSortedBlockInputStream( - const BlockInputStreamPtr & input, SortDescription sort_description, const SizeLimits & set_size_limits_, UInt64 limit_hint_, const Names & columns) - : description(std::move(sort_description)) +DistinctSortedTransform::DistinctSortedTransform( + const Block & header, SortDescription sort_description, const SizeLimits & set_size_limits_, UInt64 limit_hint_, const Names & columns) + : ISimpleTransform(header, header, true) + , description(std::move(sort_description)) , columns_names(columns) , limit_hint(limit_hint_) , set_size_limits(set_size_limits_) { - children.push_back(input); } -Block DistinctSortedBlockInputStream::readImpl() +void DistinctSortedTransform::transform(Chunk & chunk) { - /// Execute until end of stream or until - /// a block with some new records will be gotten. - for (;;) - { - /// Stop reading if we already reached the limit. 
- if (limit_hint && data.getTotalRowCount() >= limit_hint) - return Block(); - - Block block = children.back()->read(); - if (!block) - return Block(); - - const ColumnRawPtrs column_ptrs(getKeyColumns(block)); + const ColumnRawPtrs column_ptrs(getKeyColumns(chunk)); if (column_ptrs.empty()) - return block; + return; - const ColumnRawPtrs clearing_hint_columns(getClearingColumns(block, column_ptrs)); + const ColumnRawPtrs clearing_hint_columns(getClearingColumns(chunk, column_ptrs)); if (data.type == ClearableSetVariants::Type::EMPTY) data.init(ClearableSetVariants::chooseMethod(column_ptrs, key_sizes)); - const size_t rows = block.rows(); + const size_t rows = chunk.getNumRows(); IColumn::Filter filter(rows); bool has_new_data = false; @@ -59,25 +47,36 @@ Block DistinctSortedBlockInputStream::readImpl() /// Just go to the next block if there isn't any new record in the current one. if (!has_new_data) - continue; + { + chunk.clear(); + return; + } if (!set_size_limits.check(data.getTotalRowCount(), data.getTotalByteCount(), "DISTINCT", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED)) - return {}; + { + stopReading(); + chunk.clear(); + return; + } - prev_block.block = block; - prev_block.clearing_hint_columns = std::move(clearing_hint_columns); + /// Stop reading if we already reached the limit. + if (limit_hint && data.getTotalRowCount() >= limit_hint) + stopReading(); - size_t all_columns = block.columns(); + prev_chunk.chunk = std::move(chunk); + prev_chunk.clearing_hint_columns = std::move(clearing_hint_columns); + + size_t all_columns = prev_chunk.chunk.getNumColumns(); + Chunk res_chunk; for (size_t i = 0; i < all_columns; ++i) - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(filter, -1); + res_chunk.addColumn(prev_chunk.chunk.getColumns().at(i)->filter(filter, -1)); - return block; - } + chunk = std::move(res_chunk); } template -bool DistinctSortedBlockInputStream::buildFilter( +bool DistinctSortedTransform::buildFilter( Method & method, const ColumnRawPtrs & columns, const ColumnRawPtrs & clearing_hint_columns, @@ -90,8 +89,8 @@ bool DistinctSortedBlockInputStream::buildFilter( /// Compare last row of previous block and first row of current block, /// If rows not equal, we can clear HashSet, /// If clearing_hint_columns is empty, we CAN'T clear HashSet. - if (!clearing_hint_columns.empty() && !prev_block.clearing_hint_columns.empty() - && !rowsEqual(clearing_hint_columns, 0, prev_block.clearing_hint_columns, prev_block.block.rows() - 1)) + if (!clearing_hint_columns.empty() && !prev_chunk.clearing_hint_columns.empty() + && !rowsEqual(clearing_hint_columns, 0, prev_chunk.clearing_hint_columns, prev_chunk.chunk.getNumRows() - 1)) { method.data.clear(); } @@ -117,18 +116,20 @@ bool DistinctSortedBlockInputStream::buildFilter( return has_new_data; } -ColumnRawPtrs DistinctSortedBlockInputStream::getKeyColumns(const Block & block) const +ColumnRawPtrs DistinctSortedTransform::getKeyColumns(const Chunk & chunk) const { - size_t columns = columns_names.empty() ? block.columns() : columns_names.size(); + size_t columns = columns_names.empty() ? chunk.getNumColumns() : columns_names.size(); ColumnRawPtrs column_ptrs; column_ptrs.reserve(columns); for (size_t i = 0; i < columns; ++i) { - const auto & column = columns_names.empty() - ? 
block.safeGetByPosition(i).column - : block.getByName(columns_names[i]).column; + auto pos = i; + if (!columns_names.empty()) + pos = input.getHeader().getPositionByName(columns_names[i]); + + const auto & column = chunk.getColumns()[pos]; /// Ignore all constant columns. if (!isColumnConst(*column)) @@ -138,13 +139,13 @@ ColumnRawPtrs DistinctSortedBlockInputStream::getKeyColumns(const Block & block) return column_ptrs; } -ColumnRawPtrs DistinctSortedBlockInputStream::getClearingColumns(const Block & block, const ColumnRawPtrs & key_columns) const +ColumnRawPtrs DistinctSortedTransform::getClearingColumns(const Chunk & chunk, const ColumnRawPtrs & key_columns) const { ColumnRawPtrs clearing_hint_columns; clearing_hint_columns.reserve(description.size()); for (const auto & sort_column_description : description) { - const auto * sort_column_ptr = block.safeGetByPosition(sort_column_description.column_number).column.get(); + const auto * sort_column_ptr = chunk.getColumns().at(sort_column_description.column_number).get(); const auto it = std::find(key_columns.cbegin(), key_columns.cend(), sort_column_ptr); if (it != key_columns.cend()) /// if found in key_columns clearing_hint_columns.emplace_back(sort_column_ptr); @@ -154,7 +155,7 @@ ColumnRawPtrs DistinctSortedBlockInputStream::getClearingColumns(const Block & b return clearing_hint_columns; } -bool DistinctSortedBlockInputStream::rowsEqual(const ColumnRawPtrs & lhs, size_t n, const ColumnRawPtrs & rhs, size_t m) +bool DistinctSortedTransform::rowsEqual(const ColumnRawPtrs & lhs, size_t n, const ColumnRawPtrs & rhs, size_t m) { for (size_t column_index = 0, num_columns = lhs.size(); column_index < num_columns; ++column_index) { diff --git a/src/DataStreams/DistinctSortedBlockInputStream.h b/src/DataStreams/DistinctSortedBlockInputStream.h index 146c9326e5d..ddac6c18a64 100644 --- a/src/DataStreams/DistinctSortedBlockInputStream.h +++ b/src/DataStreams/DistinctSortedBlockInputStream.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include @@ -18,24 +18,22 @@ namespace DB * set limit_hint to non zero value. So we stop emitting new rows after * count of already emitted rows will reach the limit_hint. */ -class DistinctSortedBlockInputStream : public IBlockInputStream +class DistinctSortedTransform : public ISimpleTransform { public: /// Empty columns_ means all columns. 
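`DistinctSortedTransform` keeps a hash set of already-seen key rows, but because the input is sorted it may clear that set whenever a prefix of the sort key changes: equal rows cannot reappear once the sorted prefix has moved on, which keeps the set small. A minimal standalone sketch of that idea, using plain STL, a single integer key, and one sort column as the clearing hint (names are illustrative):

```cpp
#include <cassert>
#include <unordered_set>
#include <vector>

struct Row
{
    int sort_key;  // the data is sorted by this column
    int value;     // DISTINCT is computed over (sort_key, value)
};

// Emit only rows not seen before. Because input is sorted by sort_key,
// the "seen" set can be cleared whenever sort_key changes.
std::vector<Row> distinctSorted(const std::vector<Row> & input)
{
    std::vector<Row> out;
    std::unordered_set<long long> seen;
    bool has_prev = false;
    int prev_key = 0;

    for (const Row & row : input)
    {
        if (has_prev && row.sort_key != prev_key)
            seen.clear();                      // clearing hint: the sorted prefix moved on
        prev_key = row.sort_key;
        has_prev = true;

        long long packed = (static_cast<long long>(row.sort_key) << 32) | static_cast<unsigned>(row.value);
        if (seen.insert(packed).second)        // first time we see this (key, value) pair
            out.push_back(row);
    }
    return out;
}

int main()
{
    std::vector<Row> rows = {{1, 7}, {1, 7}, {1, 8}, {2, 7}, {2, 7}};
    assert(distinctSorted(rows).size() == 3);  // (1,7), (1,8), (2,7)
}
```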
- DistinctSortedBlockInputStream(const BlockInputStreamPtr & input, SortDescription sort_description, const SizeLimits & set_size_limits_, UInt64 limit_hint_, const Names & columns); + DistinctSortedTransform(const Block & header, SortDescription sort_description, const SizeLimits & set_size_limits_, UInt64 limit_hint_, const Names & columns); - String getName() const override { return "DistinctSorted"; } - - Block getHeader() const override { return children.at(0)->getHeader(); } + String getName() const override { return "DistinctSortedTransform"; } protected: - Block readImpl() override; + void transform(Chunk & chunk) override; private: - ColumnRawPtrs getKeyColumns(const Block & block) const; + ColumnRawPtrs getKeyColumns(const Chunk & chunk) const; /// When clearing_columns changed, we can clean HashSet to memory optimization /// clearing_columns is a left-prefix of SortDescription exists in key_columns - ColumnRawPtrs getClearingColumns(const Block & block, const ColumnRawPtrs & key_columns) const; + ColumnRawPtrs getClearingColumns(const Chunk & chunk, const ColumnRawPtrs & key_columns) const; static bool rowsEqual(const ColumnRawPtrs & lhs, size_t n, const ColumnRawPtrs & rhs, size_t m); /// return true if has new data @@ -50,12 +48,12 @@ private: SortDescription description; - struct PreviousBlock + struct PreviousChunk { - Block block; + Chunk chunk; ColumnRawPtrs clearing_hint_columns; }; - PreviousBlock prev_block; + PreviousChunk prev_chunk; Names columns_names; ClearableSetVariants data; diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index 05d4ba0a395..b476f689e60 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -16,18 +16,17 @@ namespace DB { -TTLBlockInputStream::TTLBlockInputStream( - const BlockInputStreamPtr & input_, +TTLTransform::TTLTransform( + const Block & header_, const MergeTreeData & storage_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeData::MutableDataPartPtr & data_part_, time_t current_time_, bool force_) - : data_part(data_part_) - , log(&Poco::Logger::get(storage_.getLogName() + " (TTLBlockInputStream)")) + : IAccumulatingTransform(header_, header_) + , data_part(data_part_) + , log(&Poco::Logger::get(storage_.getLogName() + " (TTLTransform)")) { - children.push_back(input_); - header = children.at(0)->getHeader(); auto old_ttl_infos = data_part->ttl_infos; if (metadata_snapshot_->hasRowsTTL()) @@ -50,7 +49,7 @@ TTLBlockInputStream::TTLBlockInputStream( for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs()) algorithms.emplace_back(std::make_unique( - group_by_ttl, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_, header, storage_)); + group_by_ttl, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_, getInputPort().getHeader(), storage_)); if (metadata_snapshot_->hasAnyColumnTTL()) { @@ -98,22 +97,40 @@ Block reorderColumns(Block block, const Block & header) return res; } -Block TTLBlockInputStream::readImpl() +void TTLTransform::consume(Chunk chunk) { if (all_data_dropped) - return {}; + { + finishConsume(); + return; + } + + auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); - auto block = children.at(0)->read(); for (const auto & algorithm : algorithms) algorithm->execute(block); if (!block) - return block; + return; - return reorderColumns(std::move(block), header); + size_t num_rows = block.rows(); + 
setReadyChunk(Chunk(reorderColumns(std::move(block), getOutputPort().getHeader()).getColumns(), num_rows)); } -void TTLBlockInputStream::readSuffixImpl() +Chunk TTLTransform::generate() +{ + Block block; + for (const auto & algorithm : algorithms) + algorithm->execute(block); + + if (!block) + return {}; + + size_t num_rows = block.rows(); + return Chunk(reorderColumns(std::move(block), getOutputPort().getHeader()).getColumns(), num_rows); +} + +void TTLTransform::finalize() { data_part->ttl_infos = {}; for (const auto & algorithm : algorithms) @@ -126,4 +143,13 @@ void TTLBlockInputStream::readSuffixImpl() } } +IProcessor::Status TTLTransform::prepare() +{ + auto status = IAccumulatingTransform::prepare(); + if (status == Status::Finished) + finalize(); + + return status; +} + } diff --git a/src/DataStreams/TTLBlockInputStream.h b/src/DataStreams/TTLBlockInputStream.h index bf854d9cc9c..50b28e81bdf 100644 --- a/src/DataStreams/TTLBlockInputStream.h +++ b/src/DataStreams/TTLBlockInputStream.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include #include #include @@ -12,11 +12,11 @@ namespace DB { -class TTLBlockInputStream : public IBlockInputStream +class TTLTransform : public IAccumulatingTransform { public: - TTLBlockInputStream( - const BlockInputStreamPtr & input_, + TTLTransform( + const Block & header_, const MergeTreeData & storage_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeData::MutableDataPartPtr & data_part_, @@ -25,13 +25,15 @@ public: ); String getName() const override { return "TTL"; } - Block getHeader() const override { return header; } + + Status prepare() override; protected: - Block readImpl() override; + void consume(Chunk chunk) override; + Chunk generate() override; /// Finalizes ttl infos and updates data part - void readSuffixImpl() override; + void finalize(); private: std::vector algorithms; @@ -41,7 +43,6 @@ private: /// ttl_infos and empty_columns are updating while reading const MergeTreeData::MutableDataPartPtr & data_part; Poco::Logger * log; - Block header; }; } diff --git a/src/DataStreams/TTLCalcInputStream.cpp b/src/DataStreams/TTLCalcInputStream.cpp index 2353e9ec259..fe85e40c003 100644 --- a/src/DataStreams/TTLCalcInputStream.cpp +++ b/src/DataStreams/TTLCalcInputStream.cpp @@ -4,18 +4,17 @@ namespace DB { -TTLCalcInputStream::TTLCalcInputStream( - const BlockInputStreamPtr & input_, +TTLCalcTransform::TTLCalcTransform( + const Block & header_, const MergeTreeData & storage_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeData::MutableDataPartPtr & data_part_, time_t current_time_, bool force_) - : data_part(data_part_) - , log(&Poco::Logger::get(storage_.getLogName() + " (TTLCalcInputStream)")) + : IAccumulatingTransform(header_, header_) + , data_part(data_part_) + , log(&Poco::Logger::get(storage_.getLogName() + " (TTLCalcTransform)")) { - children.push_back(input_); - header = children.at(0)->getHeader(); auto old_ttl_infos = data_part->ttl_infos; if (metadata_snapshot_->hasRowsTTL()) @@ -51,27 +50,52 @@ TTLCalcInputStream::TTLCalcInputStream( recompression_ttl, TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_)); } -Block TTLCalcInputStream::readImpl() +void TTLCalcTransform::consume(Chunk chunk) { - auto block = children.at(0)->read(); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); for (const auto & algorithm : algorithms) algorithm->execute(block); if (!block) - 
return block; + return; - Block res; - for (const auto & col : header) - res.insert(block.getByName(col.name)); + Chunk res; + for (const auto & col : getOutputPort().getHeader()) + res.addColumn(block.getByName(col.name).column); + + setReadyChunk(std::move(res)); +} + +Chunk TTLCalcTransform::generate() +{ + Block block; + for (const auto & algorithm : algorithms) + algorithm->execute(block); + + if (!block) + return {}; + + Chunk res; + for (const auto & col : getOutputPort().getHeader()) + res.addColumn(block.getByName(col.name).column); return res; } -void TTLCalcInputStream::readSuffixImpl() +void TTLCalcTransform::finalize() { data_part->ttl_infos = {}; for (const auto & algorithm : algorithms) algorithm->finalize(data_part); } +IProcessor::Status TTLCalcTransform::prepare() +{ + auto status = IAccumulatingTransform::prepare(); + if (status == Status::Finished) + finalize(); + + return status; +} + } diff --git a/src/DataStreams/TTLCalcInputStream.h b/src/DataStreams/TTLCalcInputStream.h index 20148eadfc2..b6318026b8c 100644 --- a/src/DataStreams/TTLCalcInputStream.h +++ b/src/DataStreams/TTLCalcInputStream.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include #include #include @@ -11,11 +11,11 @@ namespace DB { -class TTLCalcInputStream : public IBlockInputStream +class TTLCalcTransform : public IAccumulatingTransform { public: - TTLCalcInputStream( - const BlockInputStreamPtr & input_, + TTLCalcTransform( + const Block & header_, const MergeTreeData & storage_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeData::MutableDataPartPtr & data_part_, @@ -24,13 +24,14 @@ public: ); String getName() const override { return "TTL_CALC"; } - Block getHeader() const override { return header; } + Status prepare() override; protected: - Block readImpl() override; + void consume(Chunk chunk) override; + Chunk generate() override; /// Finalizes ttl infos and updates data part - void readSuffixImpl() override; + void finalize(); private: std::vector algorithms; @@ -38,7 +39,6 @@ private: /// ttl_infos and empty_columns are updating while reading const MergeTreeData::MutableDataPartPtr & data_part; Poco::Logger * log; - Block header; }; } diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index 73800e84256..98f697ef641 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -16,6 +16,8 @@ void DDLDependencyVisitor::visit(const ASTPtr & ast, Data & data) visit(*function, data); else if (const auto * dict_source = ast->as()) visit(*dict_source, data); + else if (const auto * storage = ast->as()) + visit(*storage, data); } bool DDLDependencyVisitor::needChildVisit(const ASTPtr & node, const ASTPtr & child) @@ -66,6 +68,16 @@ void DDLDependencyVisitor::visit(const ASTFunctionWithKeyValueArguments & dict_s data.dependencies.emplace(std::move(info->table_name)); } +void DDLDependencyVisitor::visit(const ASTStorage & storage, Data & data) +{ + if (!storage.engine) + return; + if (storage.engine->name != "Dictionary") + return; + + extractTableNameFromArgument(*storage.engine, data, 0); +} + void DDLDependencyVisitor::extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx) { diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h index c0b39d70b08..5779aee7d33 100644 --- a/src/Databases/DDLDependencyVisitor.h +++ b/src/Databases/DDLDependencyVisitor.h @@ -8,6 +8,7 @@ namespace DB class ASTFunction; class 
ASTFunctionWithKeyValueArguments; +class ASTStorage; /// Visits ASTCreateQuery and extracts names of table (or dictionary) dependencies /// from column default expressions (joinGet, dictGet, etc) @@ -33,6 +34,7 @@ public: private: static void visit(const ASTFunction & function, Data & data); static void visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data); + static void visit(const ASTStorage & storage, Data & data); static void extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx); }; diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index 4f5743035a7..5ac4180ec27 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -250,7 +250,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( "and i.oid = ix.indexrelid " "and a.attrelid = t.oid " "and a.attnum = ANY(ix.indkey) " - "and t.relkind = 'r' " /// simple tables + "and t.relkind in ('r', 'p') " /// simple tables "and t.relname = {} " /// Connection is already done to a needed database, only table name is needed. "and ix.indisreplident = 't' " /// index is is replica identity index "ORDER BY a.attname", /// column names diff --git a/src/Functions/FunctionConstantBase.h b/src/Functions/FunctionConstantBase.h index 35096a9942f..2d237c77256 100644 --- a/src/Functions/FunctionConstantBase.h +++ b/src/Functions/FunctionConstantBase.h @@ -12,18 +12,9 @@ template class FunctionConstantBase : public IFunction { public: - - /// For server-level constants (uptime(), version(), etc) - explicit FunctionConstantBase(ContextPtr context, T && constant_value_) - : is_distributed(context->isDistributed()) - , constant_value(std::forward(constant_value_)) - { - } - - /// For real constants (pi(), e(), etc) - explicit FunctionConstantBase(const T & constant_value_) - : is_distributed(false) - , constant_value(constant_value_) + template + explicit FunctionConstantBase(U && constant_value_, bool is_distributed_ = false) + : constant_value(std::forward(constant_value_)), is_distributed(is_distributed_) { } @@ -56,8 +47,8 @@ public: } private: - bool is_distributed; const T constant_value; + bool is_distributed; }; } diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index dfd986c5f82..dc062ab148a 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -81,6 +81,7 @@ void registerFunctionQueryID(FunctionFactory & factory); void registerFunctionInitialQueryID(FunctionFactory & factory); void registerFunctionServerUUID(FunctionFactory &); void registerFunctionZooKeeperSessionUptime(FunctionFactory &); +void registerFunctionGetOSKernelVersion(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -162,6 +163,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionInitialQueryID(factory); registerFunctionServerUUID(factory); registerFunctionZooKeeperSessionUptime(factory); + registerFunctionGetOSKernelVersion(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index 9a53a5cf582..900a6dbf2a9 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -7,6 +7,10 @@ #include #include +#if defined(OS_LINUX) +# include +#endif + #if 
!defined(ARCADIA_BUILD) # include #endif @@ -24,7 +28,7 @@ namespace public: static constexpr auto name = "buildId"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionBuildId(ContextPtr context) : FunctionConstantBase(context, SymbolIndex::instance()->getBuildIDHex()) {} + explicit FunctionBuildId(ContextPtr context) : FunctionConstantBase(SymbolIndex::instance()->getBuildIDHex(), context->isDistributed()) {} }; #endif @@ -35,7 +39,7 @@ namespace public: static constexpr auto name = "hostName"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionHostName(ContextPtr context) : FunctionConstantBase(context, DNSResolver::instance().getHostName()) {} + explicit FunctionHostName(ContextPtr context) : FunctionConstantBase(DNSResolver::instance().getHostName(), context->isDistributed()) {} }; @@ -44,7 +48,7 @@ namespace public: static constexpr auto name = "serverUUID"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionServerUUID(ContextPtr context) : FunctionConstantBase(context, ServerUUID::get()) {} + explicit FunctionServerUUID(ContextPtr context) : FunctionConstantBase(ServerUUID::get(), context->isDistributed()) {} }; @@ -53,7 +57,7 @@ namespace public: static constexpr auto name = "tcpPort"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionTcpPort(ContextPtr context) : FunctionConstantBase(context, context->getTCPPort()) {} + explicit FunctionTcpPort(ContextPtr context) : FunctionConstantBase(context->getTCPPort(), context->isDistributed()) {} }; @@ -63,7 +67,7 @@ namespace public: static constexpr auto name = "timezone"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionTimezone(ContextPtr context) : FunctionConstantBase(context, String{DateLUT::instance().getTimeZone()}) {} + explicit FunctionTimezone(ContextPtr context) : FunctionConstantBase(String{DateLUT::instance().getTimeZone()}, context->isDistributed()) {} }; @@ -73,7 +77,7 @@ namespace public: static constexpr auto name = "uptime"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionUptime(ContextPtr context) : FunctionConstantBase(context, context->getUptimeSeconds()) {} + explicit FunctionUptime(ContextPtr context) : FunctionConstantBase(context->getUptimeSeconds(), context->isDistributed()) {} }; @@ -83,16 +87,30 @@ namespace public: static constexpr auto name = "version"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionVersion(ContextPtr context) : FunctionConstantBase(context, VERSION_STRING) {} + explicit FunctionVersion(ContextPtr context) : FunctionConstantBase(VERSION_STRING, context->isDistributed()) {} }; class FunctionZooKeeperSessionUptime : public FunctionConstantBase { public: static constexpr auto name = "zookeeperSessionUptime"; - explicit FunctionZooKeeperSessionUptime(ContextPtr context) : FunctionConstantBase(context, context->getZooKeeperSessionUptime()) {} + explicit FunctionZooKeeperSessionUptime(ContextPtr context) + : FunctionConstantBase(context->getZooKeeperSessionUptime(), context->isDistributed()) + { + } static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } }; + +#if defined(OS_LINUX) + class FunctionGetOSKernelVersion : public FunctionConstantBase + { + public: + static constexpr auto name = 
"getOSKernelVersion"; + explicit FunctionGetOSKernelVersion(ContextPtr context) : FunctionConstantBase(Poco::Environment::osName() + " " + Poco::Environment::osVersion(), context->isDistributed()) {} + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + }; +#endif + } @@ -140,5 +158,14 @@ void registerFunctionZooKeeperSessionUptime(FunctionFactory & factory) factory.registerFunction(); } + +void registerFunctionGetOSKernelVersion([[maybe_unused]] FunctionFactory & factory) +{ +#if defined(OS_LINUX) + factory.registerFunction(); +#endif +} + + } diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index 661497c88f5..ed00de6e803 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -35,8 +35,9 @@ public: enum class HTTPMethod : uint8_t { UNKNOWN = 0, - GET = 1, - POST = 2, + GET = 1, + POST = 2, + OPTIONS = 3 }; enum class QueryKind : uint8_t diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 540d5c76c97..e5a129cbe12 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -932,7 +932,7 @@ void MutationsInterpreter::validate() auto pipeline = addStreamsForLaterStages(stages, plan); } -BlockInputStreamPtr MutationsInterpreter::execute() +QueryPipeline MutationsInterpreter::execute() { if (!can_execute) throw Exception("Cannot execute mutations interpreter because can_execute flag set to false", ErrorCodes::LOGICAL_ERROR); @@ -956,12 +956,11 @@ BlockInputStreamPtr MutationsInterpreter::execute() } auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); - BlockInputStreamPtr result_stream = std::make_shared(std::move(pipeline)); if (!updated_header) - updated_header = std::make_unique(result_stream->getHeader()); + updated_header = std::make_unique(pipeline.getHeader()); - return result_stream; + return pipeline; } Block MutationsInterpreter::getUpdatedHeader() const diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index b0540f7d2ed..7b0ccb3bae5 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -50,7 +50,7 @@ public: size_t evaluateCommandsSize(); /// The resulting stream will return blocks containing only changed columns and columns, that we need to recalculate indices. - BlockInputStreamPtr execute(); + QueryPipeline execute(); /// Only changed columns. 
Block getUpdatedHeader() const; diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index f2860235117..a96713e3b5d 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -120,7 +120,7 @@ static NamesAndTypesList getColumnsList(const ASTExpressionList * columns_defini auto * literal = child->as(); new_child->arguments = std::make_shared(); - new_child->arguments->children.push_back(std::make_shared(literal->value.get())); + new_child->arguments->children.push_back(std::make_shared(literal->value.safeGet())); new_child->arguments->children.push_back(std::make_shared(Int16(++i))); child = new_child; } diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 8576f4662ec..f8402cf0287 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -203,7 +203,6 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as if (query_context->hasTraceCollector()) { /// Set up memory profiling - thread_group->memory_tracker.setOrRaiseProfilerLimit(settings.memory_profiler_step); thread_group->memory_tracker.setProfilerStep(settings.memory_profiler_step); thread_group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); } diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 615180d27dd..ce0062e8c77 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -527,7 +527,7 @@ void SystemLog::prepareTable() auto alias_columns = LogElement::getNamesAndAliases(); auto current_query = InterpreterCreateQuery::formatColumns(ordinary_columns, alias_columns); - if (old_query->getTreeHash() != current_query->getTreeHash()) + if (serializeAST(*old_query) != serializeAST(*current_query)) { /// Rename the existing table. 
int suffix = 0; diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index dac27aebe58..c16a73e75dc 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -19,6 +19,7 @@ const TraceDataType::Values TraceLogElement::trace_values = {"CPU", static_cast(TraceType::CPU)}, {"Memory", static_cast(TraceType::Memory)}, {"MemorySample", static_cast(TraceType::MemorySample)}, + {"MemoryPeak", static_cast(TraceType::MemoryPeak)}, }; NamesAndTypesList TraceLogElement::getNamesAndTypes() diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index d4525883e36..1da1bfba491 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -481,7 +481,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (attach && s_from.ignore(pos, expected)) { - ParserLiteral from_path_p; + ParserStringLiteral from_path_p; if (!from_path_p.parse(pos, from_path, expected)) return false; } @@ -896,7 +896,7 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (ParserKeyword{"TO INNER UUID"}.ignore(pos, expected)) { - ParserLiteral literal_p; + ParserStringLiteral literal_p; if (!literal_p.parse(pos, to_inner_uuid, expected)) return false; } diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 4800bfca2ce..d904f3755bc 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -104,7 +104,9 @@ Columns Chunk::detachColumns() void Chunk::addColumn(ColumnPtr column) { - if (column->size() != num_rows) + if (empty()) + num_rows = column->size(); + else if (column->size() != num_rows) throw Exception("Invalid number of rows in Chunk column " + column->getName()+ ": expected " + toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR); diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index feb826d5aa0..3dada0d62be 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -568,7 +569,17 @@ void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptrgetName(), header_column.type->getName())); + throw; + } + column.type = header_column.type; num_rows = column.column->size(); columns_list.push_back(std::move(column.column)); diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 5137318c6e1..8f088a3f84a 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -15,6 +15,7 @@ #include + namespace DB { diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index aefa1fe68e1..b7e7ac2ac33 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -24,13 +24,16 @@ #include #include #include +#include #include #include #include #include #include + #include #include +#include #if !defined(ARCADIA_BUILD) # include @@ -107,6 +110,45 @@ namespace ErrorCodes extern const int HTTP_LENGTH_REQUIRED; } +namespace +{ +bool tryAddHeadersFromConfig(HTTPServerResponse & response, const Poco::Util::LayeredConfiguration & config) +{ + if (config.has("http_options_response")) + { + Strings config_keys; + config.keys("http_options_response", config_keys); + for (const std::string & config_key : config_keys) + { 
+ if (config_key == "header" || config_key.starts_with("header[")) + { + /// If there is empty header name, it will not be processed and message about it will be in logs + if (config.getString("http_options_response." + config_key + ".name", "").empty()) + LOG_WARNING(&Poco::Logger::get("processOptionsRequest"), "Empty header was found in config. It will not be processed."); + else + response.add(config.getString("http_options_response." + config_key + ".name", ""), + config.getString("http_options_response." + config_key + ".value", "")); + + } + } + return true; + } + return false; +} + +/// Process options request. Useful for CORS. +void processOptionsRequest(HTTPServerResponse & response, const Poco::Util::LayeredConfiguration & config) +{ + /// If can add some headers from config + if (tryAddHeadersFromConfig(response, config)) + { + response.setKeepAlive(false); + response.setStatusAndReason(HTTPResponse::HTTP_NO_CONTENT); + response.send(); + } +} +} + static String base64Decode(const String & encoded) { String decoded; @@ -703,9 +745,16 @@ void HTTPHandler::processQuery( if (in_post_compressed && settings.http_native_compression_disable_checksumming_on_decompress) static_cast(*in_post_maybe_compressed).disableChecksumming(); - /// Add CORS header if 'add_http_cors_header' setting is turned on and the client passed - /// Origin header. - used_output.out->addHeaderCORS(settings.add_http_cors_header && !request.get("Origin", "").empty()); + /// Add CORS header if 'add_http_cors_header' setting is turned on send * in Access-Control-Allow-Origin, + /// or if config has http_options_response, which means that there + /// are some headers to be sent, and the client passed Origin header. + if (!request.get("Origin", "").empty()) + { + if (config.has("http_options_response")) + tryAddHeadersFromConfig(response, config); + else if (settings.add_http_cors_header) + used_output.out->addHeaderCORS(true); + } auto append_callback = [context = context] (ProgressCallback callback) { @@ -854,6 +903,11 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse try { + if (request.getMethod() == HTTPServerRequest::HTTP_OPTIONS) + { + processOptionsRequest(response, server.config()); + return; + } response.setContentType("text/plain; charset=UTF-8"); response.set("X-ClickHouse-Server-Display-Name", server_display_name); /// For keep-alive to work. diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index 62225e7e794..526b86a5c28 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -123,7 +123,7 @@ static inline HTTPRequestHandlerFactoryPtr createInterserverHTTPHandlerFactory(I addCommonDefaultHandlersFactory(*factory, server); auto main_handler = std::make_shared>(server); - main_handler->allowPostGetOrHeadRequest(); + main_handler->allowPostAndGetParamsAndOptionsRequest(); factory->addHandler(main_handler); return factory; @@ -180,7 +180,7 @@ void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer addCommonDefaultHandlersFactory(factory, server); auto query_handler = std::make_shared>(server, "query"); - query_handler->allowPostGetOrHeadRequest(); + query_handler->allowPostAndGetParamsAndOptionsRequest(); factory.addHandler(query_handler); /// We check that prometheus handler will be served on current (default) port. 
diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index ed5431113ef..ef16806dfdc 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -103,14 +103,15 @@ public: }); } - /// Handle Post request or (Get or Head) requests with params - void allowPostGetOrHeadRequest() + /// Handle Post request or (Get or Head) with params or OPTIONS requests + void allowPostAndGetParamsAndOptionsRequest() { addFilter([](const auto & request) { return (request.getURI().find('?') != std::string::npos && (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD)) + || request.getMethod() == Poco::Net::HTTPRequest::HTTP_OPTIONS || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST; }); } diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 5f981b2ab1a..626f43a1ad2 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -589,7 +589,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( block.getNamesAndTypesList(), {}, CompressionCodecFactory::instance().get("NONE", {})); - part_out.writePrefix(); part_out.write(block); part_out.writeSuffixAndFinalizePart(new_projection_part); new_projection_part->checksums.checkEqual(checksums, /* have_uncompressed = */ true); @@ -612,7 +611,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( MergedBlockOutputStream part_out( new_data_part, metadata_snapshot, block.getNamesAndTypesList(), {}, CompressionCodecFactory::instance().get("NONE", {})); - part_out.writePrefix(); part_out.write(block); part_out.writeSuffixAndFinalizePart(new_data_part); new_data_part->checksums.checkEqual(checksums, /* have_uncompressed = */ true); diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index 0e689b7c84c..36fbe76cca2 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -2,22 +2,25 @@ #include #include -#include #include #include namespace DB { -class IMergedBlockOutputStream : public IBlockOutputStream +class IMergedBlockOutputStream { public: IMergedBlockOutputStream( const MergeTreeDataPartPtr & data_part, const StorageMetadataPtr & metadata_snapshot_); + virtual ~IMergedBlockOutputStream() = default; + using WrittenOffsetColumns = std::set; + virtual void write(const Block & block) = 0; + const MergeTreeIndexGranularity & getIndexGranularity() const { return writer->getIndexGranularity(); @@ -35,7 +38,6 @@ protected: NamesAndTypesList & columns, MergeTreeData::DataPart::Checksums & checksums); -protected: const MergeTreeData & storage; StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index cd2668988a8..07aabf64dfd 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -82,7 +82,6 @@ MergeListElement::MergeListElement( memory_tracker.setDescription("Mutate/Merge"); memory_tracker.setProfilerStep(memory_profiler_step); - memory_tracker.setOrRaiseProfilerLimit(memory_profiler_step); memory_tracker.setSampleProbability(memory_profiler_sample_probability); } diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 5ed148c48e1..0810d45a805 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -11,6 
+11,7 @@ #include "Storages/MergeTree/MergeTreeSequentialSource.h" #include "Storages/MergeTree/FutureMergedMutatedPart.h" #include "Processors/Transforms/ExpressionTransform.h" +#include "Processors/Transforms/MaterializingTransform.h" #include "Processors/Merges/MergingSortedTransform.h" #include "Processors/Merges/CollapsingSortedTransform.h" #include "Processors/Merges/SummingSortedTransform.h" @@ -236,11 +237,6 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() ctx->compression_codec, ctx->blocks_are_granules_size); - global_ctx->merged_stream->readPrefix(); - - /// TODO: const - const_cast(*global_ctx->to).writePrefix(); - global_ctx->rows_written = 0; ctx->initial_reservation = global_ctx->space_reservation ? global_ctx->space_reservation->getSize() : 0; @@ -301,14 +297,17 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::execute() bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl() { Block block; - if (!ctx->is_cancelled() && (block = global_ctx->merged_stream->read())) + if (!ctx->is_cancelled() && (global_ctx->merging_executor->pull(block))) { global_ctx->rows_written += block.rows(); const_cast(*global_ctx->to).write(block); - global_ctx->merge_list_element_ptr->rows_written = global_ctx->merged_stream->getProfileInfo().rows; - global_ctx->merge_list_element_ptr->bytes_written_uncompressed = global_ctx->merged_stream->getProfileInfo().bytes; + UInt64 result_rows = 0; + UInt64 result_bytes = 0; + global_ctx->merged_pipeline.tryGetResultRowsAndBytes(result_rows, result_bytes); + global_ctx->merge_list_element_ptr->rows_written = result_rows; + global_ctx->merge_list_element_ptr->bytes_written_uncompressed = result_bytes; /// Reservation updates is not performed yet, during the merge it may lead to higher free space requirements if (global_ctx->space_reservation && ctx->sum_input_rows_upper_bound) @@ -326,8 +325,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl() return true; } - global_ctx->merged_stream->readSuffix(); - global_ctx->merged_stream.reset(); + global_ctx->merging_executor.reset(); + global_ctx->merged_pipeline.reset(); if (global_ctx->merges_blocker->isCancelled()) throw Exception("Cancelled merging parts", ErrorCodes::ABORTED); @@ -353,8 +352,6 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const global_ctx->merge_list_element_ptr->columns_written = global_ctx->merging_column_names.size(); global_ctx->merge_list_element_ptr->progress.store(ctx->column_sizes->keyColumnsWeight(), std::memory_order_relaxed); - ctx->column_part_streams = BlockInputStreams(global_ctx->future_part->parts.size()); - ctx->rows_sources_write_buf->next(); ctx->rows_sources_uncompressed_write_buf->next(); /// Ensure data has written to disk. 
@@ -389,6 +386,7 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const global_ctx->column_progress = std::make_unique(ctx->progress_before, ctx->column_sizes->columnWeight(column_name)); + Pipes pipes; for (size_t part_num = 0; part_num < global_ctx->future_part->parts.size(); ++part_num) { auto column_part_source = std::make_shared( @@ -398,20 +396,22 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const column_part_source->setProgressCallback( MergeProgressCallback(global_ctx->merge_list_element_ptr, global_ctx->watch_prev_elapsed, *global_ctx->column_progress)); - QueryPipeline column_part_pipeline(Pipe(std::move(column_part_source))); - column_part_pipeline.setNumThreads(1); - - ctx->column_part_streams[part_num] = - std::make_shared(std::move(column_part_pipeline)); + pipes.emplace_back(std::move(column_part_source)); } + auto pipe = Pipe::unitePipes(std::move(pipes)); + ctx->rows_sources_read_buf->seek(0, 0); - ctx->column_gathered_stream = std::make_unique(column_name, ctx->column_part_streams, *ctx->rows_sources_read_buf); + auto transform = std::make_unique(pipe.getHeader(), pipe.numOutputPorts(), *ctx->rows_sources_read_buf); + pipe.addTransform(std::move(transform)); + + ctx->column_parts_pipeline = QueryPipeline(std::move(pipe)); + ctx->executor = std::make_unique(ctx->column_parts_pipeline); ctx->column_to = std::make_unique( global_ctx->new_data_part, global_ctx->metadata_snapshot, - ctx->column_gathered_stream->getHeader(), + ctx->executor->getHeader(), ctx->compression_codec, /// we don't need to recalc indices here /// because all of them were already recalculated and written @@ -421,15 +421,13 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const global_ctx->to->getIndexGranularity()); ctx->column_elems_written = 0; - - ctx->column_to->writePrefix(); } bool MergeTask::VerticalMergeStage::executeVerticalMergeForOneColumn() const { Block block; - if (!global_ctx->merges_blocker->isCancelled() && (block = ctx->column_gathered_stream->read())) + if (!global_ctx->merges_blocker->isCancelled() && ctx->executor->pull(block)) { ctx->column_elems_written += block.rows(); ctx->column_to->write(block); @@ -447,7 +445,7 @@ void MergeTask::VerticalMergeStage::finalizeVerticalMergeForOneColumn() const if (global_ctx->merges_blocker->isCancelled()) throw Exception("Cancelled merging parts", ErrorCodes::ABORTED); - ctx->column_gathered_stream->readSuffix(); + ctx->executor.reset(); auto changed_checksums = ctx->column_to->writeSuffixAndGetChecksums(global_ctx->new_data_part, global_ctx->checksums_gathered_columns, ctx->need_sync); global_ctx->checksums_gathered_columns.add(std::move(changed_checksums)); @@ -457,10 +455,14 @@ void MergeTask::VerticalMergeStage::finalizeVerticalMergeForOneColumn() const ", but " + toString(global_ctx->rows_written) + " rows of PK columns", ErrorCodes::LOGICAL_ERROR); } + UInt64 rows = 0; + UInt64 bytes = 0; + ctx->column_parts_pipeline.tryGetResultRowsAndBytes(rows, bytes); + /// NOTE: 'progress' is modified by single thread, but it may be concurrently read from MergeListElement::getInfo() (StorageSystemMerges). 
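The vertical merge above is no longer driven by a blocking read loop: `executeVerticalMergeForOneColumn()` pulls at most one block per invocation and reports whether more work remains, so the merge can be rescheduled cooperatively. A minimal STL stand-in of that step-wise driver pattern (the names and the in-memory "gathered" blocks are illustrative only):

```cpp
#include <iostream>
#include <vector>

using Block = std::vector<int>;  // stand-in for DB::Block

// One column-merge "task": each step() pulls at most one block, accounts for it,
// and tells the scheduler whether it needs to be called again.
struct ColumnMergeStep
{
    std::vector<Block> gathered;   // stand-in for the gathering pipeline's output
    size_t next = 0;
    size_t rows_written = 0;

    bool step()
    {
        if (next >= gathered.size())
            return false;          // nothing left: move on to the next column
        rows_written += gathered[next++].size();
        return true;               // more blocks to pull: reschedule this task
    }
};

int main()
{
    ColumnMergeStep task{{{1, 2, 3}, {4, 5}}};
    while (task.step())
        ;                          // in MergeTask this loop is driven by the merge executor
    std::cout << "rows written: " << task.rows_written << '\n';  // 5
}
```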
global_ctx->merge_list_element_ptr->columns_written += 1; - global_ctx->merge_list_element_ptr->bytes_written_uncompressed += ctx->column_gathered_stream->getProfileInfo().bytes; + global_ctx->merge_list_element_ptr->bytes_written_uncompressed += bytes; global_ctx->merge_list_element_ptr->progress.store(ctx->progress_before + ctx->column_sizes->columnWeight(column_name), std::memory_order_relaxed); /// This is the external cycle increment. @@ -799,26 +801,25 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() auto res_pipe = Pipe::unitePipes(std::move(pipes)); res_pipe.addTransform(std::move(merged_transform)); - QueryPipeline pipeline(std::move(res_pipe)); - pipeline.setNumThreads(1); - - global_ctx->merged_stream = std::make_shared(std::move(pipeline)); if (global_ctx->deduplicate) - global_ctx->merged_stream = std::make_shared( - global_ctx->merged_stream, sort_description, SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns); + res_pipe.addTransform(std::make_shared( + res_pipe.getHeader(), sort_description, SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns)); if (ctx->need_remove_expired_values) - global_ctx->merged_stream = std::make_shared( - global_ctx->merged_stream, *global_ctx->data, global_ctx->metadata_snapshot, global_ctx->new_data_part, global_ctx->time_of_merge, ctx->force_ttl); + res_pipe.addTransform(std::make_shared( + res_pipe.getHeader(), *global_ctx->data, global_ctx->metadata_snapshot, global_ctx->new_data_part, global_ctx->time_of_merge, ctx->force_ttl)); if (global_ctx->metadata_snapshot->hasSecondaryIndices()) { const auto & indices = global_ctx->metadata_snapshot->getSecondaryIndices(); - global_ctx->merged_stream = std::make_shared( - global_ctx->merged_stream, indices.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext())); - global_ctx->merged_stream = std::make_shared(global_ctx->merged_stream); + res_pipe.addTransform(std::make_shared( + res_pipe.getHeader(), indices.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext()))); + res_pipe.addTransform(std::make_shared(res_pipe.getHeader())); } + + global_ctx->merged_pipeline = QueryPipeline(std::move(res_pipe)); + global_ctx->merging_executor = std::make_unique(global_ctx->merged_pipeline); } diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 54b0255fd5c..aceca912cea 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -147,7 +148,8 @@ private: std::unique_ptr column_progress{nullptr}; std::shared_ptr to{nullptr}; - BlockInputStreamPtr merged_stream{nullptr}; + QueryPipeline merged_pipeline; + std::unique_ptr merging_executor; SyncGuardPtr sync_guard{nullptr}; MergeTreeData::MutableDataPartPtr new_data_part{nullptr}; @@ -263,8 +265,8 @@ private: Float64 progress_before = 0; std::unique_ptr column_to{nullptr}; size_t column_elems_written{0}; - BlockInputStreams column_part_streams; - std::unique_ptr column_gathered_stream; + QueryPipeline column_parts_pipeline; + std::unique_ptr executor; std::unique_ptr rows_sources_read_buf{nullptr}; }; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 9885a10cd62..b35a41d5d19 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ 
b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -328,7 +328,11 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti MergeTreeData::DataPartsVector parts = selectAllPartsFromPartition(partition_id); if (parts.empty()) + { + if (out_disable_reason) + *out_disable_reason = "There are no parts inside partition"; return SelectPartsDecision::CANNOT_SELECT; + } if (!final && parts.size() == 1) { @@ -342,6 +346,8 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti if (final && optimize_skip_merged_partitions && parts.size() == 1 && parts[0]->info.level > 0 && (!metadata_snapshot->hasAnyTTL() || parts[0]->checkAllTTLCalculated(metadata_snapshot))) { + if (out_disable_reason) + *out_disable_reason = "Partition skipped due to optimize_skip_merged_partitions"; return SelectPartsDecision::NOTHING_TO_MERGE; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index f80c2a67cc6..3a1ea474d74 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -92,7 +92,6 @@ void MergeTreeDataPartInMemory::flushToDisk(const String & base_path, const Stri auto compression_codec = storage.getContext()->chooseCompressionCodec(0, 0); auto indices = MergeTreeIndexFactory::instance().getMany(metadata_snapshot->getSecondaryIndices()); MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, indices, compression_codec); - out.writePrefix(); out.write(block); const auto & projections = metadata_snapshot->getProjections(); for (const auto & [projection_name, projection] : projection_parts) @@ -123,7 +122,6 @@ void MergeTreeDataPartInMemory::flushToDisk(const String & base_path, const Stri auto projection_indices = MergeTreeIndexFactory::instance().getMany(desc.metadata->getSecondaryIndices()); MergedBlockOutputStream projection_out( projection_data_part, desc.metadata, projection_part->columns, projection_indices, projection_compression_codec); - projection_out.writePrefix(); projection_out.write(projection_part->block); projection_out.writeSuffixAndFinalizePart(projection_data_part); new_data_part->addProjectionPart(projection_name, std::move(projection_data_part)); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 95ddf105b79..d939312c0bb 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -412,7 +412,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart( MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); bool sync_on_insert = data.getSettings()->fsync_after_insert; - out.writePrefix(); out.writeWithPermutation(block, perm_ptr); for (const auto & projection : metadata_snapshot->getProjections()) @@ -508,7 +507,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeProjectionPartImpl( {}, compression_codec); - out.writePrefix(); out.writeWithPermutation(block, perm_ptr); out.writeSuffixAndFinalizePart(new_data_part); diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 024b87c9a3e..60b9ddae329 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -461,7 +461,7 @@ bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr & 
node, bool atomi [this](const auto & arg) { return checkASTUseless(arg, true); }); } else if (const auto * literal = node->as()) - return !atomic && literal->value.get(); + return !atomic && literal->value.safeGet(); else if (const auto * identifier = node->as()) return key_columns.find(identifier->getColumnName()) == std::end(key_columns); else diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 3e8aa6af536..2c1d785236c 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -202,7 +202,6 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor if (metadata_snapshot->hasSortingKey()) metadata_snapshot->getSortingKey().expression->execute(block); - part_out.writePrefix(); part_out.write(block); for (const auto & projection : metadata_snapshot->getProjections()) diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 31675789257..5206f77290b 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -8,7 +8,6 @@ namespace DB namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; } @@ -51,11 +50,6 @@ void MergedBlockOutputStream::writeWithPermutation(const Block & block, const IC writeImpl(block, permutation); } -void MergedBlockOutputStream::writeSuffix() -{ - throw Exception("Method writeSuffix is not supported by MergedBlockOutputStream", ErrorCodes::NOT_IMPLEMENTED); -} - void MergedBlockOutputStream::writeSuffixAndFinalizePart( MergeTreeData::MutableDataPartPtr & new_part, bool sync, diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 95cc91a8ebc..5965331ee81 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -21,7 +21,7 @@ public: CompressionCodecPtr default_codec_, bool blocks_are_granules_size = false); - Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } + Block getHeader() const { return metadata_snapshot->getSampleBlock(); } /// If the data is pre-sorted. void write(const Block & block) override; @@ -31,8 +31,6 @@ public: */ void writeWithPermutation(const Block & block, const IColumn::Permutation * permutation); - void writeSuffix() override; - /// Finalize writing part and fill inner structures /// If part is new and contains projections, they should be added before invoking this method. 
void writeSuffixAndFinalizePart( @@ -53,7 +51,6 @@ private: MergeTreeData::DataPart::Checksums & checksums, bool sync); -private: NamesAndTypesList columns_list; IMergeTreeDataPart::MinMaxIndex minmax_idx; size_t rows_count = 0; diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 3638212b320..4b760103750 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -53,11 +53,6 @@ void MergedColumnOnlyOutputStream::write(const Block & block) writer->write(block, nullptr); } -void MergedColumnOnlyOutputStream::writeSuffix() -{ - throw Exception("Method writeSuffix is not supported by MergedColumnOnlyOutputStream", ErrorCodes::NOT_IMPLEMENTED); -} - MergeTreeData::DataPart::Checksums MergedColumnOnlyOutputStream::writeSuffixAndGetChecksums( MergeTreeData::MutableDataPartPtr & new_part, diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h index c82357dfb1d..4b75bc52f72 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h @@ -23,9 +23,8 @@ public: const MergeTreeIndexGranularity & index_granularity = {}, const MergeTreeIndexGranularityInfo * index_granularity_info_ = nullptr); - Block getHeader() const override { return header; } + Block getHeader() const { return header; } void write(const Block & block) override; - void writeSuffix() override; MergeTreeData::DataPart::Checksums writeSuffixAndGetChecksums(MergeTreeData::MutableDataPartPtr & new_part, MergeTreeData::DataPart::Checksums & all_checksums, bool sync = false); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index c72d95973d5..e38342e21dd 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -11,6 +11,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -182,7 +185,7 @@ static std::vector getProjectionsForNewDataPart( /// Return set of indices which should be recalculated during mutation also /// wraps input stream into additional expression stream static std::set getIndicesToRecalculate( - BlockInputStreamPtr & input_stream, + QueryPipeline & pipeline, const NameSet & updated_columns, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, @@ -234,9 +237,9 @@ static std::set getIndicesToRecalculate( } } - if (!indices_to_recalc.empty() && input_stream) + if (!indices_to_recalc.empty() && pipeline.initialized()) { - auto indices_recalc_syntax = TreeRewriter(context).analyze(indices_recalc_expr_list, input_stream->getHeader().getNamesAndTypesList()); + auto indices_recalc_syntax = TreeRewriter(context).analyze(indices_recalc_expr_list, pipeline.getHeader().getNamesAndTypesList()); auto indices_recalc_expr = ExpressionAnalyzer( indices_recalc_expr_list, indices_recalc_syntax, context).getActions(false); @@ -246,8 +249,11 @@ static std::set getIndicesToRecalculate( /// MutationsInterpreter which knows about skip indices and stream 'in' already has /// all required columns. /// TODO move this logic to single place. 
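In `MutateTask` and `getIndicesToRecalculate`, the old pattern of nesting one stream inside another (expression stream wrapped in a materializing stream, and so on) becomes: initialize a builder from the existing pipeline, append transforms, then turn the builder back into a pipeline, as the hunks just below show. The composition itself is easy to model with STL stand-ins; `PipelineBuilder` below is a hypothetical stand-in where a transform is simply a block-to-block function:

```cpp
#include <cassert>
#include <functional>
#include <vector>

using Block = std::vector<int>;                       // stand-in for DB::Block
using Transform = std::function<Block(Block)>;        // stand-in for a processor/transform

// Stand-in for a pipeline builder: collects transforms, then "builds" a pull function.
struct PipelineBuilder
{
    std::vector<Block> source;                         // init(): take over the existing pipeline
    std::vector<Transform> transforms;

    void addTransform(Transform t) { transforms.push_back(std::move(t)); }

    // getPipeline(): the finished pipeline applies every appended transform to each pulled block.
    std::function<bool(Block &)> build()
    {
        size_t next = 0;
        return [this, next](Block & out) mutable
        {
            if (next >= source.size())
                return false;
            out = source[next++];
            for (const auto & t : transforms)
                out = t(out);
            return true;
        };
    }
};

int main()
{
    PipelineBuilder builder{{{1, 2}, {3}}};
    builder.addTransform([](Block b) { for (int & x : b) x *= 10; return b; });   // e.g. an expression step
    builder.addTransform([](Block b) { b.push_back(-1); return b; });             // e.g. a materializing step

    auto pull = builder.build();
    Block block;
    std::vector<Block> result;
    while (pull(block))
        result.push_back(block);

    assert((result == std::vector<Block>{{10, 20, -1}, {30, -1}}));
}
```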
- input_stream = std::make_shared( - std::make_shared(input_stream, indices_recalc_expr)); + QueryPipelineBuilder builder; + builder.init(std::move(pipeline)); + builder.addTransform(std::make_shared(builder.getHeader(), indices_recalc_expr)); + builder.addTransform(std::make_shared(builder.getHeader())); + pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); } return indices_to_recalc; } @@ -500,7 +506,8 @@ struct MutationContext std::unique_ptr num_mutations; - BlockInputStreamPtr mutating_stream{nullptr}; // in + QueryPipeline mutating_pipeline; // in + std::unique_ptr mutating_executor; Block updated_header; std::unique_ptr interpreter; @@ -795,24 +802,25 @@ void PartMergerWriter::prepare() bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { - if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && (block = ctx->mutating_stream->read())) + Block cur_block; + if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) - ctx->minmax_idx->update(block, ctx->data->getMinMaxColumnsNames(ctx->metadata_snapshot->getPartitionKey())); + ctx->minmax_idx->update(cur_block, ctx->data->getMinMaxColumnsNames(ctx->metadata_snapshot->getPartitionKey())); - ctx->out->write(block); + ctx->out->write(cur_block); for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { const auto & projection = *ctx->projections_to_build[i]; - auto projection_block = projection_squashes[i].add(projection.calculate(block, ctx->context)); + auto projection_block = projection_squashes[i].add(projection.calculate(cur_block, ctx->context)); if (projection_block) projection_parts[projection.name].emplace_back(MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num)); } - (*ctx->mutate_entry)->rows_written += block.rows(); - (*ctx->mutate_entry)->bytes_written_uncompressed += block.bytes(); + (*ctx->mutate_entry)->rows_written += cur_block.rows(); + (*ctx->mutate_entry)->bytes_written_uncompressed += cur_block.bytes(); /// Need execute again return true; @@ -937,18 +945,25 @@ private: auto skip_part_indices = MutationHelpers::getIndicesForNewDataPart(ctx->metadata_snapshot->getSecondaryIndices(), ctx->for_file_renames); ctx->projections_to_build = MutationHelpers::getProjectionsForNewDataPart(ctx->metadata_snapshot->getProjections(), ctx->for_file_renames); - if (ctx->mutating_stream == nullptr) + if (!ctx->mutating_pipeline.initialized()) throw Exception("Cannot mutate part columns with uninitialized mutations stream. 
It's a bug", ErrorCodes::LOGICAL_ERROR); + QueryPipelineBuilder builder; + builder.init(std::move(ctx->mutating_pipeline)); + if (ctx->metadata_snapshot->hasPrimaryKey() || ctx->metadata_snapshot->hasSecondaryIndices()) - ctx->mutating_stream = std::make_shared( - std::make_shared(ctx->mutating_stream, ctx->data->getPrimaryKeyAndSkipIndicesExpression(ctx->metadata_snapshot))); + { + builder.addTransform( + std::make_shared(builder.getHeader(), ctx->data->getPrimaryKeyAndSkipIndicesExpression(ctx->metadata_snapshot))); + + builder.addTransform(std::make_shared(builder.getHeader())); + } if (ctx->execute_ttl_type == ExecuteTTLType::NORMAL) - ctx->mutating_stream = std::make_shared(ctx->mutating_stream, *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true); + builder.addTransform(std::make_shared(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true)); if (ctx->execute_ttl_type == ExecuteTTLType::RECALCULATE) - ctx->mutating_stream = std::make_shared(ctx->mutating_stream, *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true); + builder.addTransform(std::make_shared(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true)); ctx->minmax_idx = std::make_shared(); @@ -959,8 +974,8 @@ private: skip_part_indices, ctx->compression_codec); - ctx->mutating_stream->readPrefix(); - ctx->out->writePrefix(); + ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); + ctx->mutating_executor = std::make_unique(ctx->mutating_pipeline); part_merger_writer_task = std::make_unique(ctx); } @@ -969,7 +984,8 @@ private: void finalize() { ctx->new_data_part->minmax_idx = std::move(ctx->minmax_idx); - ctx->mutating_stream->readSuffix(); + ctx->mutating_executor.reset(); + ctx->mutating_pipeline.reset(); static_pointer_cast(ctx->out)->writeSuffixAndFinalizePart(ctx->new_data_part, ctx->need_sync); } @@ -1088,16 +1104,16 @@ private: ctx->compression_codec = ctx->source_part->default_codec; - if (ctx->mutating_stream) + if (ctx->mutating_pipeline.initialized()) { - if (ctx->mutating_stream == nullptr) - throw Exception("Cannot mutate part columns with uninitialized mutations stream. 
It's a bug", ErrorCodes::LOGICAL_ERROR); + QueryPipelineBuilder builder; + builder.init(std::move(ctx->mutating_pipeline)); if (ctx->execute_ttl_type == ExecuteTTLType::NORMAL) - ctx->mutating_stream = std::make_shared(ctx->mutating_stream, *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true); + builder.addTransform(std::make_shared(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true)); if (ctx->execute_ttl_type == ExecuteTTLType::RECALCULATE) - ctx->mutating_stream = std::make_shared(ctx->mutating_stream, *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true); + builder.addTransform(std::make_shared(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true)); ctx->out = std::make_shared( ctx->new_data_part, @@ -1110,8 +1126,9 @@ private: &ctx->source_part->index_granularity_info ); - ctx->mutating_stream->readPrefix(); - ctx->out->writePrefix(); + ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); + ctx->mutating_executor = std::make_unique(ctx->mutating_pipeline); + ctx->projections_to_build = std::vector{ctx->projections_to_recalc.begin(), ctx->projections_to_recalc.end()}; part_merger_writer_task = std::make_unique(ctx); @@ -1121,9 +1138,10 @@ private: void finalize() { - if (ctx->mutating_stream) + if (ctx->mutating_executor) { - ctx->mutating_stream->readSuffix(); + ctx->mutating_executor.reset(); + ctx->mutating_pipeline.reset(); auto changed_checksums = static_pointer_cast(ctx->out)->writeSuffixAndGetChecksums( @@ -1269,9 +1287,9 @@ bool MutateTask::prepare() ctx->materialized_indices = ctx->interpreter->grabMaterializedIndices(); ctx->materialized_projections = ctx->interpreter->grabMaterializedProjections(); ctx->mutation_kind = ctx->interpreter->getMutationKind(); - ctx->mutating_stream = ctx->interpreter->execute(); + ctx->mutating_pipeline = ctx->interpreter->execute(); ctx->updated_header = ctx->interpreter->getUpdatedHeader(); - ctx->mutating_stream->setProgressCallback(MergeProgressCallback((*ctx->mutate_entry)->ptr(), ctx->watch_prev_elapsed, *ctx->stage_progress)); + ctx->mutating_pipeline.setProgressCallback(MergeProgressCallback((*ctx->mutate_entry)->ptr(), ctx->watch_prev_elapsed, *ctx->stage_progress)); } ctx->single_disk_volume = std::make_shared("volume_" + ctx->future_part->name, ctx->space_reservation->getDisk(), 0); @@ -1301,7 +1319,7 @@ bool MutateTask::prepare() ctx->need_sync = needSyncPart(ctx->source_part->rows_count, ctx->source_part->getBytesOnDisk(), *data_settings); ctx->execute_ttl_type = ExecuteTTLType::NONE; - if (ctx->mutating_stream) + if (ctx->mutating_pipeline.initialized()) ctx->execute_ttl_type = MergeTreeDataMergerMutator::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); @@ -1320,7 +1338,7 @@ bool MutateTask::prepare() ctx->updated_columns.emplace(name_type.name); ctx->indices_to_recalc = MutationHelpers::getIndicesToRecalculate( - ctx->mutating_stream, ctx->updated_columns, ctx->metadata_snapshot, ctx->context, ctx->materialized_indices, ctx->source_part); + ctx->mutating_pipeline, ctx->updated_columns, ctx->metadata_snapshot, ctx->context, ctx->materialized_indices, ctx->source_part); ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate( ctx->updated_columns, ctx->metadata_snapshot, ctx->materialized_projections, ctx->source_part); diff --git a/src/Storages/StorageJoin.cpp 
b/src/Storages/StorageJoin.cpp index e45183591f2..2acdba18c2d 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include /// toLower @@ -114,17 +115,16 @@ void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context) { auto storage_ptr = DatabaseCatalog::instance().getTable(getStorageID(), context); auto interpreter = std::make_unique(storage_ptr, metadata_snapshot, commands, context, true); - auto in = interpreter->execute(); - in->readPrefix(); + auto pipeline = interpreter->execute(); + PullingPipelineExecutor executor(pipeline); - while (const Block & block = in->read()) + Block block; + while (executor.pull(block)) { new_data->addJoinedBlock(block, true); if (persistent) backup_stream.write(block); } - - in->readSuffix(); } /// Now acquire exclusive lock and modify storage. diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 3fe6083ab13..299e39a3836 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB @@ -263,11 +264,12 @@ void StorageMemory::mutate(const MutationCommands & commands, ContextPtr context new_context->setSetting("max_threads", 1); auto interpreter = std::make_unique(storage_ptr, metadata_snapshot, commands, new_context, true); - auto in = interpreter->execute(); + auto pipeline = interpreter->execute(); + PullingPipelineExecutor executor(pipeline); - in->readPrefix(); Blocks out; - while (Block block = in->read()) + Block block; + while (executor.pull(block)) { if (compress) for (auto & elem : block) @@ -275,7 +277,6 @@ void StorageMemory::mutate(const MutationCommands & commands, ContextPtr context out.push_back(block); } - in->readSuffix(); std::unique_ptr new_data; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e5d59f2a950..50c7fe0610d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4203,143 +4203,123 @@ bool StorageReplicatedMergeTree::optimize( if (!is_leader) throw Exception("OPTIMIZE cannot be done on this replica because it is not a leader", ErrorCodes::NOT_A_LEADER); - constexpr size_t max_retries = 10; - - std::vector merge_entries; + auto handle_noop = [&] (const String & message) { - auto zookeeper = getZooKeeper(); + if (query_context->getSettingsRef().optimize_throw_if_noop) + throw Exception(message, ErrorCodes::CANNOT_ASSIGN_OPTIMIZE); + return false; + }; - auto handle_noop = [&] (const String & message) + auto zookeeper = getZooKeeper(); + UInt64 disk_space = getStoragePolicy()->getMaxUnreservedFreeSpace(); + const auto storage_settings_ptr = getSettings(); + auto metadata_snapshot = getInMemoryMetadataPtr(); + std::vector merge_entries; + + auto try_assign_merge = [&](const String & partition_id) -> bool + { + constexpr size_t max_retries = 10; + size_t try_no = 0; + for (; try_no < max_retries; ++try_no) { - if (query_context->getSettingsRef().optimize_throw_if_noop) - throw Exception(message, ErrorCodes::CANNOT_ASSIGN_OPTIMIZE); - return false; - }; + /// We must select parts for merge under merge_selecting_mutex because other threads + /// (merge_selecting_thread or OPTIMIZE queries) could assign new merges. 
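As context for the refactor above: every branch of optimize() now goes through try_assign_merge and handle_noop, so whether a no-op OPTIMIZE fails or returns quietly is controlled by the optimize_throw_if_noop setting. A minimal sketch of how that looks from a client, assuming a local server, an existing MergeTree table named t, and the clickhouse_driver package (all three are assumptions for illustration, not part of the patch):

    # Sketch only: observing the noop behaviour of OPTIMIZE from a client.
    # Table name, host and port are illustrative, not taken from the patch.
    from clickhouse_driver import Client, errors

    client = Client(host='localhost', port=9000)

    try:
        # With optimize_throw_if_noop = 1 the server raises (handle_noop above)
        # instead of silently returning when no merge can be assigned.
        client.execute('OPTIMIZE TABLE t FINAL',
                       settings={'optimize_throw_if_noop': 1})
    except errors.ServerException as e:
        print('OPTIMIZE was a no-op:', e.message)
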
+ std::lock_guard merge_selecting_lock(merge_selecting_mutex); + ReplicatedMergeTreeMergePredicate can_merge = queue.getMergePredicate(zookeeper); - const auto storage_settings_ptr = getSettings(); - auto metadata_snapshot = getInMemoryMetadataPtr(); + auto future_merged_part = std::make_shared(); + if (storage_settings.get()->assign_part_uuids) + future_merged_part->uuid = UUIDHelpers::generateV4(); - if (!partition && final) - { - DataPartsVector data_parts = getDataPartsVector(); - std::unordered_set partition_ids; + constexpr const char * unknown_disable_reason = "unknown reason"; + String disable_reason = unknown_disable_reason; + SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT; - for (const DataPartPtr & part : data_parts) - partition_ids.emplace(part->info.partition_id); - - UInt64 disk_space = getStoragePolicy()->getMaxUnreservedFreeSpace(); - - for (const String & partition_id : partition_ids) + if (partition_id.empty()) { - size_t try_no = 0; - for (; try_no < max_retries; ++try_no) - { - /// We must select parts for merge under merge_selecting_mutex because other threads - /// (merge_selecting_thread or OPTIMIZE queries) could assign new merges. - std::lock_guard merge_selecting_lock(merge_selecting_mutex); - ReplicatedMergeTreeMergePredicate can_merge = queue.getMergePredicate(zookeeper); - - auto future_merged_part = std::make_shared(); - - if (storage_settings.get()->assign_part_uuids) - future_merged_part->uuid = UUIDHelpers::generateV4(); - - SelectPartsDecision select_decision = merger_mutator.selectAllPartsToMergeWithinPartition( - future_merged_part, disk_space, can_merge, partition_id, true, metadata_snapshot, nullptr, query_context->getSettingsRef().optimize_skip_merged_partitions); - - if (select_decision != SelectPartsDecision::SELECTED) - break; - - ReplicatedMergeTreeLogEntryData merge_entry; - CreateMergeEntryResult create_result = createLogEntryToMergeParts( - zookeeper, future_merged_part->parts, - future_merged_part->name, future_merged_part->uuid, future_merged_part->type, - deduplicate, deduplicate_by_columns, - &merge_entry, can_merge.getVersion(), future_merged_part->merge_type); - - if (create_result == CreateMergeEntryResult::MissingPart) - return handle_noop("Can't create merge queue node in ZooKeeper, because some parts are missing"); - - if (create_result == CreateMergeEntryResult::LogUpdated) - continue; - - merge_entries.push_back(std::move(merge_entry)); - break; - } - if (try_no == max_retries) - return handle_noop("Can't create merge queue node in ZooKeeper, because log was updated in every of " - + toString(max_retries) + " tries"); + select_decision = merger_mutator.selectPartsToMerge( + future_merged_part, /* aggressive */ true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, + can_merge, /* merge_with_ttl_allowed */ false, &disable_reason); } - } - else - { - size_t try_no = 0; - for (; try_no < max_retries; ++try_no) + else { - std::lock_guard merge_selecting_lock(merge_selecting_mutex); - ReplicatedMergeTreeMergePredicate can_merge = queue.getMergePredicate(zookeeper); + select_decision = merger_mutator.selectAllPartsToMergeWithinPartition( + future_merged_part, disk_space, can_merge, partition_id, final, metadata_snapshot, + &disable_reason, query_context->getSettingsRef().optimize_skip_merged_partitions); + } - auto future_merged_part = std::make_shared(); - if (storage_settings.get()->assign_part_uuids) - future_merged_part->uuid = UUIDHelpers::generateV4(); + /// If there is nothing to merge then 
we treat this merge as successful (needed for optimize final optimization) + if (select_decision == SelectPartsDecision::NOTHING_TO_MERGE) + return false; - String disable_reason; - SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT; + if (select_decision != SelectPartsDecision::SELECTED) + { + constexpr const char * message_fmt = "Cannot select parts for optimization: {}"; + assert(disable_reason != unknown_disable_reason); + if (!partition_id.empty()) + disable_reason += fmt::format(" (in partition {})", partition_id); + String message = fmt::format(message_fmt, disable_reason); + LOG_INFO(log, message); + return handle_noop(message); + } - if (!partition) - { - select_decision = merger_mutator.selectPartsToMerge( - future_merged_part, true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, can_merge, false, &disable_reason); - } - else - { - UInt64 disk_space = getStoragePolicy()->getMaxUnreservedFreeSpace(); - String partition_id = getPartitionIDFromQuery(partition, query_context); - select_decision = merger_mutator.selectAllPartsToMergeWithinPartition( - future_merged_part, disk_space, can_merge, partition_id, final, metadata_snapshot, &disable_reason, query_context->getSettingsRef().optimize_skip_merged_partitions); - } + ReplicatedMergeTreeLogEntryData merge_entry; + CreateMergeEntryResult create_result = createLogEntryToMergeParts( + zookeeper, future_merged_part->parts, + future_merged_part->name, future_merged_part->uuid, future_merged_part->type, + deduplicate, deduplicate_by_columns, + &merge_entry, can_merge.getVersion(), future_merged_part->merge_type); - /// If there is nothing to merge then we treat this merge as successful (needed for optimize final optimization) - if (select_decision == SelectPartsDecision::NOTHING_TO_MERGE) - break; + if (create_result == CreateMergeEntryResult::MissingPart) + { + String message = "Can't create merge queue node in ZooKeeper, because some parts are missing"; + LOG_TRACE(log, message); + return handle_noop(message); + } - if (select_decision != SelectPartsDecision::SELECTED) - { - constexpr const char * message_fmt = "Cannot select parts for optimization: {}"; - if (disable_reason.empty()) - disable_reason = "unknown reason"; - LOG_INFO(log, message_fmt, disable_reason); - return handle_noop(fmt::format(message_fmt, disable_reason)); - } + if (create_result == CreateMergeEntryResult::LogUpdated) + continue; - ReplicatedMergeTreeLogEntryData merge_entry; - CreateMergeEntryResult create_result = createLogEntryToMergeParts( - zookeeper, future_merged_part->parts, - future_merged_part->name, future_merged_part->uuid, future_merged_part->type, - deduplicate, deduplicate_by_columns, - &merge_entry, can_merge.getVersion(), future_merged_part->merge_type); + merge_entries.push_back(std::move(merge_entry)); + return true; + } - if (create_result == CreateMergeEntryResult::MissingPart) - return handle_noop("Can't create merge queue node in ZooKeeper, because some parts are missing"); + assert(try_no == max_retries); + String message = fmt::format("Can't create merge queue node in ZooKeeper, because log was updated in every of {} tries", try_no); + LOG_TRACE(log, message); + return handle_noop(message); + }; - if (create_result == CreateMergeEntryResult::LogUpdated) - continue; + bool assigned = false; + if (!partition && final) + { + DataPartsVector data_parts = getDataPartsVector(); + std::unordered_set partition_ids; - merge_entries.push_back(std::move(merge_entry)); + for (const DataPartPtr & part : data_parts) 
+ partition_ids.emplace(part->info.partition_id); + + for (const String & partition_id : partition_ids) + { + assigned = try_assign_merge(partition_id); + if (!assigned) break; - } - if (try_no == max_retries) - return handle_noop("Can't create merge queue node in ZooKeeper, because log was updated in every of " - + toString(max_retries) + " tries"); } } + else + { + String partition_id; + if (partition) + partition_id = getPartitionIDFromQuery(partition, query_context); + assigned = try_assign_merge(partition_id); + } table_lock.reset(); for (auto & merge_entry : merge_entries) waitForLogEntryToBeProcessedIfNecessary(merge_entry, query_context); - return true; + return assigned; } bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMergeTree::LogEntry & entry) @@ -7159,7 +7139,6 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); bool sync_on_insert = settings->fsync_after_insert; - out.writePrefix(); out.write(block); /// TODO(ab): What projections should we add to the empty part? How can we make sure that it /// won't block future merges? Perhaps we should also check part emptiness when selecting parts diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index a781f44e9b2..fb7a1bc59b8 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -21,6 +21,13 @@ NamesAndTypesList StorageSystemDatabases::getNamesAndTypes() }; } +NamesAndAliases StorageSystemDatabases::getNamesAndAliases() +{ + return { + {"database", std::make_shared(), "name"} + }; +} + void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { const auto access = context->getAccess(); diff --git a/src/Storages/System/StorageSystemDatabases.h b/src/Storages/System/StorageSystemDatabases.h index 4aaae1bfd7f..3de0da126d4 100644 --- a/src/Storages/System/StorageSystemDatabases.h +++ b/src/Storages/System/StorageSystemDatabases.h @@ -23,6 +23,8 @@ public: static NamesAndTypesList getNamesAndTypes(); + static NamesAndAliases getNamesAndAliases(); + protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index f1f7fa4fa08..254e6f77e0c 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -59,6 +59,8 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) {"lifetime_bytes", std::make_shared(std::make_shared())}, {"comment", std::make_shared()}, {"has_own_data", std::make_shared()}, + }, { + {"table", std::make_shared(), "name"} })); setInMemoryMetadata(storage_metadata); } diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 19080f3934f..f10e38b87e5 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1,6 +1,8 @@ #!/usr/bin/env python3 # pylint: disable=too-many-return-statements +# pylint: disable=global-variable-not-assigned + import enum import shutil import sys @@ -13,13 +15,10 @@ import traceback import math from argparse import ArgumentParser -from typing import Tuple, Union, Optional, TextIO, Dict, Set, List -import shlex +from typing import Tuple, Union, Optional, Dict, Set, List import subprocess from subprocess import Popen from 
subprocess import PIPE -from subprocess import CalledProcessError -from subprocess import TimeoutExpired from datetime import datetime from time import time, sleep from errno import ESRCH @@ -35,6 +34,9 @@ import multiprocessing import socket from contextlib import closing +import clickhouse_driver +import pandas + USE_JINJA = True try: import jinja2 @@ -42,20 +44,59 @@ except ImportError: USE_JINJA = False print('WARNING: jinja2 not installed! Template tests will be skipped.') -DISTRIBUTED_DDL_TIMEOUT_MSG = "is executing longer than distributed_ddl_task_timeout" - MESSAGES_TO_RETRY = [ "ConnectionPoolWithFailover: Connection failed at try", "DB::Exception: New table appeared in database being dropped or detached. Try again", "is already started to be removing by another replica right now", "DB::Exception: Cannot enqueue query", - DISTRIBUTED_DDL_TIMEOUT_MSG # FIXME + "is executing longer than distributed_ddl_task_timeout" # FIXME +] +error_codes = clickhouse_driver.errors.ErrorCodes +error_codes.NOT_A_LEADER = 529 +ERROR_CODES_TO_RETRY = [ + error_codes.ALL_CONNECTION_TRIES_FAILED, + error_codes.DATABASE_NOT_EMPTY, + error_codes.NOT_A_LEADER, + error_codes.UNFINISHED, ] MAX_RETRIES = 3 TEST_FILE_EXTENSIONS = ['.sql', '.sql.j2', '.sh', '.py', '.expect'] +class Client(clickhouse_driver.Client): + # return first column of the first row + def execute_one(self, *args, **kwargs): + return super().execute(*args, **kwargs)[0][0] + + # return pandas.DataFrame + def execute_pandas(self, *args, **kwargs): + data = super().execute(*args, **kwargs, with_column_types=True) + return Client.__combine(data) + + @staticmethod + def __combine(data): + cols = data[1] + rows = data[0] + header = [ i[0] for i in cols ] + data = pandas.DataFrame(data=rows, columns=header) + return data + +# Helpers +def make_clickhouse_client(base_args): + return Client(host=base_args.tcp_host, port=base_args.tcp_port, + # hung check in stress tests may remove the database, + # hence we should use 'system'. 
+ database='system', + settings=get_additional_client_options_dict(base_args)) +def clickhouse_execute_one(base_args, *args, **kwargs): + return make_clickhouse_client(base_args).execute_one(*args, **kwargs) +def clickhouse_execute(base_args, *args, **kwargs): + return make_clickhouse_client(base_args).execute(*args, **kwargs) +def clickhouse_execute_pandas(base_args, *args, **kwargs): + return make_clickhouse_client(base_args).execute_pandas(*args, **kwargs) + + class Terminated(KeyboardInterrupt): pass @@ -98,17 +139,13 @@ def get_db_engine(args, database_name): def get_zookeeper_session_uptime(args): try: - query = b"SELECT zookeeperSessionUptime()" - if args.replicated_database: - query = b"SELECT min(materialize(zookeeperSessionUptime())) " \ - b"FROM clusterAllReplicas('test_cluster_database_replicated', system.one) " - - clickhouse_proc = open_client_process(args.client) - - (stdout, _) = clickhouse_proc.communicate((query), timeout=20) - - return int(stdout.decode('utf-8').strip()) + return int(clickhouse_execute_one(args, """ + SELECT min(materialize(zookeeperSessionUptime())) + FROM clusterAllReplicas('test_cluster_database_replicated', system.one) + """)) + else: + return int(clickhouse_execute_one(args, 'SELECT zookeeperSessionUptime()')) except: return None @@ -122,24 +159,30 @@ def need_retry(args, stdout, stderr, total_time): return True return any(msg in stdout for msg in MESSAGES_TO_RETRY) or any(msg in stderr for msg in MESSAGES_TO_RETRY) +def need_retry_error(args, error, total_time): + # Sometimes we may get unexpected exception like "Replica is readonly" or "Shutdown is called for table" + # instead of "Session expired" or "Connection loss" + # Retry if session was expired during test execution + session_uptime = get_zookeeper_session_uptime(args) + if session_uptime is not None and session_uptime < math.ceil(total_time): + return True + if isinstance(error, clickhouse_driver.errors.Error): + if error.code in ERROR_CODES_TO_RETRY: + return True + if any(msg in error.message for msg in MESSAGES_TO_RETRY): + return True + return False + def get_processlist(args): - try: - query = b"SHOW PROCESSLIST FORMAT Vertical" - - if args.replicated_database: - query = b"SELECT materialize((hostName(), tcpPort())) as host, * " \ - b"FROM clusterAllReplicas('test_cluster_database_replicated', system.processes) " \ - b"WHERE query NOT LIKE '%system.processes%' FORMAT Vertical" - - clickhouse_proc = open_client_process(args.client) - - (stdout, _) = clickhouse_proc.communicate((query), timeout=20) - - return False, stdout.decode('utf-8') - except Exception as ex: - print("Exception", ex) - return True, "" + if args.replicated_database: + return clickhouse_execute_pandas(args, """ + SELECT materialize((hostName(), tcpPort())) as host, * + FROM clusterAllReplicas('test_cluster_database_replicated', system.processes) + WHERE query NOT LIKE '%system.processes%' + """) + else: + return clickhouse_execute_pandas(args, 'SHOW PROCESSLIST') # collect server stacktraces using gdb @@ -305,7 +348,7 @@ class TestCase: return None @staticmethod - def configure_testcase_args(args, case_file, suite_tmp_dir, stderr_file): + def configure_testcase_args(args, case_file, suite_tmp_dir): testcase_args = copy.deepcopy(args) testcase_args.testcase_start_time = datetime.now() @@ -325,23 +368,11 @@ class TestCase: database = 'test_{suffix}'.format(suffix=random_str()) - with open(stderr_file, 'w') as stderr: - client_cmd = testcase_args.testcase_client + " " \ - + get_additional_client_options(args) - - 
clickhouse_proc_create = open_client_process( - universal_newlines=True, - client_args=client_cmd, - stderr_file=stderr) - - try: - clickhouse_proc_create.communicate( - ("CREATE DATABASE " + database + get_db_engine(testcase_args, database)), - timeout=testcase_args.timeout) - except TimeoutExpired: - total_time = (datetime.now() - testcase_args.testcase_start_time).total_seconds() - return clickhouse_proc_create, "", "Timeout creating database {} before test".format( - database), total_time + try: + clickhouse_execute(args, "CREATE DATABASE " + database + get_db_engine(testcase_args, database), settings={'log_comment': testcase_basename}) + except (TimeoutError, clickhouse_driver.errors.SocketTimeoutError): + total_time = (datetime.now() - testcase_args.testcase_start_time).total_seconds() + return None, "", f"Timeout creating database {database} before test", total_time os.environ["CLICKHOUSE_DATABASE"] = database # Set temporary directory to match the randomly generated database, @@ -412,41 +443,42 @@ class TestCase: def process_result_impl(self, proc, stdout: str, stderr: str, total_time: float): description = "" - if proc.returncode is None: - try: - proc.kill() - except OSError as e: - if e.errno != ESRCH: - raise + if proc: + if proc.returncode is None: + try: + proc.kill() + except OSError as e: + if e.errno != ESRCH: + raise - if stderr: - description += stderr - return TestResult(self.name, TestStatus.FAIL, FailureReason.TIMEOUT, total_time, description) + if stderr: + description += stderr + return TestResult(self.name, TestStatus.FAIL, FailureReason.TIMEOUT, total_time, description) - if proc.returncode != 0: - reason = FailureReason.EXIT_CODE - description += str(proc.returncode) + if proc.returncode != 0: + reason = FailureReason.EXIT_CODE + description += str(proc.returncode) - if stderr: - description += "\n" - description += stderr + if stderr: + description += "\n" + description += stderr - # Stop on fatal errors like segmentation fault. They are sent to client via logs. - if ' ' in stderr: - reason = FailureReason.SERVER_DIED + # Stop on fatal errors like segmentation fault. They are sent to client via logs. 
+ if ' ' in stderr: + reason = FailureReason.SERVER_DIED - if self.testcase_args.stop \ - and ('Connection refused' in stderr or 'Attempt to read after eof' in stderr) \ - and 'Received exception from server' not in stderr: - reason = FailureReason.SERVER_DIED + if self.testcase_args.stop \ + and ('Connection refused' in stderr or 'Attempt to read after eof' in stderr) \ + and 'Received exception from server' not in stderr: + reason = FailureReason.SERVER_DIED - if os.path.isfile(self.stdout_file): - description += ", result:\n\n" - description += '\n'.join(open(self.stdout_file).read().split('\n')[:100]) - description += '\n' + if os.path.isfile(self.stdout_file): + description += ", result:\n\n" + description += '\n'.join(open(self.stdout_file).read().split('\n')[:100]) + description += '\n' - description += "\nstdout:\n{}\n".format(stdout) - return TestResult(self.name, TestStatus.FAIL, reason, total_time, description) + description += "\nstdout:\n{}\n".format(stdout) + return TestResult(self.name, TestStatus.FAIL, reason, total_time, description) if stderr: description += "\n{}\n".format('\n'.join(stderr.split('\n')[:100])) @@ -510,19 +542,8 @@ class TestCase: @staticmethod def send_test_name_failed(suite: str, case: str) -> bool: - clickhouse_proc = open_client_process(args.client, universal_newlines=True) - - failed_to_check = False - pid = os.getpid() - query = f"SELECT 'Running test {suite}/{case} from pid={pid}';" - - try: - clickhouse_proc.communicate((query), timeout=20) - except: - failed_to_check = True - - return failed_to_check or clickhouse_proc.returncode != 0 + clickhouse_execute(args, f"SELECT 'Running test {suite}/{case} from pid={pid}'") def run_single_test(self, server_logs_level, client_options): args = self.testcase_args @@ -566,28 +587,15 @@ class TestCase: need_drop_database = not maybe_passed if need_drop_database: - with open(self.stderr_file, 'a') as stderr: - clickhouse_proc_create = open_client_process(client, universal_newlines=True, stderr_file=stderr) - seconds_left = max(args.timeout - (datetime.now() - start_time).total_seconds(), 20) - try: - drop_database_query = "DROP DATABASE " + database - if args.replicated_database: - drop_database_query += " ON CLUSTER test_cluster_database_replicated" - clickhouse_proc_create.communicate((drop_database_query), timeout=seconds_left) - except TimeoutExpired: - # kill test process because it can also hung - if proc.returncode is None: - try: - proc.kill() - except OSError as e: - if e.errno != ESRCH: - raise - + client = make_clickhouse_client(args) + client.connection.force_connect() + with client.connection.timeout_setter(seconds_left): + client.execute("DROP DATABASE " + database) + except (TimeoutError, clickhouse_driver.errors.SocketTimeoutError): total_time = (datetime.now() - start_time).total_seconds() - return clickhouse_proc_create, "", f"Timeout dropping database {database} after test", total_time - + return None, "", f"Timeout dropping database {database} after test", total_time shutil.rmtree(args.test_tmp_dir) total_time = (datetime.now() - start_time).total_seconds() @@ -618,12 +626,15 @@ class TestCase: if skip_reason is not None: return TestResult(self.name, TestStatus.SKIPPED, skip_reason, 0., "") - if args.testname and self.send_test_name_failed(suite.suite, self.case): - description = "\nServer does not respond to health check\n" - return TestResult(self.name, TestStatus.FAIL, FailureReason.SERVER_DIED, 0., description) + if args.testname: + try: + self.send_test_name_failed(suite.suite, 
self.case) + except: + return TestResult(self.name, TestStatus.FAIL, FailureReason.SERVER_DIED, 0., + "\nServer does not respond to health check\n") self.runs_count += 1 - self.testcase_args = self.configure_testcase_args(args, self.case_file, suite.suite_tmp_path, self.stderr_file) + self.testcase_args = self.configure_testcase_args(args, self.case_file, suite.suite_tmp_path) proc, stdout, stderr, total_time = self.run_single_test(server_logs_level, client_options) result = self.process_result_impl(proc, stdout, stderr, total_time) @@ -788,12 +799,7 @@ class TestSuite: @staticmethod def readTestSuite(args, suite_dir_name: str): def is_data_present(): - clickhouse_proc = open_client_process(args.client) - (stdout, stderr) = clickhouse_proc.communicate(b"EXISTS TABLE test.hits") - if clickhouse_proc.returncode != 0: - raise CalledProcessError(clickhouse_proc.returncode, args.client, stderr) - - return stdout.startswith(b'1') + return int(clickhouse_execute_one(args, 'EXISTS TABLE test.hits')) base_dir = os.path.abspath(args.queries) tmp_dir = os.path.abspath(args.tmp) @@ -955,42 +961,26 @@ def run_tests_array(all_tests_with_params): server_logs_level = "warning" -def check_server_started(client, retry_count): +def check_server_started(args): print("Connecting to ClickHouse server...", end='') sys.stdout.flush() - + retry_count = args.server_check_retries while retry_count > 0: - clickhouse_proc = open_client_process(client) - (stdout, stderr) = clickhouse_proc.communicate(b"SELECT 1") - - if clickhouse_proc.returncode == 0 and stdout.startswith(b"1"): + try: + clickhouse_execute(args, 'SELECT 1') print(" OK") sys.stdout.flush() return True - - if clickhouse_proc.returncode == 210: - # Connection refused, retry + except (ConnectionRefusedError, ConnectionResetError, clickhouse_driver.errors.NetworkError): print('.', end='') sys.stdout.flush() retry_count -= 1 sleep(0.5) continue - code: int = clickhouse_proc.returncode - - print(f"\nClient invocation failed with code {code}:\n\ - stdout: {stdout}\n\ - stderr: {stderr}\n\ - args: {''.join(clickhouse_proc.args)}\n") - - sys.stdout.flush() - - return False - print('\nAll connection tries failed') sys.stdout.flush() - return False @@ -1006,60 +996,36 @@ class BuildFlags(): POLYMORPHIC_PARTS = 'polymorphic-parts' -def collect_build_flags(client): - clickhouse_proc = open_client_process(client) - (stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.build_options WHERE name = 'CXX_FLAGS'") +def collect_build_flags(args): result = [] - if clickhouse_proc.returncode == 0: - if b'-fsanitize=thread' in stdout: - result.append(BuildFlags.THREAD) - elif b'-fsanitize=address' in stdout: - result.append(BuildFlags.ADDRESS) - elif b'-fsanitize=undefined' in stdout: - result.append(BuildFlags.UNDEFINED) - elif b'-fsanitize=memory' in stdout: - result.append(BuildFlags.MEMORY) - else: - raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) + value = clickhouse_execute_one(args, "SELECT value FROM system.build_options WHERE name = 'CXX_FLAGS'") + if '-fsanitize=thread' in value: + result.append(BuildFlags.THREAD) + elif '-fsanitize=address' in value: + result.append(BuildFlags.ADDRESS) + elif '-fsanitize=undefined' in value: + result.append(BuildFlags.UNDEFINED) + elif '-fsanitize=memory' in value: + result.append(BuildFlags.MEMORY) - clickhouse_proc = open_client_process(client) - (stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM 
system.build_options WHERE name = 'BUILD_TYPE'") + value = clickhouse_execute_one(args, "SELECT value FROM system.build_options WHERE name = 'BUILD_TYPE'") + if 'Debug' in value: + result.append(BuildFlags.DEBUG) + elif 'RelWithDebInfo' in value or 'Release' in value: + result.append(BuildFlags.RELEASE) - if clickhouse_proc.returncode == 0: - if b'Debug' in stdout: - result.append(BuildFlags.DEBUG) - elif b'RelWithDebInfo' in stdout or b'Release' in stdout: - result.append(BuildFlags.RELEASE) - else: - raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) + value = clickhouse_execute_one(args, "SELECT value FROM system.build_options WHERE name = 'UNBUNDLED'") + if value in ('ON', '1'): + result.append(BuildFlags.UNBUNDLED) - clickhouse_proc = open_client_process(client) - (stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.build_options WHERE name = 'UNBUNDLED'") + value = clickhouse_execute_one(args, "SELECT value FROM system.settings WHERE name = 'default_database_engine'") + if value == 'Ordinary': + result.append(BuildFlags.ORDINARY_DATABASE) - if clickhouse_proc.returncode == 0: - if b'ON' in stdout or b'1' in stdout: - result.append(BuildFlags.UNBUNDLED) - else: - raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) - - clickhouse_proc = open_client_process(client) - (stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.settings WHERE name = 'default_database_engine'") - - if clickhouse_proc.returncode == 0: - if b'Ordinary' in stdout: - result.append(BuildFlags.ORDINARY_DATABASE) - else: - raise Exception("Cannot get information about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) - - clickhouse_proc = open_client_process(client) - (stdout, stderr) = clickhouse_proc.communicate(b"SELECT value FROM system.merge_tree_settings WHERE name = 'min_bytes_for_wide_part'") - - if clickhouse_proc.returncode == 0: - if stdout == b'0\n': - result.append(BuildFlags.POLYMORPHIC_PARTS) - else: - raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) + value = int(clickhouse_execute_one(args, "SELECT value FROM system.merge_tree_settings WHERE name = 'min_bytes_for_wide_part'")) + if value == 0: + result.append(BuildFlags.POLYMORPHIC_PARTS) return result @@ -1086,16 +1052,6 @@ def extract_key(key: str) -> str: args.configserver + key)[1] -def open_client_process( - client_args: str, - universal_newlines: bool = False, - stderr_file: Optional[TextIO] = None): - return Popen( - shlex.split(client_args), stdin=PIPE, stdout=PIPE, - stderr=stderr_file if stderr_file is not None else PIPE, - universal_newlines=True if universal_newlines else None) - - def do_run_tests(jobs, test_suite: TestSuite, parallel): if jobs > 1 and len(test_suite.parallel_tests) > 0: print("Found", len(test_suite.parallel_tests), "parallel tests and", len(test_suite.sequential_tests), "sequential tests") @@ -1165,7 +1121,7 @@ def main(args): global server_logs_level global restarted_tests - if not check_server_started(args.client, args.server_check_retries): + if not check_server_started(args): msg = "Server is not responding. Cannot execute 'SELECT 1' query. \ If you are using split build, you have to specify -c option." 
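A standalone sketch of the driver-based health check and build-flag probe that replace the clickhouse-client subprocess calls above; the host, port, retry count and the single probe query are placeholders rather than the script's exact values:

    # Sketch: wait for the server over the native protocol, then read one
    # build option. Host, port and retry count are placeholders.
    from time import sleep
    import clickhouse_driver

    def wait_for_server(host='localhost', port=9000, retries=20):
        client = clickhouse_driver.Client(host=host, port=port)
        for _ in range(retries):
            try:
                client.execute('SELECT 1')
                return client
            except (ConnectionRefusedError, clickhouse_driver.errors.NetworkError):
                sleep(0.5)
        raise RuntimeError('server did not start')

    client = wait_for_server()
    # Same single-value pattern as clickhouse_execute_one(): first column of first row.
    cxx_flags = client.execute(
        "SELECT value FROM system.build_options WHERE name = 'CXX_FLAGS'")[0][0]
    print('sanitizer build' if '-fsanitize' in cxx_flags else 'plain build')
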
if args.hung_check: @@ -1175,13 +1131,12 @@ def main(args): print_stacktraces() raise Exception(msg) - args.build_flags = collect_build_flags(args.client) + args.build_flags = collect_build_flags(args) if args.skip: args.skip = set(args.skip) base_dir = os.path.abspath(args.queries) - tmp_dir = os.path.abspath(args.tmp) # Keep same default values as in queries/shell_config.sh os.environ.setdefault("CLICKHOUSE_BINARY", args.binary) @@ -1212,17 +1167,12 @@ def main(args): create_database_retries = 0 while create_database_retries < MAX_RETRIES: start_time = datetime.now() - - client_cmd = args.client + " " + get_additional_client_options(args) - - clickhouse_proc_create = open_client_process(client_cmd, universal_newlines=True) - - (stdout, stderr) = clickhouse_proc_create.communicate(("CREATE DATABASE IF NOT EXISTS " + db_name + get_db_engine(args, db_name))) - - total_time = (datetime.now() - start_time).total_seconds() - - if not need_retry(args, stdout, stderr, total_time): - break + try: + clickhouse_execute(args, "CREATE DATABASE IF NOT EXISTS " + db_name + get_db_engine(args, db_name)) + except Exception as e: + total_time = (datetime.now() - start_time).total_seconds() + if not need_retry_error(args, e, total_time): + break create_database_retries += 1 if args.database and args.database != "test": @@ -1249,18 +1199,14 @@ def main(args): # Some queries may execute in background for some time after test was finished. This is normal. for _ in range(1, 60): - timeout, processlist = get_processlist(args) - if timeout or not processlist: + processlist = get_processlist(args) + if processlist.empty: break sleep(1) - if timeout or processlist: - if processlist: - print(colored("\nFound hung queries in processlist:", args, "red", attrs=["bold"])) - print(processlist) - else: - print(colored("Seems like server hung and cannot respond to queries", args, "red", attrs=["bold"])) - + if not processlist.empty: + print(colored("\nFound hung queries in processlist:", args, "red", attrs=["bold"])) + print(processlist) print_stacktraces() exit_code.value = 1 @@ -1305,16 +1251,20 @@ def find_binary(name): def get_additional_client_options(args): if args.client_option: return ' '.join('--' + option for option in args.client_option) - return '' - def get_additional_client_options_url(args): if args.client_option: return '&'.join(args.client_option) - return '' +def get_additional_client_options_dict(args): + settings = {} + if args.client_option: + for key, value in map(lambda x: x.split('='), args.client_option): + settings[key] = value + return settings + if __name__ == '__main__': stop_time = None @@ -1433,14 +1383,26 @@ if __name__ == '__main__': if args.configclient: args.client += ' --config-file=' + args.configclient - if os.getenv("CLICKHOUSE_HOST"): - args.client += ' --host=' + os.getenv("CLICKHOUSE_HOST") + tcp_host = os.getenv("CLICKHOUSE_HOST") + if tcp_host is not None: + args.tcp_host = tcp_host + args.client += f' --host={tcp_host}' + else: + args.tcp_host = 'localhost' - args.tcp_port = int(os.getenv("CLICKHOUSE_PORT_TCP", "9000")) - args.client += f" --port={args.tcp_port}" + tcp_port = os.getenv("CLICKHOUSE_PORT_TCP") + if tcp_port is not None: + args.tcp_port = int(tcp_port) + args.client += f" --port={tcp_port}" + else: + args.tcp_port = 9000 - if os.getenv("CLICKHOUSE_DATABASE"): - args.client += ' --database=' + os.getenv("CLICKHOUSE_DATABASE") + client_database = os.getenv("CLICKHOUSE_DATABASE") + if client_database is not None: + args.client += f' --database={client_database}' + 
args.client_database = client_database + else: + args.client_database = 'default' if args.client_option: # Set options for client @@ -1468,4 +1430,8 @@ if __name__ == '__main__': if args.jobs is None: args.jobs = multiprocessing.cpu_count() + # configure pandas to make it more like Vertical format + pandas.options.display.max_columns = None + pandas.options.display.width = None + main(args) diff --git a/tests/config/config.d/CORS.xml b/tests/config/config.d/CORS.xml new file mode 100644 index 00000000000..b96209866a7 --- /dev/null +++ b/tests/config/config.d/CORS.xml @@ -0,0 +1,20 @@ + + +
+    <header>
+        <name>Access-Control-Allow-Origin</name>
+        <value>*</value>
+    </header>
+    <header>
+        <name>Access-Control-Allow-Headers</name>
+        <value>origin, x-requested-with</value>
+    </header>
+    <header>
+        <name>Access-Control-Allow-Methods</name>
+        <value>POST, GET, OPTIONS</value>
+    </header>
+    <header>
+        <name>Access-Control-Max-Age</name>
+        <value>86400</value>
+    </header>
+
+
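The headers above are exactly what the new 02029_test_options_requests test greps out of a curl -X OPTIONS call. The same preflight check can be sketched with the Python requests package (the URL and the use of requests are assumptions; the test itself uses curl):

    # Sketch: send a CORS preflight request and print the headers that the
    # CORS.xml config above is expected to add. URL is an assumption.
    import requests

    resp = requests.options('http://localhost:8123/')
    print(resp.status_code)  # expected per the test reference: 204 No Content
    for name in ('Access-Control-Allow-Origin',
                 'Access-Control-Allow-Headers',
                 'Access-Control-Allow-Methods',
                 'Access-Control-Max-Age'):
        print(name, '=', resp.headers.get(name))
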
diff --git a/tests/config/install.sh b/tests/config/install.sh index 05db883c691..94ad55504a8 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -37,6 +37,7 @@ ln -sf $SRC_PATH/config.d/tcp_with_proxy.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/top_level_domains_lists.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/top_level_domains_path.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/encryption.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/CORS.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/zookeeper_log.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/logger.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/ diff --git a/tests/integration/helpers/0_common_instance_config.xml b/tests/integration/helpers/0_common_instance_config.xml index 717f6db7e4b..c848ebdf45c 100644 --- a/tests/integration/helpers/0_common_instance_config.xml +++ b/tests/integration/helpers/0_common_instance_config.xml @@ -9,7 +9,7 @@ users.xml - trace + test /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.err.log 1000M diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 5ac4dbc8ad0..51b7bfcbcb8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2073,11 +2073,11 @@ class ClickHouseInstance: def contains_in_log(self, substring, from_host=False): if from_host: result = subprocess_check_call(["bash", "-c", - f'[ -f {self.logs_dir}/clickhouse-server.log ] && grep "{substring}" {self.logs_dir}/clickhouse-server.log || true' + f'[ -f {self.logs_dir}/clickhouse-server.log ] && grep -a "{substring}" {self.logs_dir}/clickhouse-server.log || true' ]) else: result = self.exec_in_container(["bash", "-c", - f'[ -f /var/log/clickhouse-server/clickhouse-server.log ] && grep "{substring}" /var/log/clickhouse-server/clickhouse-server.log || true' + f'[ -f /var/log/clickhouse-server/clickhouse-server.log ] && grep -a "{substring}" /var/log/clickhouse-server/clickhouse-server.log || true' ]) return len(result) > 0 @@ -2085,18 +2085,18 @@ class ClickHouseInstance: logging.debug(f"grep in log called %s", substring) if from_host: result = subprocess_check_call(["bash", "-c", - f'grep "{substring}" {self.logs_dir}/clickhouse-server.log || true' + f'grep -a "{substring}" {self.logs_dir}/clickhouse-server.log || true' ]) else: result = self.exec_in_container(["bash", "-c", - f'grep "{substring}" /var/log/clickhouse-server/clickhouse-server.log || true' + f'grep -a "{substring}" /var/log/clickhouse-server/clickhouse-server.log || true' ]) logging.debug("grep result %s", result) return result def count_in_log(self, substring): result = self.exec_in_container( - ["bash", "-c", 'grep "{}" /var/log/clickhouse-server/clickhouse-server.log | wc -l'.format(substring)]) + ["bash", "-c", 'grep -a "{}" /var/log/clickhouse-server/clickhouse-server.log | wc -l'.format(substring)]) return result def wait_for_log_line(self, regexp, filename='/var/log/clickhouse-server/clickhouse-server.log', timeout=30, repetitions=1, look_behind_lines=100): @@ -2357,17 +2357,20 @@ class ClickHouseInstance: dictionaries_dir = p.abspath(p.join(instance_config_dir, 'dictionaries')) os.mkdir(dictionaries_dir) - def write_embedded_config(name, dest_dir): + def write_embedded_config(name, dest_dir, fix_log_level=False): with open(p.join(HELPERS_DIR, name), 'r') as f: data = f.read() data = data.replace('yandex', 
self.config_root_name) + if fix_log_level: + data = data.replace('test', 'trace') with open(p.join(dest_dir, name), 'w') as r: r.write(data) logging.debug("Copy common configuration from helpers") # The file is named with 0_ prefix to be processed before other configuration overloads. if self.copy_common_configs: - write_embedded_config('0_common_instance_config.xml', self.config_d_dir) + need_fix_log_level = self.tag != 'latest' + write_embedded_config('0_common_instance_config.xml', self.config_d_dir, need_fix_log_level) write_embedded_config('0_common_instance_users.xml', users_d_dir) diff --git a/tests/integration/test_dictionaries_dependency/test.py b/tests/integration/test_dictionaries_dependency/test.py index d615f90dc79..9b1019822e3 100644 --- a/tests/integration/test_dictionaries_dependency/test.py +++ b/tests/integration/test_dictionaries_dependency/test.py @@ -36,6 +36,8 @@ def cleanup_after_test(): yield finally: for node in nodes: + for i in range(4): + node.query("DROP DICTIONARY IF EXISTS test.other_{}".format(i)) node.query("DROP DICTIONARY IF EXISTS test.adict") node.query("DROP DICTIONARY IF EXISTS test.zdict") node.query("DROP DICTIONARY IF EXISTS atest.dict") @@ -104,8 +106,11 @@ def test_dependency_via_dictionary_database(node): for d_name in d_names: assert node.query("SELECT dictGet({}, 'y', toUInt64(5))".format(d_name)) == "6\n" - check() + + for d_name in d_names: + assert node.query("SELECT dictGet({}, 'y', toUInt64(5))".format(d_name)) == "6\n" # Restart must not break anything. node.restart_clickhouse() - check() + for d_name in d_names: + assert node.query_with_retry("SELECT dictGet({}, 'y', toUInt64(5))".format(d_name)) == "6\n" diff --git a/tests/integration/test_postgresql_replica_database_engine/test.py b/tests/integration/test_postgresql_replica_database_engine/test.py index 92423598a35..d3ce2295614 100644 --- a/tests/integration/test_postgresql_replica_database_engine/test.py +++ b/tests/integration/test_postgresql_replica_database_engine/test.py @@ -77,7 +77,8 @@ def create_materialized_db(ip, port, materialized_database='test_database', postgres_database='postgres_database', settings=[]): - create_query = "CREATE DATABASE {} ENGINE = MaterializedPostgreSQL('{}:{}', '{}', 'postgres', 'mysecretpassword')".format(materialized_database, ip, port, postgres_database) + instance.query(f"DROP DATABASE IF EXISTS {materialized_database}") + create_query = f"CREATE DATABASE {materialized_database} ENGINE = MaterializedPostgreSQL('{ip}:{port}', '{postgres_database}', 'postgres', 'mysecretpassword')" if len(settings) > 0: create_query += " SETTINGS " for i in range(len(settings)): @@ -131,6 +132,14 @@ def assert_nested_table_is_created(table_name, materialized_database='test_datab assert(table_name in database_tables) +@pytest.mark.timeout(320) +def assert_number_of_columns(expected, table_name, database_name='test_database'): + result = instance.query(f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')") + while (int(result) != expected): + time.sleep(1) + result = instance.query(f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')") + + @pytest.mark.timeout(320) def check_tables_are_synchronized(table_name, order_by='key', postgres_database='postgres_database', materialized_database='test_database'): assert_nested_table_is_created(table_name, materialized_database) @@ -479,27 +488,30 @@ def 
test_table_schema_changes(started_cluster):
     expected = instance.query("SELECT key, value1, value3 FROM test_database.postgresql_replica_3 ORDER BY key");
-    altered_table = random.randint(0, 4)
-    cursor.execute("ALTER TABLE postgresql_replica_{} DROP COLUMN value2".format(altered_table))
+    altered_idx = random.randint(0, 4)
+    altered_table = f'postgresql_replica_{altered_idx}'
+    cursor.execute(f"ALTER TABLE {altered_table} DROP COLUMN value2")
     for i in range(NUM_TABLES):
-        cursor.execute("INSERT INTO postgresql_replica_{} VALUES (50, {}, {})".format(i, i, i))
-        cursor.execute("UPDATE postgresql_replica_{} SET value3 = 12 WHERE key%2=0".format(i))
+        cursor.execute(f"INSERT INTO postgresql_replica_{i} VALUES (50, {i}, {i})")
+        cursor.execute(f"UPDATE {altered_table} SET value3 = 12 WHERE key%2=0")
-    assert_nested_table_is_created('postgresql_replica_{}'.format(altered_table))
-    check_tables_are_synchronized('postgresql_replica_{}'.format(altered_table))
+    time.sleep(2)
+    assert_nested_table_is_created(altered_table)
+    assert_number_of_columns(3, altered_table)
+    check_tables_are_synchronized(altered_table)
     print('check1 OK')
     for i in range(NUM_TABLES):
         check_tables_are_synchronized('postgresql_replica_{}'.format(i));
     for i in range(NUM_TABLES):
-        if i != altered_table:
+        if i != altered_idx:
             instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT 51 + number, {}, {}, {} from numbers(49)".format(i, i, i, i))
         else:
             instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT 51 + number, {}, {} from numbers(49)".format(i, i, i))
-    check_tables_are_synchronized('postgresql_replica_{}'.format(altered_table));
+    check_tables_are_synchronized(altered_table);
     print('check2 OK')
     for i in range(NUM_TABLES):
         check_tables_are_synchronized('postgresql_replica_{}'.format(i));
@@ -645,6 +657,7 @@ def test_virtual_columns(started_cluster):
     cursor.execute("ALTER TABLE postgresql_replica_0 ADD COLUMN value2 integer")
     instance.query("INSERT INTO postgres_database.postgresql_replica_0 SELECT number, number, number from numbers(10, 10)")
+    assert_number_of_columns(3, 'postgresql_replica_0')
     check_tables_are_synchronized('postgresql_replica_0');
     result = instance.query('SELECT key, value, value2, _sign, _version FROM test_database.postgresql_replica_0;')
diff --git a/tests/queries/0_stateless/00372_cors_header.reference b/tests/queries/0_stateless/00372_cors_header.reference
index e22493782f0..2f1465d1598 100644
--- a/tests/queries/0_stateless/00372_cors_header.reference
+++ b/tests/queries/0_stateless/00372_cors_header.reference
@@ -1,3 +1,3 @@
 1
+1
 0
-0
diff --git a/tests/queries/0_stateless/01092_memory_profiler.reference b/tests/queries/0_stateless/01092_memory_profiler.reference
index 0d66ea1aee9..74240c4b196 100644
--- a/tests/queries/0_stateless/01092_memory_profiler.reference
+++ b/tests/queries/0_stateless/01092_memory_profiler.reference
@@ -1,2 +1,4 @@
 0
 1
+1
+1
diff --git a/tests/queries/0_stateless/01092_memory_profiler.sql b/tests/queries/0_stateless/01092_memory_profiler.sql
index 0988f56413e..0db88bb14b3 100644
--- a/tests/queries/0_stateless/01092_memory_profiler.sql
+++ b/tests/queries/0_stateless/01092_memory_profiler.sql
@@ -3,6 +3,11 @@ SET allow_introspection_functions = 1;
 SET memory_profiler_step = 1000000;
+SET memory_profiler_sample_probability = 1;
+SET log_queries = 1;
+
 SELECT ignore(groupArray(number), 'test memory profiler') FROM numbers(10000000);
 SYSTEM FLUSH LOGS;
 WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'Memory' AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1);
+WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'MemoryPeak' AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1);
+WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'MemorySample' AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1);
diff --git a/tests/queries/0_stateless/01188_attach_table_from_path.sql b/tests/queries/0_stateless/01188_attach_table_from_path.sql
index 5b99c07e986..9bf401c8ea4 100644
--- a/tests/queries/0_stateless/01188_attach_table_from_path.sql
+++ b/tests/queries/0_stateless/01188_attach_table_from_path.sql
@@ -7,6 +7,7 @@ drop table if exists mt;
 attach table test from 'some/path' (n UInt8) engine=Memory; -- { serverError 48 }
 attach table test from '/etc/passwd' (s String) engine=File(TSVRaw); -- { serverError 481 }
 attach table test from '../../../../../../../../../etc/passwd' (s String) engine=File(TSVRaw); -- { serverError 481 }
+attach table test from 42 (s String) engine=File(TSVRaw); -- { clientError 62 }
 insert into table function file('01188_attach/file/data.TSV', 'TSV', 's String, n UInt8') values ('file', 42);
 attach table file from '01188_attach/file' (s String, n UInt8) engine=File(TSV);
diff --git a/tests/queries/0_stateless/02029_test_options_requests.reference b/tests/queries/0_stateless/02029_test_options_requests.reference
new file mode 100644
index 00000000000..8b001eacbe3
--- /dev/null
+++ b/tests/queries/0_stateless/02029_test_options_requests.reference
@@ -0,0 +1,5 @@
+< HTTP/1.1 204 No Content
+< Access-Control-Allow-Origin: *
+< Access-Control-Allow-Headers: origin, x-requested-with
+< Access-Control-Allow-Methods: POST, GET, OPTIONS
+< Access-Control-Max-Age: 86400
diff --git a/tests/queries/0_stateless/02029_test_options_requests.sh b/tests/queries/0_stateless/02029_test_options_requests.sh
new file mode 100755
index 00000000000..8ea09e3ffe6
--- /dev/null
+++ b/tests/queries/0_stateless/02029_test_options_requests.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+# grep all fields, that should be set for CORS support (see CORS.xml)
+$CLICKHOUSE_CURL "${CLICKHOUSE_URL}" -X OPTIONS -vs 2>&1 | grep -E "HTTP/1.1 204 No Content|Access-Control-Allow-Origin|Access-Control-Allow-Headers|Access-Control-Allow-Methods|Access-Control-Max-Age"
diff --git a/tests/queries/0_stateless/02047_alias_for_table_and_database_name.reference b/tests/queries/0_stateless/02047_alias_for_table_and_database_name.reference
new file mode 100644
index 00000000000..324e7ff9ab8
--- /dev/null
+++ b/tests/queries/0_stateless/02047_alias_for_table_and_database_name.reference
@@ -0,0 +1,2 @@
+numbers numbers
+default default
diff --git a/tests/queries/0_stateless/02047_alias_for_table_and_database_name.sql b/tests/queries/0_stateless/02047_alias_for_table_and_database_name.sql
new file mode 100644
index 00000000000..2fabd2affd4
--- /dev/null
+++ b/tests/queries/0_stateless/02047_alias_for_table_and_database_name.sql
@@ -0,0 +1,2 @@
+SELECT name,table from system.tables where database = 'system' and name = 'numbers';
+SELECt name,database from system.databases where name = 'default';
diff --git a/tests/queries/0_stateless/02047_client_exception.expect b/tests/queries/0_stateless/02047_client_exception.expect
new file mode 100755
index 00000000000..120f8ef11e3
--- /dev/null
+++ b/tests/queries/0_stateless/02047_client_exception.expect
@@ -0,0 +1,32 @@
+#!/usr/bin/expect -f
+# Tags: no-unbundled, no-fasttest
+
+log_user 0
+set timeout 20
+match_max 100000
+
+# A default timeout action is to fail
+expect_after {
+    timeout {
+        exit 1
+    }
+}
+
+set basedir [file dirname $argv0]
+spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion"
+expect ":) "
+
+send -- "DROP TABLE IF EXISTS test_02047\r"
+expect "Ok."
+
+send -- "CREATE TABLE test_02047 (s Int32) ENGINE=Memory()\r"
+expect "Ok."
+
+send -- "INSERT INTO test_02047 SELECT 'f' \r"
+expect "Received exception from server"
+
+send -- "DROP TABLE test_02047\r"
+expect "Ok."
+
+send -- "\4"
+expect eof
diff --git a/tests/queries/0_stateless/02047_client_exception.reference b/tests/queries/0_stateless/02047_client_exception.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02095_function_get_os_kernel_version.reference b/tests/queries/0_stateless/02095_function_get_os_kernel_version.reference
new file mode 100644
index 00000000000..9b075671eac
--- /dev/null
+++ b/tests/queries/0_stateless/02095_function_get_os_kernel_version.reference
@@ -0,0 +1 @@
+Linux
diff --git a/tests/queries/0_stateless/02095_function_get_os_kernel_version.sql b/tests/queries/0_stateless/02095_function_get_os_kernel_version.sql
new file mode 100644
index 00000000000..d62b360f7e0
--- /dev/null
+++ b/tests/queries/0_stateless/02095_function_get_os_kernel_version.sql
@@ -0,0 +1 @@
+WITH splitByChar(' ', getOSKernelVersion()) AS version_pair SELECT version_pair[1]
diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib
index e7fa2dd5404..61491630f46 100755
--- a/tests/queries/0_stateless/replication.lib
+++ b/tests/queries/0_stateless/replication.lib
@@ -40,6 +40,17 @@ function check_replication_consistency()
     table_name_prefix=$1
     check_query_part=$2
 
+    # Wait for all queries to finish (query may still be running if thread is killed by timeout)
+    num_tries=0
+    while [[ $($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE current_database=currentDatabase() AND query LIKE '%$table_name_prefix%'") -ne 1 ]]; do
+        sleep 0.5;
+        num_tries=$((num_tries+1))
+        if [ $num_tries -eq 100 ]; then
+            $CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE current_database=currentDatabase() AND query LIKE '%$table_name_prefix%' FORMAT Vertical"
+            break
+        fi
+    done
+
     # Do not check anything if all replicas are readonly,
     # because in this case all replicas are probably lost (it may happen and it's not a bug)
     res=$($CLICKHOUSE_CLIENT -q "SELECT count() - sum(is_readonly) FROM system.replicas WHERE database=currentDatabase() AND table LIKE '$table_name_prefix%'")