diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 7d6b9c0e374..d15715c7b8f 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -14,6 +14,11 @@ unset (_current_dir_name)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w")
+if (SANITIZE STREQUAL "undefined")
+    # 3rd-party libraries are usually not intended to work with UBSan.
+    add_compile_options(-fno-sanitize=undefined)
+endif()
+
set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)
add_subdirectory (boost-cmake)
@@ -157,9 +162,6 @@ if(USE_INTERNAL_SNAPPY_LIBRARY)
    add_subdirectory(snappy)
    set (SNAPPY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/snappy")
-    if(SANITIZE STREQUAL "undefined")
-        target_compile_options(${SNAPPY_LIBRARY} PRIVATE -fno-sanitize=undefined)
-    endif()
endif()
if (USE_INTERNAL_PARQUET_LIBRARY)
diff --git a/contrib/poco b/contrib/poco
index 757d947235b..f49c6ab8d3a 160000
--- a/contrib/poco
+++ b/contrib/poco
@@ -1 +1 @@
-Subproject commit 757d947235b307675cff964f29b19d388140a9eb
+Subproject commit f49c6ab8d3aa71828bd1b411485c21722e8c9d82
diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index bcba50e9d81..eb737948aa6 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -240,6 +240,10 @@ TESTS_TO_SKIP=(
    01354_order_by_tuple_collate_const
    01355_ilike
    01411_bayesian_ab_testing
+    01532_collate_in_low_cardinality
+    01533_collate_in_nullable
+    01542_collate_in_array
+    01543_collate_in_tuple
    _orc_
    arrow
    avro
diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md
index 07ed3e5c884..42416383860 100644
--- a/docs/en/interfaces/cli.md
+++ b/docs/en/interfaces/cli.md
@@ -123,6 +123,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--stacktrace` – If specified, also print the stack trace if an exception occurs.
- `--config-file` – The name of the configuration file.
- `--secure` – If specified, will connect to server over secure connection.
+- `--history_file` — Path to a file containing command history.
- `--param_<name>` — Value for a [query with parameters](#cli-queries-with-parameters).
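+
+For example, a hypothetical invocation such as `clickhouse-client --history_file=/tmp/clickhouse_history --param_limit=10` (the path and parameter name here are illustrative) could supply the value for a parameterized query like:
+
+``` sql
+SELECT number FROM system.numbers LIMIT {limit:UInt64};
+```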
### Configuration Files {#configuration_files} diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index d08e7833b33..b367a97771a 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -36,6 +36,7 @@ toc_title: Adopters | Criteo | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | | Dataliance for China Telecom | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | | Deutsche Bank | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | +| Deeplay | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) | | Diva-e | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | | Ecwid | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) | | eBay | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) | @@ -45,6 +46,7 @@ toc_title: Adopters | FunCorp | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) | | Geniee | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | +| ICA | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) | | Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | Infovista | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | InnoGames | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | @@ -68,6 +70,7 @@ toc_title: Adopters | Nuna Inc. 
| Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) |
| OneAPM | Monitoring and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
| Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
+| Percona | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) |
| Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) |
| PostHog | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) |
| Postmates | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 31a8e896438..a37ae685368 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -571,7 +571,7 @@ For more information, see the MergeTreeSettings.h header file.
Fine tuning for tables in the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
-This setting has higher priority.
+This setting has a higher priority.
For more information, see the MergeTreeSettings.h header file.
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 63ff688e51a..d83f7d6c219 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -1765,6 +1765,23 @@ Default value: `0`.
- [Distributed Table Engine](../../engines/table-engines/special/distributed.md#distributed)
- [Managing Distributed Tables](../../sql-reference/statements/system.md#query-language-system-distributed)
+
+
+## use_compact_format_in_distributed_parts_names {#use_compact_format_in_distributed_parts_names}
+
+Uses a compact format for storing blocks for asynchronous (`insert_distributed_sync = 0`) INSERT into tables with the `Distributed` engine.
+
+Possible values:
+
+- 0 — Uses the `user[:password]@host:port#default_database` directory format.
+- 1 — Uses the `[shard{shard_index}[_replica{replica_index}]]` directory format.
+
+Default value: `1`.
+
+!!! note "Note"
+    - With `use_compact_format_in_distributed_parts_names=0`, changes to the cluster definition are not applied to asynchronous INSERTs.
+    - With `use_compact_format_in_distributed_parts_names=1`, changing the order of the nodes in the cluster definition changes the `shard_index`/`replica_index`, so be aware. A usage sketch follows after this section.
+
## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size}
Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session.
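+A minimal usage sketch for `use_compact_format_in_distributed_parts_names` (the `Distributed` table `dist_table` is illustrative and assumed to exist):
+
+``` sql
+SET use_compact_format_in_distributed_parts_names = 1;
+SET insert_distributed_sync = 0; -- queue blocks on disk instead of sending them synchronously
+INSERT INTO dist_table VALUES (1);
+-- the queued block is stored under a directory named like `shard1_replica1`
+```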
diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md b/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md
index fa603b4b155..1b39e9d0eb1 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md
@@ -4,6 +4,6 @@ toc_priority: 140
# sumWithOverflow {#sumwithoverflowx}
-Computes the sum of the numbers, using the same data type for the result as for the input parameters. If the sum exceeds the maximum value for this data type, the function returns an error.
+Computes the sum of the numbers, using the same data type for the result as for the input parameters. If the sum exceeds the maximum value for this data type, it is calculated with overflow.
Only works for numbers.
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
new file mode 100644
index 00000000000..93b9b340e89
--- /dev/null
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
@@ -0,0 +1,91 @@
+---
+toc_priority: 46
+toc_title: Polygon Dictionaries With Grids
+---
+
+
+# Polygon dictionaries {#polygon-dictionaries}
+
+Polygon dictionaries allow you to efficiently search for the polygon containing specified points.
+For example: defining a city area by geographical coordinates.
+
+Example configuration:
+
+``` xml
+<dictionary>
+    <structure>
+        <key>
+            <name>key</name>
+            <type>Array(Array(Array(Array(Float64))))</type>
+        </key>
+        <attribute>
+            <name>name</name>
+            <type>String</type>
+            <null_value></null_value>
+        </attribute>
+        <attribute>
+            <name>value</name>
+            <type>UInt64</type>
+            <null_value>0</null_value>
+        </attribute>
+    </structure>
+    <layout>
+        <polygon/>
+    </layout>
+</dictionary>
+```
+
+The corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md#create-dictionary-query):
+``` sql
+CREATE DICTIONARY polygon_dict_name (
+    key Array(Array(Array(Array(Float64)))),
+    name String,
+    value UInt64
+)
+PRIMARY KEY key
+LAYOUT(POLYGON())
+...
+```
+
+When configuring the polygon dictionary, the key must have one of two types:
+- A simple polygon. It is an array of points.
+- MultiPolygon. It is an array of polygons. Each polygon is a two-dimensional array of points. The first element of this array is the outer boundary of the polygon, and subsequent elements specify areas to be excluded from it.
+
+Points can be specified as an array or a tuple of their coordinates. In the current implementation, only two-dimensional points are supported.
+
+The user can [upload their own data](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) in all formats supported by ClickHouse.
+
+
+There are 3 types of [in-memory storage](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) available:
+
+- POLYGON_SIMPLE. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes.
+
+- POLYGON_INDEX_EACH. A separate index is built for each polygon, which in most cases allows you to quickly check whether a point belongs to it (optimized for geographical regions).
+Also, a grid is superimposed on the area under consideration, which significantly narrows the number of polygons under consideration.
+The grid is created by recursively dividing the cell into 16 equal parts and is configured with two parameters.
+The division stops when the recursion depth reaches MAX_DEPTH or when the cell crosses no more than MIN_INTERSECTIONS polygons.
+To answer a query, the corresponding cell is located, and the indexes of the polygons stored in it are consulted in turn.
+
+- POLYGON_INDEX_CELL. This placement also creates the grid described above. The same options are available. For each leaf cell, an index is built on all pieces of polygons that fall into it, which allows you to quickly answer a query.
+
+- POLYGON. A synonym for POLYGON_INDEX_CELL.
+
+Dictionary queries are carried out using standard [functions](../../../sql-reference/functions/ext-dict-functions.md) for working with external dictionaries.
+An important difference is that here the keys are the points for which you want to find the polygon containing them.
+
+Example of working with the dictionary defined above:
+``` sql
+CREATE TABLE points (
+    x Float64,
+    y Float64
+)
+...
+SELECT tuple(x, y) AS key, dictGet(dict_name, 'name', key), dictGet(dict_name, 'value', key) FROM points ORDER BY x, y;
+```
+
+As a result of executing the last command, for each point in the 'points' table, a minimum-area polygon containing the point will be found, and the requested attributes will be output.
diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md
index a4e5e3655c6..57e071d6734 100644
--- a/docs/en/sql-reference/statements/select/order-by.md
+++ b/docs/en/sql-reference/statements/select/order-by.md
@@ -221,3 +221,85 @@ returns
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```
+
+## OFFSET FETCH Clause {#offset-fetch}
+
+`OFFSET` and `FETCH` allow you to retrieve data by portions. They specify a block of rows which you want to get in a single query.
+
+``` sql
+[OFFSET offset_row_count {ROW | ROWS}] [FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES}]
+```
+
+The `offset_row_count` or `fetch_row_count` value can be a number or a literal constant. You can omit `fetch_row_count`; by default, it equals 1.
+
+`OFFSET` specifies the number of rows to skip before starting to return rows from the query.
+
+`FETCH` specifies the maximum number of rows that can be in the result of a query.
+
+The `ONLY` option is used to return rows that immediately follow the rows omitted by the `OFFSET`. In this case the `FETCH` is an alternative to the [LIMIT](../../../sql-reference/statements/select/limit.md) clause. For example, the following query
+
+``` sql
+SELECT * FROM test_fetch ORDER BY a OFFSET 1 ROW FETCH FIRST 3 ROWS ONLY;
+```
+
+is identical to the query
+
+``` sql
+SELECT * FROM test_fetch ORDER BY a LIMIT 3 OFFSET 1;
+```
+
+The `WITH TIES` option is used to return any additional rows that tie for the last place in the result set according to the `ORDER BY` clause. For example, if `fetch_row_count` is set to 5 but two additional rows match the values of the `ORDER BY` columns in the fifth row, the result set will contain seven rows.
+
+!!! note "Note"
+    According to the standard, the `OFFSET` clause must come before the `FETCH` clause if both are present.
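+
+For instance, a sketch that relies on the default `fetch_row_count` of 1 (using the `test_fetch` table from the examples below):
+
+``` sql
+-- equivalent to LIMIT 1 OFFSET 3
+SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROWS FETCH FIRST ROW ONLY;
+```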
+
+### Examples {#examples}
+
+Input table:
+
+``` text
+┌─a─┬─b─┐
+│ 1 │ 1 │
+│ 2 │ 1 │
+│ 3 │ 4 │
+│ 1 │ 3 │
+│ 5 │ 4 │
+│ 0 │ 6 │
+│ 5 │ 7 │
+└───┴───┘
+```
+
+Usage of the `ONLY` option:
+
+``` sql
+SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS ONLY;
+```
+
+Result:
+
+``` text
+┌─a─┬─b─┐
+│ 2 │ 1 │
+│ 3 │ 4 │
+│ 5 │ 4 │
+└───┴───┘
+```
+
+Usage of the `WITH TIES` option:
+
+``` sql
+SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS WITH TIES;
+```
+
+Result:
+
+``` text
+┌─a─┬─b─┐
+│ 2 │ 1 │
+│ 3 │ 4 │
+│ 5 │ 4 │
+│ 5 │ 7 │
+└───┴───┘
+```
+
+[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/order-by/)
diff --git a/docs/ru/engines/table-engines/special/buffer.md b/docs/ru/engines/table-engines/special/buffer.md
index 986fe9adbb9..75ce12f50fa 100644
--- a/docs/ru/engines/table-engines/special/buffer.md
+++ b/docs/ru/engines/table-engines/special/buffer.md
@@ -64,6 +64,6 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10
Таблицы типа Buffer используются в тех случаях, когда от большого количества серверов поступает слишком много INSERT-ов в единицу времени, и нет возможности заранее самостоятельно буферизовать данные перед вставкой, в результате чего, INSERT-ы не успевают выполняться.
-Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел «Производительность»).
+Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел [«Производительность»](../../../introduction/performance/)).
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/buffer/)
diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md
index 2a23a27f4c0..2745718381b 100644
--- a/docs/ru/operations/server-configuration-parameters/settings.md
+++ b/docs/ru/operations/server-configuration-parameters/settings.md
@@ -555,6 +555,22 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
```
+## replicated\_merge\_tree {#server_configuration_parameters-replicated_merge_tree}
+
+Тонкая настройка таблиц в [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
+
+Эта настройка имеет более высокий приоритет.
+
+Подробнее смотрите в заголовочном файле MergeTreeSettings.h.
+
+**Пример**
+
+``` xml
+<replicated_merge_tree>
+    <max_suspicious_broken_parts>5</max_suspicious_broken_parts>
+</replicated_merge_tree>
+```
+
## openSSL {#server_configuration_parameters-openssl}
Настройки клиента/сервера SSL.
diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
index bfd8663a754..70839d21a78 100644
--- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
+++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
@@ -1,4 +1,4 @@
-# Cловари полигонов {#slovari-polygonov}
+# Cловари полигонов {#polygon-dictionaries}
Словари полигонов позволяют эффективно искать полигон, в который попадают данные точки, среди множества полигонов.
Для примера: определение района города по географическим координатам.
diff --git a/docs/ru/sql-reference/statements/select/order-by.md b/docs/ru/sql-reference/statements/select/order-by.md
index 0476c5da5af..ea0f40b2dc0 100644
--- a/docs/ru/sql-reference/statements/select/order-by.md
+++ b/docs/ru/sql-reference/statements/select/order-by.md
@@ -214,3 +214,85 @@ ORDER BY
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```
+
+## Секция OFFSET FETCH {#offset-fetch}
+
+`OFFSET` и `FETCH` позволяют извлекать данные по частям. Они указывают строки, которые вы хотите получить в результате запроса.
+
+``` sql
+[OFFSET offset_row_count {ROW | ROWS}] [FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES}]
+```
+
+`offset_row_count` или `fetch_row_count` может быть числом или литеральной константой. Если вы не используете `fetch_row_count`, то его значение равно 1.
+
+`OFFSET` указывает количество строк, которые необходимо пропустить перед началом возврата строк из запроса.
+
+`FETCH` указывает максимальное количество строк, которые могут быть получены в результате запроса.
+
+Опция `ONLY` используется для возврата строк, которые следуют сразу же за строками, пропущенными секцией `OFFSET`. В этом случае `FETCH` — это альтернатива [LIMIT](../../../sql-reference/statements/select/limit.md). Например, следующий запрос
+
+``` sql
+SELECT * FROM test_fetch ORDER BY a OFFSET 1 ROW FETCH FIRST 3 ROWS ONLY;
+```
+
+идентичен запросу
+
+``` sql
+SELECT * FROM test_fetch ORDER BY a LIMIT 3 OFFSET 1;
+```
+
+Опция `WITH TIES` используется для возврата дополнительных строк, которые привязываются к последней в результате запроса. Например, если `fetch_row_count` имеет значение 5 и существуют еще 2 строки с такими же значениями столбцов, указанных в `ORDER BY`, что и у пятой строки результата, то финальный набор будет содержать 7 строк.
+
+!!! note "Примечание"
+    Секция `OFFSET` должна находиться перед секцией `FETCH`, если обе присутствуют.
+
+### Примеры {#examples}
+
+Входная таблица:
+
+``` text
+┌─a─┬─b─┐
+│ 1 │ 1 │
+│ 2 │ 1 │
+│ 3 │ 4 │
+│ 1 │ 3 │
+│ 5 │ 4 │
+│ 0 │ 6 │
+│ 5 │ 7 │
+└───┴───┘
+```
+
+Использование опции `ONLY`:
+
+``` sql
+SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS ONLY;
+```
+
+Результат:
+
+``` text
+┌─a─┬─b─┐
+│ 2 │ 1 │
+│ 3 │ 4 │
+│ 5 │ 4 │
+└───┴───┘
+```
+
+Использование опции `WITH TIES`:
+
+``` sql
+SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS WITH TIES;
+```
+
+Результат:
+
+``` text
+┌─a─┬─b─┐
+│ 2 │ 1 │
+│ 3 │ 4 │
+│ 5 │ 4 │
+│ 5 │ 7 │
+└───┴───┘
+```
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/order-by/)
diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt
index 3577ee3df31..3817bc62bcb 100644
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@@ -227,9 +227,6 @@ else ()
        install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-git-import DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
        list(APPEND CLICKHOUSE_BUNDLE clickhouse-git-import)
    endif ()
-    if(ENABLE_CLICKHOUSE_ODBC_BRIDGE)
-        list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge)
-    endif()
    install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index 005fece3277..fc452c165e7 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -75,6 +75,7 @@
#include
#include
#include
+#include <Formats/registerFormats.h>
#include
#include
#include
@@ -463,6 +464,7 @@ private:
    {
        UseSSL use_ssl;
+        registerFormats();
        registerFunctions();
        registerAggregateFunctions();
@@ -2329,6 +2331,7 @@ public:
            ("query-fuzzer-runs", po::value<int>()->default_value(0), "query fuzzer runs")
            ("opentelemetry-traceparent", po::value<std::string>(), "OpenTelemetry traceparent header as described by W3C Trace Context recommendation")
            ("opentelemetry-tracestate", po::value<std::string>(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation")
+            ("history_file", po::value<std::string>(), "path to history file")
        ;
        Settings cmd_settings;
@@ -2485,6 +2488,8 @@ public:
            config().setInt("suggestion_limit", options["suggestion_limit"].as<int>());
        if (options.count("highlight"))
            config().setBool("highlight", options["highlight"].as<bool>());
+        if (options.count("history_file"))
+            config().setString("history_file", options["history_file"].as<std::string>());
        if ((query_fuzzer_runs = options["query-fuzzer-runs"].as<int>()))
        {
diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp
index 08a7e50a9d7..c2946e12c34 100644
--- a/programs/copier/ClusterCopierApp.cpp
+++ b/programs/copier/ClusterCopierApp.cpp
@@ -1,6 +1,7 @@
#include "ClusterCopierApp.h"
#include
#include
+#include <Formats/registerFormats.h>
#include
@@ -122,6 +123,7 @@ void ClusterCopierApp::mainImpl()
    registerStorages();
    registerDictionaries();
    registerDisks();
+    registerFormats();
    static const std::string default_database = "_local";
    DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared<DatabaseMemory>(default_database, *context));
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index b9dde555788..9ecc2a50a42 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -33,6 +33,7 @@
#include
#include
#include
+#include <Formats/registerFormats.h>
#include
#include
#include
@@ -224,6 +225,7 @@ try
    registerStorages();
    registerDictionaries();
    registerDisks();
+    registerFormats();
    /// Maybe useless
    if (config().has("macros"))
diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp
index 756aab0a574..d0d7f201c68 100644
--- a/programs/obfuscator/Obfuscator.cpp
+++ b/programs/obfuscator/Obfuscator.cpp
@@ -23,6 +23,7 @@
#include
#include
#include
+#include <Formats/registerFormats.h>
#include
#include
#include
@@ -1050,6 +1051,8 @@ try
    using namespace DB;
    namespace po = boost::program_options;
+    registerFormats();
+
    po::options_description description = createOptionsDescription("Options", getTerminalWidth());
    description.add_options()
        ("help", "produce help message")
diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt
index 8cfa110adad..11864354619 100644
--- a/programs/odbc-bridge/CMakeLists.txt
+++ b/programs/odbc-bridge/CMakeLists.txt
@@ -10,19 +10,8 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
    PingHandler.cpp
    SchemaAllowedHandler.cpp
    validateODBCConnectionString.cpp
+    odbc-bridge.cpp
)
-set (CLICKHOUSE_ODBC_BRIDGE_LINK
-    PRIVATE
-        clickhouse_parsers
-        clickhouse_aggregate_functions
-        daemon
-        dbms
-        Poco::Data
-    PUBLIC
-        Poco::Data::ODBC
-)
-
-clickhouse_program_add_library(odbc-bridge)
if (OS_LINUX)
    # clickhouse-odbc-bridge is always a separate binary.
@@ -30,10 +19,17 @@ if (OS_LINUX)
    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()
-add_executable(clickhouse-odbc-bridge odbc-bridge.cpp)
-set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
+add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
-clickhouse_program_link_split_binary(odbc-bridge)
+target_link_libraries(clickhouse-odbc-bridge PRIVATE
+    daemon
+    dbms
+    clickhouse_parsers
+    Poco::Data
+    Poco::Data::ODBC
+)
+
+set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
if (USE_GDB_ADD_INDEX)
    add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM)
diff --git a/programs/odbc-bridge/ODBCBridge.cpp b/programs/odbc-bridge/ODBCBridge.cpp
index 3f119fbf7ba..24aa8e32ddb 100644
--- a/programs/odbc-bridge/ODBCBridge.cpp
+++ b/programs/odbc-bridge/ODBCBridge.cpp
@@ -18,11 +18,13 @@
#include
#include
#include
+#include <Formats/registerFormats.h>
#include
#include
#include
#include
#include
+
namespace DB
{
namespace ErrorCodes
@@ -160,6 +162,8 @@ int ODBCBridge::main(const std::vector<std::string> & /*args*/)
    if (is_help)
        return Application::EXIT_OK;
+    registerFormats();
+
    LOG_INFO(log, "Starting up");
    Poco::Net::ServerSocket socket;
    auto address = socketBindListen(socket, hostname, port, log);
diff --git a/programs/odbc-bridge/tests/CMakeLists.txt b/programs/odbc-bridge/tests/CMakeLists.txt
index 60e7afab969..3e0af8c940f 100644
--- a/programs/odbc-bridge/tests/CMakeLists.txt
+++ b/programs/odbc-bridge/tests/CMakeLists.txt
@@ -1,3 +1,2 @@
-add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp)
-clickhouse_target_link_split_lib(validate-odbc-connection-string odbc-bridge)
+add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp ../validateODBCConnectionString.cpp)
target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse_common_io)
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index b85cb5e75f2..ed18793a537 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -51,6 +51,7 @@
#include
#include
#include
+#include <Formats/registerFormats.h>
#include
#include
#include
@@ -266,6 +267,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
    registerStorages();
    registerDictionaries();
    registerDisks();
+    registerFormats();
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); diff --git a/programs/server/config.d/test_cluster_with_incorrect_pw.xml b/programs/server/config.d/test_cluster_with_incorrect_pw.xml new file mode 120000 index 00000000000..4e4b334c6d1 --- /dev/null +++ b/programs/server/config.d/test_cluster_with_incorrect_pw.xml @@ -0,0 +1 @@ +../../../tests/config/config.d/test_cluster_with_incorrect_pw.xml \ No newline at end of file diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 0c7425327ad..abf4ff12d5a 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -198,6 +198,7 @@ namespace /// Serialize the list of ATTACH queries to a string. std::stringstream ss; + ss.exceptions(std::ios::failbit); for (const ASTPtr & query : queries) ss << *query << ";\n"; String file_contents = std::move(ss).str(); @@ -353,6 +354,7 @@ String DiskAccessStorage::getStorageParamsJSON() const if (readonly) json.set("readonly", readonly.load()); std::ostringstream oss; + oss.exceptions(std::ios::failbit); Poco::JSON::Stringifier::stringify(json, oss); return oss.str(); } diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index b20ef3a39a9..c9f00f2f4ab 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -151,6 +151,7 @@ String LDAPAccessStorage::getStorageParamsJSON() const params_json.set("roles", default_role_names); std::ostringstream oss; + oss.exceptions(std::ios::failbit); Poco::JSON::Stringifier::stringify(params_json, oss); return oss.str(); diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index ce10ebf0bcc..ba3c2d5b8e5 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -461,6 +461,7 @@ String UsersConfigAccessStorage::getStorageParamsJSON() const if (!path.empty()) json.set("path", path); std::ostringstream oss; + oss.exceptions(std::ios::failbit); Poco::JSON::Stringifier::stringify(json, oss); return oss.str(); } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h index 02b9003eb96..ba9b235de07 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -245,6 +245,7 @@ public: { DB::writeIntBinary(this->data(place).total_values, buf); std::ostringstream rng_stream; + rng_stream.exceptions(std::ios::failbit); rng_stream << this->data(place).rng; DB::writeStringBinary(rng_stream.str(), buf); } @@ -275,6 +276,7 @@ public: std::string rng_string; DB::readStringBinary(rng_string, buf); std::istringstream rng_stream(rng_string); + rng_stream.exceptions(std::ios::failbit); rng_stream >> this->data(place).rng; } @@ -564,6 +566,7 @@ public: { DB::writeIntBinary(data(place).total_values, buf); std::ostringstream rng_stream; + rng_stream.exceptions(std::ios::failbit); rng_stream << data(place).rng; DB::writeStringBinary(rng_stream.str(), buf); } @@ -598,6 +601,7 @@ public: std::string rng_string; DB::readStringBinary(rng_string, buf); std::istringstream rng_stream(rng_string); + rng_stream.exceptions(std::ios::failbit); rng_stream >> data(place).rng; } diff --git a/src/AggregateFunctions/ReservoirSampler.h b/src/AggregateFunctions/ReservoirSampler.h index bbb8172b209..f82b0b856a0 100644 --- 
a/src/AggregateFunctions/ReservoirSampler.h
+++ b/src/AggregateFunctions/ReservoirSampler.h
@@ -191,6 +191,7 @@ public:
        std::string rng_string;
        DB::readStringBinary(rng_string, buf);
        std::istringstream rng_stream(rng_string);
+        rng_stream.exceptions(std::ios::failbit);
        rng_stream >> rng;
        for (size_t i = 0; i < samples.size(); ++i)
@@ -205,6 +206,7 @@ public:
        DB::writeIntBinary(total_values, buf);
        std::ostringstream rng_stream;
+        rng_stream.exceptions(std::ios::failbit);
        rng_stream << rng;
        DB::writeStringBinary(rng_stream.str(), buf);
diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp
index 282aaabd119..b6cb55fa979 100644
--- a/src/Client/MultiplexedConnections.cpp
+++ b/src/Client/MultiplexedConnections.cpp
@@ -223,6 +223,7 @@ std::string MultiplexedConnections::dumpAddressesUnlocked() const
{
    bool is_first = true;
    std::ostringstream os;
+    os.exceptions(std::ios::failbit);
    for (const ReplicaState & state : replica_states)
    {
        const Connection * connection = state.connection;
diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp
index e4d17c586ac..9b948236943 100644
--- a/src/Columns/ColumnArray.cpp
+++ b/src/Columns/ColumnArray.cpp
@@ -324,8 +324,7 @@ void ColumnArray::popBack(size_t n)
    offsets_data.resize_assume_reserved(offsets_data.size() - n);
}
-
-int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
+int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator) const
{
    const ColumnArray & rhs = assert_cast<const ColumnArray &>(rhs_);
@@ -334,8 +333,15 @@
    size_t rhs_size = rhs.sizeAt(m);
    size_t min_size = std::min(lhs_size, rhs_size);
    for (size_t i = 0; i < min_size; ++i)
-        if (int res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint))
+    {
+        int res;
+        if (collator)
+            res = getData().compareAtWithCollation(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint, *collator);
+        else
+            res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint);
+        if (res)
            return res;
+    }
    return lhs_size < rhs_size ?
-1
@@ -344,6 +350,16 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir
        : 1);
}
+int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
+{
+    return compareAtImpl(n, m, rhs_, nan_direction_hint);
+}
+
+int ColumnArray::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const
+{
+    return compareAtImpl(n, m, rhs_, nan_direction_hint, &collator);
+}
+
void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
                                PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
                                int direction, int nan_direction_hint) const
@@ -352,27 +368,26 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
                                compare_results, direction, nan_direction_hint);
}
-namespace
+template <bool positive>
+struct ColumnArray::Cmp
{
-    template <bool positive>
-    struct Less
+    const ColumnArray & parent;
+    int nan_direction_hint;
+    const Collator * collator;
+
+    Cmp(const ColumnArray & parent_, int nan_direction_hint_, const Collator * collator_=nullptr)
+        : parent(parent_), nan_direction_hint(nan_direction_hint_), collator(collator_) {}
+
+    int operator()(size_t lhs, size_t rhs) const
    {
-        const ColumnArray & parent;
-        int nan_direction_hint;
-
-        Less(const ColumnArray & parent_, int nan_direction_hint_)
-            : parent(parent_), nan_direction_hint(nan_direction_hint_) {}
-
-        bool operator()(size_t lhs, size_t rhs) const
-        {
-            if (positive)
-                return parent.compareAt(lhs, rhs, parent, nan_direction_hint) < 0;
-            else
-                return parent.compareAt(lhs, rhs, parent, nan_direction_hint) > 0;
-        }
-    };
-}
-
+        int res;
+        if (collator)
+            res = parent.compareAtWithCollation(lhs, rhs, parent, nan_direction_hint, *collator);
+        else
+            res = parent.compareAt(lhs, rhs, parent, nan_direction_hint);
+        return positive ? res : -res;
+    }
+};
void ColumnArray::reserve(size_t n)
{
@@ -753,7 +768,8 @@ ColumnPtr ColumnArray::indexImpl(const PaddedPODArray<T> & indexes, size_t limit
INSTANTIATE_INDEX_IMPL(ColumnArray)
-void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
+template <typename Comparator>
+void ColumnArray::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const
{
    size_t s = size();
    if (limit >= s)
@@ -763,23 +779,16 @@
    for (size_t i = 0; i < s; ++i)
        res[i] = i;
+    auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
+
    if (limit)
-    {
-        if (reverse)
-            std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<false>(*this, nan_direction_hint));
-        else
-            std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<true>(*this, nan_direction_hint));
-    }
+        std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
    else
-    {
-        if (reverse)
-            std::sort(res.begin(), res.end(), Less<false>(*this, nan_direction_hint));
-        else
-            std::sort(res.begin(), res.end(), Less<true>(*this, nan_direction_hint));
-    }
+        std::sort(res.begin(), res.end(), less);
}
-void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
+template <typename Comparator>
+void ColumnArray::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const
{
    if (equal_range.empty())
        return;
@@ -792,20 +801,19 @@
    if (limit)
        --number_of_ranges;
+    auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
+
    EqualRanges new_ranges;
    for (size_t i = 0; i < number_of_ranges; ++i)
    {
        const auto & [first, last] = equal_range[i];
-        if (reverse)
-            std::sort(res.begin() + first, res.begin() + last, Less<false>(*this, nan_direction_hint));
-        else
-            std::sort(res.begin() + first, res.begin() + last, Less<true>(*this, nan_direction_hint));
+        std::sort(res.begin() + first, res.begin() + last, less);
        auto new_first = first;
        for (auto j = first + 1; j < last; ++j)
        {
-            if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0)
+            if (cmp(res[new_first], res[j]) != 0)
            {
                if (j - new_first > 1)
                    new_ranges.emplace_back(new_first, j);
@@ -827,14 +835,11 @@
    /// Since then we are working inside the interval.
-    if (reverse)
-        std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less<false>(*this, nan_direction_hint));
-    else
-        std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less<true>(*this, nan_direction_hint));
+    std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
    auto new_first = first;
    for (auto j = first + 1; j < limit; ++j)
    {
-        if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0)
+        if (cmp(res[new_first], res[j]) != 0)
        {
            if (j - new_first > 1)
                new_ranges.emplace_back(new_first, j);
@@ -845,7 +850,7 @@
    auto new_last = limit;
    for (auto j = limit; j < last; ++j)
    {
-        if (compareAt(res[new_first], res[j], *this, nan_direction_hint) == 0)
+        if (cmp(res[new_first], res[j]) == 0)
        {
            std::swap(res[new_last], res[j]);
            ++new_last;
@@ -859,6 +864,39 @@
    equal_range = std::move(new_ranges);
}
+void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
+{
+    if (reverse)
+        getPermutationImpl(limit, res, Cmp<false>(*this, nan_direction_hint));
+    else
+        getPermutationImpl(limit, res, Cmp<true>(*this, nan_direction_hint));
+
+}
+
+void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
+{
+    if (reverse)
+        updatePermutationImpl(limit, res, equal_range, Cmp<false>(*this, nan_direction_hint));
+    else
+        updatePermutationImpl(limit, res, equal_range, Cmp<true>(*this, nan_direction_hint));
+}
+
+void ColumnArray::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
+{
+    if (reverse)
+        getPermutationImpl(limit, res, Cmp<false>(*this, nan_direction_hint, &collator));
+    else
+        getPermutationImpl(limit, res, Cmp<true>(*this, nan_direction_hint, &collator));
+}
+
+void ColumnArray::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
+{
+    if (reverse)
+        updatePermutationImpl(limit, res, equal_range, Cmp<false>(*this, nan_direction_hint, &collator));
+    else
+        updatePermutationImpl(limit, res, equal_range, Cmp<true>(*this, nan_direction_hint, &collator));
+}
+
ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
{
    if (replicate_offsets.empty())
diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h
index cec8387ab66..8a02af92dce 100644
--- a/src/Columns/ColumnArray.h
+++ b/src/Columns/ColumnArray.h
@@ -77,8 +77,11 @@ public:
    void compareColumn(const IColumn & rhs, size_t rhs_row_num,
                       PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
                       int direction, int nan_direction_hint) const override;
+    int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override;
    void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
    void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override;
+    void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
+    void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override;
    void reserve(size_t n) override;
    size_t byteSize() const override;
    size_t allocatedBytes() const override;
@@ -132,6 +135,8 @@ public:
        return false;
    }
+    bool isCollationSupported() const override { return getData().isCollationSupported(); }
+
private:
    WrappedPtr data;
    WrappedPtr offsets;
@@ -169,6 +174,17 @@ private:
    ColumnPtr filterTuple(const Filter & filt, ssize_t result_size_hint) const;
    ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint) const;
    ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const;
+
+    int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator=nullptr) const;
+
+    template <typename Comparator>
+    void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const;
+
+    template <typename Comparator>
+    void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const;
+
+    template <bool positive>
+    struct Cmp;
};
diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h
index 4942d27b6c9..3680926cd9b 100644
--- a/src/Columns/ColumnConst.h
+++ b/src/Columns/ColumnConst.h
@@ -248,6 +248,8 @@ public:
    /// The constant value. It is valid even if the size of the column is 0.
    template <typename T>
    T getValue() const { return getField().safeGet<NearestFieldType<T>>(); }
+
+    bool isCollationSupported() const override { return data->isCollationSupported(); }
};
}
diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp
index 64b503ed325..3f03734b738 100644
--- a/src/Columns/ColumnLowCardinality.cpp
+++ b/src/Columns/ColumnLowCardinality.cpp
@@ -1,5 +1,6 @@
#include
#include
+#include <Columns/Collator.h>
#include
#include
#include
@@ -278,14 +279,26 @@ MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const
    return ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), getIndexes().cloneResized(size));
}
-int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
+int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const
{
    const auto & low_cardinality_column = assert_cast<const ColumnLowCardinality &>(rhs);
    size_t n_index = getIndexes().getUInt(n);
    size_t m_index = low_cardinality_column.getIndexes().getUInt(m);
+    if (collator)
+        return getDictionary().getNestedColumn()->compareAtWithCollation(n_index, m_index, *low_cardinality_column.getDictionary().getNestedColumn(), nan_direction_hint, *collator);
    return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint);
}
+int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
+{
+    return compareAtImpl(n, m, rhs, nan_direction_hint);
+}
+
+int ColumnLowCardinality::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const
+{
+    return compareAtImpl(n, m, rhs, nan_direction_hint, &collator);
+}
+
void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num,
                                         PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
                                         int direction, int nan_direction_hint) const
@@ -295,14 +308,17 @@
                                compare_results, direction, nan_direction_hint);
}
-void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
+void ColumnLowCardinality::getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator) const
{
    if (limit == 0)
        limit = size();
    size_t unique_limit = getDictionary().size();
    Permutation unique_perm;
-    getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm);
+    if (collator)
+        getDictionary().getNestedColumn()->getPermutationWithCollation(*collator, reverse, unique_limit, nan_direction_hint, unique_perm);
+    else
+        getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm);
    /// TODO: optimize with sse.
@@ -330,7 +346,8 @@
    }
}
-void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
+template <typename Cmp>
+void ColumnLowCardinality::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const
{
    if (equal_ranges.empty())
        return;
@@ -345,20 +362,17 @@
    EqualRanges new_ranges;
    SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
+    auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; };
+
    for (size_t i = 0; i < number_of_ranges; ++i)
    {
        const auto& [first, last] = equal_ranges[i];
-        if (reverse)
-            std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
-                      {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; });
-        else
-            std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
-                      {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; });
+        std::sort(res.begin() + first, res.begin() + last, less);
        auto new_first = first;
        for (auto j = first + 1; j < last; ++j)
        {
-            if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0)
+            if (comparator(res[new_first], res[j]) != 0)
            {
                if (j - new_first > 1)
                    new_ranges.emplace_back(new_first, j);
@@ -379,17 +393,12 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
    /// Since then we are working inside the interval.
- if (reverse) - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b) - {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; }); - else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b) - {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; }); + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); auto new_first = first; for (auto j = first + 1; j < limit; ++j) { - if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) != 0) + if (comparator(res[new_first],res[j]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -401,7 +410,7 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan auto new_last = limit; for (auto j = limit; j < last; ++j) { - if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) == 0) + if (comparator(res[new_first], res[j]) == 0) { std::swap(res[new_last], res[j]); ++new_last; @@ -412,6 +421,38 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan } } +void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + getPermutationImpl(reverse, limit, nan_direction_hint, res); +} + +void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +{ + auto comparator = [this, nan_direction_hint, reverse](size_t lhs, size_t rhs) + { + int ret = getDictionary().compareAt(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), getDictionary(), nan_direction_hint); + return reverse ? -ret : ret; + }; + + updatePermutationImpl(limit, res, equal_ranges, comparator); +} + +void ColumnLowCardinality::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + getPermutationImpl(reverse, limit, nan_direction_hint, res, &collator); +} + +void ColumnLowCardinality::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const +{ + auto comparator = [this, &collator, reverse, nan_direction_hint](size_t lhs, size_t rhs) + { + int ret = getDictionary().getNestedColumn()->compareAtWithCollation(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), *getDictionary().getNestedColumn(), nan_direction_hint, collator); + return reverse ? 
-ret : ret;
+    };
+
+    updatePermutationImpl(limit, res, equal_ranges, comparator);
+}
+
std::vector<MutableColumnPtr> ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const
{
    auto columns = getIndexes().scatter(num_columns, selector);
diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h
index 0aeda4567fd..0874f619b8a 100644
--- a/src/Columns/ColumnLowCardinality.h
+++ b/src/Columns/ColumnLowCardinality.h
@@ -125,10 +125,16 @@ public:
                       PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
                       int direction, int nan_direction_hint) const override;
+    int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override;
+
    void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
    void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const override;
+    void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
+
+    void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
+
    ColumnPtr replicate(const Offsets & offsets) const override
    {
        return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets));
@@ -170,6 +176,7 @@ public:
    size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); }
    bool isNumeric() const override { return getDictionary().isNumeric(); }
    bool lowCardinality() const override { return true; }
+    bool isCollationSupported() const override { return getDictionary().getNestedColumn()->isCollationSupported(); }
    /**
     * Checks if the dictionary column is Nullable(T).
@@ -309,6 +316,13 @@ private:
    void compactInplace();
    void compactIfSharedDictionary();
+
+    int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
+
+    void getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
+
+    template <typename Cmp>
+    void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const;
};
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 51248a598af..4f2117b1405 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -6,6 +6,7 @@
#include
#include
#include
+#include <Columns/Collator.h>
#include
@@ -223,7 +224,7 @@ ColumnPtr ColumnNullable::index(const IColumn & indexes, size_t limit) const
    return ColumnNullable::create(indexed_data, indexed_null_map);
}
-int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
+int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator) const
{
    /// NULL values share the properties of NaN values.
/// Here the last parameter of compareAt is called null_direction_hint @@ -245,9 +246,22 @@ int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null } const IColumn & nested_rhs = nullable_rhs.getNestedColumn(); + if (collator) + return getNestedColumn().compareAtWithCollation(n, m, nested_rhs, null_direction_hint, *collator); + return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); } +int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +{ + return compareAtImpl(n, m, rhs_, null_direction_hint); +} + +int ColumnNullable::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator & collator) const +{ + return compareAtImpl(n, m, rhs_, null_direction_hint, &collator); +} + void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const @@ -256,10 +270,14 @@ void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } -void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const +void ColumnNullable::getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator) const { /// Cannot pass limit because of unknown amount of NULLs. - getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res); + + if (collator) + getNestedColumn().getPermutationWithCollation(*collator, reverse, 0, null_direction_hint, res); + else + getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res); if ((null_direction_hint > 0) != reverse) { @@ -329,7 +347,7 @@ void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_directi } } -void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +void ColumnNullable::updatePermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const { if (equal_ranges.empty()) return; @@ -432,12 +450,35 @@ void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_dire } } - getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges); + if (collator) + getNestedColumn().updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges); + else + getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges); equal_ranges = std::move(new_ranges); std::move(null_ranges.begin(), null_ranges.end(), std::back_inserter(equal_ranges)); } +void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const +{ + getPermutationImpl(reverse, limit, null_direction_hint, res); +} + +void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +{ + updatePermutationImpl(reverse, limit, null_direction_hint, res, equal_ranges); +} + +void ColumnNullable::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const +{ + getPermutationImpl(reverse, limit, null_direction_hint, res, &collator); +} + +void ColumnNullable::updatePermutationWithCollation(const 
Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const
+{
+    updatePermutationImpl(reverse, limit, null_direction_hint, res, equal_range, &collator);
+}
+
void ColumnNullable::gather(ColumnGathererStream & gatherer)
{
    gatherer.gather(*this);
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index e4033e22737..47b0103eab4 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -6,6 +6,7 @@
#include
#include
+class Collator;
namespace DB
{
@@ -92,8 +93,12 @@ public:
    void compareColumn(const IColumn & rhs, size_t rhs_row_num,
                       PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
                       int direction, int nan_direction_hint) const override;
+    int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator &) const override;
    void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
-    void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override;
+    void updatePermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const override;
+    void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
+    void updatePermutationWithCollation(
+        const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
    void reserve(size_t n) override;
    size_t byteSize() const override;
    size_t allocatedBytes() const override;
@@ -129,6 +134,7 @@ public:
    bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); }
    size_t sizeOfValueIfFixed() const override { return null_map->sizeOfValueIfFixed() + nested_column->sizeOfValueIfFixed(); }
    bool onlyNull() const override { return nested_column->isDummy(); }
+    bool isCollationSupported() const override { return nested_column->isCollationSupported(); }
    /// Return the column that represents values.
@@ -164,6 +170,13 @@ private:
    template <bool>
    void applyNullMapImpl(const ColumnUInt8 & map);
+
+    int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator=nullptr) const;
+
+    void getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
+
+    void updatePermutationImpl(
+        bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator = nullptr) const;
};
ColumnPtr makeNullable(const ColumnPtr & column);
diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp
index cd06ea20f83..23798f64a9c 100644
--- a/src/Columns/ColumnString.cpp
+++ b/src/Columns/ColumnString.cpp
@@ -285,21 +285,22 @@ void ColumnString::compareColumn(
}
template <bool positive>
-struct ColumnString::less
+struct ColumnString::Cmp
{
    const ColumnString & parent;
-    explicit less(const ColumnString & parent_) : parent(parent_) {}
-    bool operator()(size_t lhs, size_t rhs) const
+    explicit Cmp(const ColumnString & parent_) : parent(parent_) {}
+    int operator()(size_t lhs, size_t rhs) const
    {
        int res = memcmpSmallAllowOverflow15(
            parent.chars.data() + parent.offsetAt(lhs), parent.sizeAt(lhs) - 1,
            parent.chars.data() + parent.offsetAt(rhs), parent.sizeAt(rhs) - 1);
-        return positive ? (res < 0) : (res > 0);
+        return positive ? res : -res;
res : -res; } }; -void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const +template +void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const { size_t s = offsets.size(); res.resize(s); @@ -309,23 +310,16 @@ void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_directio if (limit >= s) limit = 0; + auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; + if (limit) - { - if (reverse) - std::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this)); - else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this)); - } + std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); else - { - if (reverse) - std::sort(res.begin(), res.end(), less(*this)); - else - std::sort(res.begin(), res.end(), less(*this)); - } + std::sort(res.begin(), res.end(), less); } -void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const +template +void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const { if (equal_ranges.empty()) return; @@ -340,21 +334,17 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc if (limit) --number_of_ranges; + auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; + for (size_t i = 0; i < number_of_ranges; ++i) { const auto & [first, last] = equal_ranges[i]; - - if (reverse) - std::sort(res.begin() + first, res.begin() + last, less(*this)); - else - std::sort(res.begin() + first, res.begin() + last, less(*this)); + std::sort(res.begin() + first, res.begin() + last, less); size_t new_first = first; for (size_t j = first + 1; j < last; ++j) { - if (memcmpSmallAllowOverflow15( - chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1, - chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0) + if (cmp(res[j], res[new_first]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -375,17 +365,12 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc /// Since then we are working inside the interval. 
- if (reverse) - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this)); - else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this)); + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); size_t new_first = first; for (size_t j = first + 1; j < limit; ++j) { - if (memcmpSmallAllowOverflow15( - chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1, - chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0) + if (cmp(res[j], res[new_first]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -395,9 +380,7 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc size_t new_last = limit; for (size_t j = limit; j < last; ++j) { - if (memcmpSmallAllowOverflow15( - chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1, - chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) == 0) + if (cmp(res[j], res[new_first]) == 0) { std::swap(res[j], res[new_last]); ++new_last; @@ -408,6 +391,56 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc } } +void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const +{ + if (reverse) + getPermutationImpl(limit, res, Cmp(*this)); + else + getPermutationImpl(limit, res, Cmp(*this)); +} + +void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const +{ + if (reverse) + updatePermutationImpl(limit, res, equal_ranges, Cmp(*this)); + else + updatePermutationImpl(limit, res, equal_ranges, Cmp(*this)); +} + +template +struct ColumnString::CmpWithCollation +{ + const ColumnString & parent; + const Collator & collator; + + CmpWithCollation(const ColumnString & parent_, const Collator & collator_) : parent(parent_), collator(collator_) {} + + int operator()(size_t lhs, size_t rhs) const + { + int res = collator.compare( + reinterpret_cast(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs), + reinterpret_cast(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs)); + + return positive ? 
res : -res; + } +}; + +void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const +{ + if (reverse) + getPermutationImpl(limit, res, CmpWithCollation(*this, collator)); + else + getPermutationImpl(limit, res, CmpWithCollation(*this, collator)); +} + +void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const +{ + if (reverse) + updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation(*this, collator)); + else + updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation(*this, collator)); +} + ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const { size_t col_size = size(); @@ -476,13 +509,13 @@ void ColumnString::getExtremes(Field & min, Field & max) const size_t min_idx = 0; size_t max_idx = 0; - less less_op(*this); + Cmp cmp_op(*this); for (size_t i = 1; i < col_size; ++i) { - if (less_op(i, min_idx)) + if (cmp_op(i, min_idx) < 0) min_idx = i; - else if (less_op(max_idx, i)) + else if (cmp_op(max_idx, i) < 0) max_idx = i; } @@ -491,7 +524,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const } -int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const +int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const { const ColumnString & rhs = assert_cast(rhs_); @@ -500,134 +533,6 @@ int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs reinterpret_cast(&rhs.chars[rhs.offsetAt(m)]), rhs.sizeAt(m)); } - -template -struct ColumnString::lessWithCollation -{ - const ColumnString & parent; - const Collator & collator; - - lessWithCollation(const ColumnString & parent_, const Collator & collator_) : parent(parent_), collator(collator_) {} - - bool operator()(size_t lhs, size_t rhs) const - { - int res = collator.compare( - reinterpret_cast(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs), - reinterpret_cast(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs)); - - return positive ? 
(res < 0) : (res > 0); - } -}; - -void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const -{ - size_t s = offsets.size(); - res.resize(s); - for (size_t i = 0; i < s; ++i) - res[i] = i; - - if (limit >= s) - limit = 0; - - if (limit) - { - if (reverse) - std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation(*this, collator)); - else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation(*this, collator)); - } - else - { - if (reverse) - std::sort(res.begin(), res.end(), lessWithCollation(*this, collator)); - else - std::sort(res.begin(), res.end(), lessWithCollation(*this, collator)); - } -} - -void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const -{ - if (equal_ranges.empty()) - return; - - if (limit >= size() || limit >= equal_ranges.back().second) - limit = 0; - - size_t number_of_ranges = equal_ranges.size(); - if (limit) - --number_of_ranges; - - EqualRanges new_ranges; - SCOPE_EXIT({equal_ranges = std::move(new_ranges);}); - - for (size_t i = 0; i < number_of_ranges; ++i) - { - const auto& [first, last] = equal_ranges[i]; - - if (reverse) - std::sort(res.begin() + first, res.begin() + last, lessWithCollation(*this, collator)); - else - std::sort(res.begin() + first, res.begin() + last, lessWithCollation(*this, collator)); - auto new_first = first; - for (auto j = first + 1; j < last; ++j) - { - if (collator.compare( - reinterpret_cast(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]), - reinterpret_cast(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0) - { - if (j - new_first > 1) - new_ranges.emplace_back(new_first, j); - - new_first = j; - } - } - if (last - new_first > 1) - new_ranges.emplace_back(new_first, last); - } - - if (limit) - { - const auto & [first, last] = equal_ranges.back(); - - if (limit < first || limit > last) - return; - - /// Since then we are working inside the interval. 
- - if (reverse) - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation(*this, collator)); - else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation(*this, collator)); - - auto new_first = first; - for (auto j = first + 1; j < limit; ++j) - { - if (collator.compare( - reinterpret_cast(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]), - reinterpret_cast(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0) - { - if (j - new_first > 1) - new_ranges.emplace_back(new_first, j); - - new_first = j; - } - } - auto new_last = limit; - for (auto j = limit; j < last; ++j) - { - if (collator.compare( - reinterpret_cast(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]), - reinterpret_cast(&chars[offsetAt(res[j])]), sizeAt(res[j])) == 0) - { - std::swap(res[new_last], res[j]); - ++new_last; - } - } - if (new_last - new_first > 1) - new_ranges.emplace_back(new_first, new_last); - } -} - void ColumnString::protect() { getChars().protect(); diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 19398e07b83..b71751dbc4e 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -43,14 +43,20 @@ private: size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; } template - struct less; + struct Cmp; template - struct lessWithCollation; + struct CmpWithCollation; ColumnString() = default; ColumnString(const ColumnString & src); + template + void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const; + + template + void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const; + public: const char * getFamilyName() const override { return "String"; } TypeIndex getDataType() const override { return TypeIndex::String; } @@ -229,16 +235,16 @@ public: int direction, int nan_direction_hint) const override; /// Variant of compareAt for string comparison with respect of collation. - int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const; + int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; - void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; + void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override; /// Sorting with respect of collation. 
- void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const override; - void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges& equal_range) const; + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override; ColumnPtr replicate(const Offsets & replicate_offsets) const override; @@ -270,6 +276,8 @@ public: // Throws an exception if offsets/chars are messed up void validate() const; + + bool isCollationSupported() const override { return true; } }; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 98a6611edb7..d6e1ca982d6 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -275,16 +275,27 @@ MutableColumns ColumnTuple::scatter(ColumnIndex num_columns, const Selector & se return res; } -int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const { const size_t tuple_size = columns.size(); for (size_t i = 0; i < tuple_size; ++i) - if (int res = columns[i]->compareAt(n, m, *assert_cast(rhs).columns[i], nan_direction_hint)) + { + int res; + if (collator && columns[i]->isCollationSupported()) + res = columns[i]->compareAtWithCollation(n, m, *assert_cast(rhs).columns[i], nan_direction_hint, *collator); + else + res = columns[i]->compareAt(n, m, *assert_cast(rhs).columns[i], nan_direction_hint); + if (res) return res; - + } return 0; } +int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +{ + return compareAtImpl(n, m, rhs, nan_direction_hint); +} + void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const @@ -293,14 +304,20 @@ void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } +int ColumnTuple::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const +{ + return compareAtImpl(n, m, rhs, nan_direction_hint, &collator); +} + template struct ColumnTuple::Less { TupleColumns columns; int nan_direction_hint; + const Collator * collator; - Less(const TupleColumns & columns_, int nan_direction_hint_) - : columns(columns_), nan_direction_hint(nan_direction_hint_) + Less(const TupleColumns & columns_, int nan_direction_hint_, const Collator * collator_=nullptr) + : columns(columns_), nan_direction_hint(nan_direction_hint_), collator(collator_) { } @@ -308,7 +325,11 @@ struct ColumnTuple::Less { for (const auto & column : columns) { - int res = column->compareAt(a, b, *column, nan_direction_hint); + int res; + if (collator && column->isCollationSupported()) + res = column->compareAtWithCollation(a, b, *column, nan_direction_hint, *collator); + else + res = column->compareAt(a, b, *column, nan_direction_hint); if (res < 0) return positive; else if (res > 0) @@ -318,7 +339,8 @@ struct ColumnTuple::Less } }; -void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +template +void 
ColumnTuple::getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const { size_t rows = size(); res.resize(rows); @@ -330,28 +352,25 @@ void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_h if (limit) { - if (reverse) - std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less(columns, nan_direction_hint)); - else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less(columns, nan_direction_hint)); + std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); } else { - if (reverse) - std::sort(res.begin(), res.end(), Less(columns, nan_direction_hint)); - else - std::sort(res.begin(), res.end(), Less(columns, nan_direction_hint)); + std::sort(res.begin(), res.end(), less); } } -void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +void ColumnTuple::updatePermutationImpl(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const { if (equal_ranges.empty()) return; for (const auto & column : columns) { - column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges); + if (collator && column->isCollationSupported()) + column->updatePermutationWithCollation(*collator, reverse, limit, nan_direction_hint, res, equal_ranges); + else + column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges); while (limit && !equal_ranges.empty() && limit <= equal_ranges.back().first) equal_ranges.pop_back(); @@ -361,6 +380,32 @@ void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_directio } } +void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + if (reverse) + getPermutationImpl(limit, res, Less(columns, nan_direction_hint)); + else + getPermutationImpl(limit, res, Less(columns, nan_direction_hint)); +} + +void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +{ + updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges); +} + +void ColumnTuple::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + if (reverse) + getPermutationImpl(limit, res, Less(columns, nan_direction_hint, &collator)); + else + getPermutationImpl(limit, res, Less(columns, nan_direction_hint, &collator)); +} + +void ColumnTuple::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const +{ + updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges, &collator); +} + void ColumnTuple::gather(ColumnGathererStream & gatherer) { gatherer.gather(*this); @@ -433,5 +478,15 @@ bool ColumnTuple::structureEquals(const IColumn & rhs) const return false; } +bool ColumnTuple::isCollationSupported() const +{ + for (const auto& column : columns) + { + if (column->isCollationSupported()) + return true; + } + return false; +} + } diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index e8dfd4c8e44..0bee3463f2f 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -75,15 +75,19 @@ public: void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int 
nan_direction_hint) const override; + int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; - void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override; + void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const override; void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; void protect() override; void forEachSubcolumn(ColumnCallback callback) override; bool structureEquals(const IColumn & rhs) const override; + bool isCollationSupported() const override; size_t tupleSize() const { return columns.size(); } @@ -94,6 +98,15 @@ public: Columns getColumnsCopy() const { return {columns.begin(), columns.end()}; } const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; } + +private: + int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator = nullptr) const; + + template <typename LessOperator> + void getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const; + + void updatePermutationImpl( + bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator = nullptr) const; }; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 14e6a9d7eed..6dbcfacefe9 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -9,7 +9,7 @@ class SipHash; - +class Collator; namespace DB { @@ -18,6 +18,7 @@ namespace ErrorCodes { extern const int CANNOT_GET_SIZE_OF_FIELD; extern const int NOT_IMPLEMENTED; + extern const int BAD_COLLATION; } class Arena; @@ -250,6 +251,12 @@ public: */ virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; + /// Equivalent to compareAt, but a collator is used to compare values. + virtual int compareAtWithCollation(size_t, size_t, const IColumn &, int, const Collator &) const + { + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION); + } + /// Compare the whole column with single value from rhs column. /// If row_indexes is nullptr, it's ignored. Otherwise, it is a set of rows to compare. /// compare_results[i] will be equal to compareAt(row_indexes[i], rhs_row_num, rhs, nan_direction_hint) * direction @@ -277,6 +284,18 @@ public: */ virtual void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const = 0; + /** Equivalent to getPermutation and updatePermutation, but a collator is used to compare values. * Supported for String, LowCardinality(String), Nullable(String) and for Array and Tuple, containing them. 
+ */ + virtual void getPermutationWithCollation(const Collator &, bool, size_t, int, Permutation &) const + { + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION); + } + virtual void updatePermutationWithCollation(const Collator &, bool, size_t, int, Permutation &, EqualRanges&) const + { + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION); + } + /** Copies each element according offsets parameter. * (i-th element should be copied offsets[i] - offsets[i - 1] times.) * It is necessary in ARRAY JOIN operation. @@ -402,6 +421,8 @@ public: virtual bool lowCardinality() const { return false; } + virtual bool isCollationSupported() const { return false; } + virtual ~IColumn() = default; IColumn() = default; IColumn(const IColumn &) = default; diff --git a/src/Columns/tests/gtest_weak_hash_32.cpp b/src/Columns/tests/gtest_weak_hash_32.cpp index ea4c0db33e9..a7fd60a3b9c 100644 --- a/src/Columns/tests/gtest_weak_hash_32.cpp +++ b/src/Columns/tests/gtest_weak_hash_32.cpp @@ -71,7 +71,8 @@ void checkColumn( std::unordered_map map; size_t num_collisions = 0; - std::stringstream collitions_str; + std::stringstream collisions_str; + collisions_str.exceptions(std::ios::failbit); for (size_t i = 0; i < eq_class.size(); ++i) { @@ -86,14 +87,14 @@ void checkColumn( if (num_collisions <= max_collisions_to_print) { - collitions_str << "Collision:\n"; - collitions_str << print_for_row(it->second) << '\n'; - collitions_str << print_for_row(i) << std::endl; + collisions_str << "Collision:\n"; + collisions_str << print_for_row(it->second) << '\n'; + collisions_str << print_for_row(i) << std::endl; } if (num_collisions > allowed_collisions) { - std::cerr << collitions_str.rdbuf(); + std::cerr << collisions_str.rdbuf(); break; } } diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index d2213a01498..8a6093c47c9 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -538,6 +538,7 @@ XMLDocumentPtr ConfigProcessor::processConfig( *has_zk_includes = !contributing_zk_paths.empty(); std::stringstream comment; + comment.exceptions(std::ios::failbit); comment << " This file was generated automatically.\n"; comment << " Do not edit it: it is likely to be discarded and generated again before it's read next time.\n"; comment << " Files used to generate this file:"; diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 0214fa7b065..820e3857bfc 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -246,6 +246,7 @@ static std::string getExtraExceptionInfo(const std::exception & e) std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded_stacktrace /*= false*/, bool with_extra_info /*= true*/) { std::stringstream stream; + stream.exceptions(std::ios::failbit); try { @@ -365,6 +366,7 @@ void tryLogException(std::exception_ptr e, Poco::Logger * logger, const std::str std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace) { std::stringstream stream; + stream.exceptions(std::ios::failbit); try { diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 380fcb1b2b6..5257f95898a 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -134,6 +134,7 @@ void 
MemoryTracker::alloc(Int64 size) ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded); std::stringstream message; + message.exceptions(std::ios::failbit); message << "Memory tracker"; if (const auto * description = description_ptr.load(std::memory_order_relaxed)) message << " " << description; @@ -166,6 +167,7 @@ void MemoryTracker::alloc(Int64 size) ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded); std::stringstream message; + message.exceptions(std::ios::failbit); message << "Memory limit"; if (const auto * description = description_ptr.load(std::memory_order_relaxed)) message << " " << description; diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index 992419adb6d..db0928ea605 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -74,6 +74,7 @@ ShellCommand::~ShellCommand() void ShellCommand::logCommand(const char * filename, char * const argv[]) { std::stringstream args; + args.exceptions(std::ios::failbit); for (int i = 0; argv != nullptr && argv[i] != nullptr; ++i) { if (i > 0) diff --git a/src/Common/SmallObjectPool.h b/src/Common/SmallObjectPool.h index 469c102bdae..0eda8931946 100644 --- a/src/Common/SmallObjectPool.h +++ b/src/Common/SmallObjectPool.h @@ -1,76 +1,44 @@ #pragma once #include -#include -#include -#include -#include -#include +#include namespace DB { - /** Can allocate memory objects of fixed size with deletion support. - * For small `object_size`s allocated no less than getMinAllocationSize() bytes. */ + * For small `object_size` values, at least pointer-size bytes are allocated. + */ class SmallObjectPool { private: - struct Block { Block * next; }; - static constexpr auto getMinAllocationSize() { return sizeof(Block); } - const size_t object_size; Arena pool; - Block * free_list{}; + char * free_list = nullptr; public: - SmallObjectPool( - const size_t object_size_, const size_t initial_size = 4096, const size_t growth_factor = 2, - const size_t linear_growth_threshold = 128 * 1024 * 1024) - : object_size{std::max(object_size_, getMinAllocationSize())}, - pool{initial_size, growth_factor, linear_growth_threshold} + SmallObjectPool(size_t object_size_) + : object_size{std::max(object_size_, sizeof(char *))} { - if (pool.size() < object_size) - return; - - const auto num_objects = pool.size() / object_size; - auto head = free_list = ext::bit_cast<Block *>(pool.alloc(num_objects * object_size)); - - for (const auto i : ext::range(0, num_objects - 1)) - { - (void) i; - head->next = ext::bit_cast<Block *>(ext::bit_cast<char *>(head) + object_size); - head = head->next; - } - - head->next = nullptr; } char * alloc() { if (free_list) { - const auto res = reinterpret_cast<char *>(free_list); - free_list = free_list->next; + char * res = free_list; + free_list = unalignedLoad<char *>(free_list); return res; } return pool.alloc(object_size); } - void free(const void * ptr) + void free(char * ptr) { - union - { - const void * p_v; - Block * block; - }; - - p_v = ptr; - block->next = free_list; - - free_list = block; + unalignedStore<char *>(ptr, free_list); + free_list = ptr; } /// The size of the allocated pool in bytes @@ -81,5 +49,4 @@ public: }; - } diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 677af269ca0..7e9474ac3a7 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -24,6 +24,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext_t & context) { std::stringstream error; + error.exceptions(std::ios::failbit); switch (sig) { case SIGSEGV: @@ -319,6 +320,7 @@ static void 
toStringEveryLineImpl( std::unordered_map dwarfs; std::stringstream out; + out.exceptions(std::ios::failbit); for (size_t i = offset; i < size; ++i) { @@ -358,6 +360,7 @@ static void toStringEveryLineImpl( } #else std::stringstream out; + out.exceptions(std::ios::failbit); for (size_t i = offset; i < size; ++i) { @@ -373,6 +376,7 @@ static void toStringEveryLineImpl( static std::string toStringImpl(const StackTrace::FramePointers & frame_pointers, size_t offset, size_t size) { std::stringstream out; + out.exceptions(std::ios::failbit); toStringEveryLineImpl(frame_pointers, offset, size, [&](const std::string & str) { out << str << '\n'; }); return out.str(); } diff --git a/src/Common/StudentTTest.cpp b/src/Common/StudentTTest.cpp index 170f06c2877..fe605092acc 100644 --- a/src/Common/StudentTTest.cpp +++ b/src/Common/StudentTTest.cpp @@ -154,6 +154,8 @@ std::pair StudentTTest::compareAndReport(size_t confidence_le double mean_confidence_interval = table_value * t_statistic; std::stringstream ss; + ss.exceptions(std::ios::failbit); + if (mean_difference > mean_confidence_interval && (mean_difference - mean_confidence_interval > 0.0001)) /// difference must be more than 0.0001, to take into account connection latency. { ss << "Difference at " << confidence_level[confidence_level_index] << "% confidence : "; diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index e527e97d608..7b2c2108629 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -216,7 +216,7 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ if (!jobs.empty()) { - job = jobs.top().job; + job = std::move(jobs.top().job); jobs.pop(); } else diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index ec9f47aebb1..7b94ca0f2b2 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -398,8 +398,7 @@ bool PerfEventsCounters::processThreadLocalChanges(const std::string & needed_ev return true; } -// Parse comma-separated list of event names. Empty means all available -// events. +// Parse comma-separated list of event names. Empty means all available events. std::vector PerfEventsCounters::eventIndicesFromString(const std::string & events_list) { std::vector result; @@ -418,8 +417,7 @@ std::vector PerfEventsCounters::eventIndicesFromString(const std::string std::string event_name; while (std::getline(iss, event_name, ',')) { - // Allow spaces at the beginning of the token, so that you can write - // 'a, b'. + // Allow spaces at the beginning of the token, so that you can write 'a, b'. 
event_name.erase(0, event_name.find_first_not_of(' ')); auto entry = event_name_to_index.find(event_name); diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index bac0559fc6b..f5ad28f57af 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -80,6 +80,7 @@ void ThreadStatus::assertState(const std::initializer_list & permitted_stat } std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "Unexpected thread state " << getCurrentState(); if (description) ss << ": " << description; diff --git a/src/Common/UInt128.h b/src/Common/UInt128.h index 3944d8073c2..7b6f8e7c7be 100644 --- a/src/Common/UInt128.h +++ b/src/Common/UInt128.h @@ -49,6 +49,7 @@ struct UInt128 String toHexString() const { std::ostringstream os; + os.exceptions(std::ios::failbit); os << std::setw(16) << std::setfill('0') << std::hex << high << low; return String(os.str()); } diff --git a/src/Common/XDBCBridgeHelper.h b/src/Common/XDBCBridgeHelper.h index a425cd36b11..c794d2fe3cd 100644 --- a/src/Common/XDBCBridgeHelper.h +++ b/src/Common/XDBCBridgeHelper.h @@ -308,6 +308,7 @@ struct ODBCBridgeMixin path.setFileName("clickhouse-odbc-bridge"); std::stringstream command; + command.exceptions(std::ios::failbit); #if !CLICKHOUSE_SPLIT_BINARY cmd_args.push_back("odbc-bridge"); diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp index 4f7beadef5f..f7db8a85f96 100644 --- a/src/Common/ZooKeeper/TestKeeper.cpp +++ b/src/Common/ZooKeeper/TestKeeper.cpp @@ -219,6 +219,7 @@ std::pair TestKeeperCreateRequest::process(TestKeeper::Contai ++it->second.seq_num; std::stringstream seq_num_str; + seq_num_str.exceptions(std::ios::failbit); seq_num_str << std::setw(10) << std::setfill('0') << seq_num; path_created += seq_num_str.str(); diff --git a/src/Common/checkStackSize.cpp b/src/Common/checkStackSize.cpp index 10e93a8356c..bdc117eccac 100644 --- a/src/Common/checkStackSize.cpp +++ b/src/Common/checkStackSize.cpp @@ -81,6 +81,7 @@ __attribute__((__weak__)) void checkStackSize() if (stack_size * 2 > max_stack_size) { std::stringstream message; + message.exceptions(std::ios::failbit); message << "Stack size too large" << ". 
Stack address: " << stack_address << ", frame address: " << frame_address diff --git a/src/Common/parseGlobs.cpp b/src/Common/parseGlobs.cpp index c07664c4662..f04ad1ea8a0 100644 --- a/src/Common/parseGlobs.cpp +++ b/src/Common/parseGlobs.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include @@ -20,6 +19,7 @@ namespace DB std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs) { std::ostringstream oss_for_escaping; + oss_for_escaping.exceptions(std::ios::failbit); /// Escaping only characters that not used in glob syntax for (const auto & letter : initial_str_with_globs) { @@ -33,6 +33,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob re2::StringPiece input(escaped_with_globs); re2::StringPiece matched; std::ostringstream oss_for_replacing; + oss_for_replacing.exceptions(std::ios::failbit); size_t current_index = 0; while (RE2::FindAndConsume(&input, enum_or_range, &matched)) { @@ -45,8 +46,8 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob size_t range_end = 0; char point; std::istringstream iss_range(buffer); + iss_range.exceptions(std::ios::failbit); iss_range >> range_begin >> point >> point >> range_end; - assert(!iss_range.fail()); bool leading_zeros = buffer[0] == '0'; size_t num_len = std::to_string(range_end).size(); if (leading_zeros) @@ -71,6 +72,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob oss_for_replacing << escaped_with_globs.substr(current_index); std::string almost_res = oss_for_replacing.str(); std::ostringstream oss_final_processing; + oss_final_processing.exceptions(std::ios::failbit); for (const auto & letter : almost_res) { if ((letter == '?') || (letter == '*')) diff --git a/src/Common/tests/gtest_global_register.h b/src/Common/tests/gtest_global_register.h new file mode 100644 index 00000000000..c4bde825109 --- /dev/null +++ b/src/Common/tests/gtest_global_register.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + + +inline void tryRegisterFunctions() +{ + static struct Register { Register() { DB::registerFunctions(); } } registered; +} + +inline void tryRegisterFormats() +{ + static struct Register { Register() { DB::registerFormats(); } } registered; +} diff --git a/src/Common/tests/gtest_global_register_functions.h b/src/Common/tests/gtest_global_register_functions.h deleted file mode 100644 index 5ca4d64522e..00000000000 --- a/src/Common/tests/gtest_global_register_functions.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once -#include -#include - -struct RegisteredFunctionsState -{ - RegisteredFunctionsState() - { - DB::registerFunctions(); - } - - RegisteredFunctionsState(RegisteredFunctionsState &&) = default; -}; - -inline void tryRegisterFunctions() -{ - static RegisteredFunctionsState registered_functions_state; -} diff --git a/src/Common/tests/gtest_sensitive_data_masker.cpp b/src/Common/tests/gtest_sensitive_data_masker.cpp index d5133b2ef95..67ad5be2f52 100644 --- a/src/Common/tests/gtest_sensitive_data_masker.cpp +++ b/src/Common/tests/gtest_sensitive_data_masker.cpp @@ -165,6 +165,7 @@ TEST(Common, SensitiveDataMasker) )END"); + Poco::AutoPtr xml_config = new Poco::Util::XMLConfiguration(xml_isteam_bad); DB::SensitiveDataMasker masker_xml_based_exception_check(*xml_config, "query_masking_rules"); diff --git a/src/Compression/tests/compressed_buffer.cpp b/src/Compression/tests/compressed_buffer.cpp index 89bfe0d0cce..c018fc95995 100644 --- a/src/Compression/tests/compressed_buffer.cpp +++ 
b/src/Compression/tests/compressed_buffer.cpp @@ -52,6 +52,7 @@ int main(int, char **) if (x != i) { std::stringstream s; + s.exceptions(std::ios::failbit); s << "Failed!, read: " << x << ", expected: " << i; throw DB::Exception(s.str(), 0); } diff --git a/src/Core/MySQL/IMySQLReadPacket.cpp b/src/Core/MySQL/IMySQLReadPacket.cpp index 5f6bbc7bceb..676f3986ba4 100644 --- a/src/Core/MySQL/IMySQLReadPacket.cpp +++ b/src/Core/MySQL/IMySQLReadPacket.cpp @@ -22,6 +22,7 @@ void IMySQLReadPacket::readPayload(ReadBuffer & in, uint8_t & sequence_id) if (!payload.eof()) { std::stringstream tmp; + tmp.exceptions(std::ios::failbit); tmp << "Packet payload is not fully read. Stopped after " << payload.count() << " bytes, while " << payload.available() << " bytes are in buffer."; throw Exception(tmp.str(), ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); } diff --git a/src/Core/MySQL/IMySQLWritePacket.cpp b/src/Core/MySQL/IMySQLWritePacket.cpp index f5bc339b079..3e97800177c 100644 --- a/src/Core/MySQL/IMySQLWritePacket.cpp +++ b/src/Core/MySQL/IMySQLWritePacket.cpp @@ -16,6 +16,7 @@ void IMySQLWritePacket::writePayload(WriteBuffer & buffer, uint8_t & sequence_id if (buf.remainingPayloadSize()) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "Incomplete payload. Written " << getPayloadSize() - buf.remainingPayloadSize() << " bytes, expected " << getPayloadSize() << " bytes."; throw Exception(ss.str(), 0); } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4d4712dcba7..13b4f74ac3e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -374,9 +374,8 @@ class IColumn; M(Bool, optimize_monotonous_functions_in_order_by, true, "Replace monotonous function with its argument in ORDER BY", 0) \ M(Bool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \ - \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ - M(Bool, use_compact_format_in_distributed_parts_names, false, "Changes format of directories names for distributed table insert parts.", 0) \ + M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \ @@ -385,7 +384,6 @@ class IColumn; M(Seconds, lock_acquire_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "How long locking request should wait before failing", 0) \ M(Bool, materialize_ttl_after_modify, true, "Apply TTL for old data, after ALTER MODIFY TTL query", 0) \ M(String, function_implementation, "", "Choose function implementation for specific target or variant (experimental). 
If empty enable all of them.", 0) \ - \ M(Bool, allow_experimental_geo_types, false, "Allow geo data types such as Point, Ring, Polygon, MultiPolygon", 0) \ M(Bool, allow_experimental_bigint_types, false, "Allow Int128, Int256, UInt256 and Decimal256 types", 0) \ M(Bool, data_type_default_nullable, false, "Data types without NULL or NOT NULL will make Nullable", 0) \ @@ -394,20 +392,18 @@ class IColumn; M(Bool, allow_experimental_database_materialize_mysql, false, "Allow to create database with Engine=MaterializeMySQL(...).", 0) \ M(Bool, system_events_show_zero_values, false, "Include all metrics, even with zero values", 0) \ M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \ + M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \ + M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \ + M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \ + M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \ \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ \ - M(Bool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \ - M(UInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \ M(UInt64, multiple_joins_rewriter_version, 0, "Obsolete setting, does nothing. Will be removed after 2021-03-31", 0) \ - \ - M(Bool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \ M(Bool, experimental_use_processors, true, "Obsolete setting, does nothing. Will be removed after 2020-11-29.", 0) \ - M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \ + M(Bool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \ + M(Bool, enable_debug_queries, false, "Enabled debug queries, but now is obsolete", 0) \ M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing. Will be removed after 2021-02-12", 0) \ - M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \ - M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \ - M(Bool, enable_debug_queries, false, "Enabled debug queries, but now is obsolete", 0) // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below. 
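
(Aside on the refactoring above; illustrative, not part of the patch. The ColumnString and ColumnTuple hunks share one pattern: the old boolean `less`/`lessWithCollation` functors forced every `std::sort`/`std::partial_sort` call site to be duplicated behind `if (reverse)`, while the new three-way `Cmp`/`CmpWithCollation` functors return an int, so the boolean predicate is derived once in a lambda and `cmp(a, b) != 0` doubles as the equal-range test. A minimal self-contained sketch of the idiom, with hypothetical names over plain `std::string` data:)

    #include <algorithm>
    #include <numeric>
    #include <string>
    #include <vector>

    template <bool positive>
    struct Cmp
    {
        const std::vector<std::string> & data;

        /// Three-way result; descending order is just wholesale negation.
        int operator()(size_t lhs, size_t rhs) const
        {
            int res = data[lhs].compare(data[rhs]);
            return positive ? res : -res;
        }
    };

    template <typename Comparator>
    void getPermutationSketch(const std::vector<std::string> & data, std::vector<size_t> & res, Comparator cmp)
    {
        res.resize(data.size());
        std::iota(res.begin(), res.end(), 0);

        /// The boolean predicate is derived from the three-way comparator in
        /// exactly one place, so no call site needs forward/reverse twins.
        auto less = [&cmp](size_t lhs, size_t rhs) { return cmp(lhs, rhs) < 0; };
        std::sort(res.begin(), res.end(), less);

        /// Equal-range splitting can reuse the same comparator:
        /// cmp(res[j], res[new_first]) != 0 marks a range boundary.
    }

    /// Usage: the sort direction is the only thing that changes.
    /// std::vector<size_t> perm;
    /// getPermutationSketch(data, perm, Cmp<true>{data});   // ascending
    /// getPermutationSketch(data, perm, Cmp<false>{data});  // descending

(The same shape is what lets the collating variant differ only in the functor, `CmpWithCollation` wrapping `collator.compare`, while `getPermutationImpl`/`updatePermutationImpl` stay shared.)
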
diff --git a/src/Core/SortCursor.h b/src/Core/SortCursor.h index 4c90cc723bf..7a222f70199 100644 --- a/src/Core/SortCursor.h +++ b/src/Core/SortCursor.h @@ -96,7 +96,7 @@ struct SortCursorImpl : column_desc.column_number; sort_columns.push_back(columns[column_number].get()); - need_collation[j] = desc[j].collator != nullptr && typeid_cast<const ColumnString *>(sort_columns.back()); /// TODO Nullable(String) + need_collation[j] = desc[j].collator != nullptr && sort_columns.back()->isCollationSupported(); has_collation |= need_collation[j]; } @@ -201,10 +201,7 @@ struct SortCursorWithCollation : SortCursorHelper<SortCursorWithCollation> int nulls_direction = desc.nulls_direction; int res; if (impl->need_collation[i]) - { - const ColumnString & column_string = assert_cast<const ColumnString &>(*impl->sort_columns[i]); - res = column_string.compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), *impl->desc[i].collator); - } + res = impl->sort_columns[i]->compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction, *impl->desc[i].collator); else res = impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction); diff --git a/src/Core/SortDescription.h b/src/Core/SortDescription.h index bd3b7bc45ff..98229bb73d7 100644 --- a/src/Core/SortDescription.h +++ b/src/Core/SortDescription.h @@ -61,6 +61,7 @@ struct SortColumnDescription std::string dump() const { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << column_name << ":" << column_number << ":dir " << direction << "nulls " << nulls_direction; return ss.str(); } diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index 7c0253c6731..0deee7c224b 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -60,6 +60,7 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) if (!value) { std::stringstream exception_message; + exception_message.exceptions(std::ios::failbit); exception_message << "Constraint " << backQuote(constraint_ptr->name) << " for table " << table_id.getNameForLogs() @@ -87,6 +88,7 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) Names related_columns = constraint_expr->getRequiredColumns(); std::stringstream exception_message; + exception_message.exceptions(std::ios::failbit); exception_message << "Constraint " << backQuote(constraint_ptr->name) << " for table " << table_id.getNameForLogs() diff --git a/src/DataStreams/IBlockInputStream.cpp b/src/DataStreams/IBlockInputStream.cpp index e954225fdf9..23ba9ff2970 100644 --- a/src/DataStreams/IBlockInputStream.cpp +++ b/src/DataStreams/IBlockInputStream.cpp @@ -360,6 +360,7 @@ Block IBlockInputStream::getExtremes() String IBlockInputStream::getTreeID() const { std::stringstream s; + s.exceptions(std::ios::failbit); s << getName(); if (!children.empty()) diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/DataStreams/RemoteQueryExecutor.cpp index 1b1bf2af8ef..38486aa6368 100644 --- a/src/DataStreams/RemoteQueryExecutor.cpp +++ b/src/DataStreams/RemoteQueryExecutor.cpp @@ -5,6 +5,7 @@ #include #include #include +#include <Storages/SelectQueryInfo.h> #include #include #include @@ -314,6 +315,8 @@ void RemoteQueryExecutor::sendScalars() void RemoteQueryExecutor::sendExternalTables() { + SelectQueryInfo query_info; + size_t count = multiplexed_connections->size(); { @@ -328,11 +331,12 @@ void RemoteQueryExecutor::sendExternalTables() { StoragePtr cur = table.second; auto 
metadata_snapshot = cur->getInMemoryMetadataPtr(); - QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage(context); + QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage( + context, QueryProcessingStage::Complete, query_info); Pipe pipe = cur->read( metadata_snapshot->getColumns().getNamesOfPhysical(), - metadata_snapshot, {}, context, + metadata_snapshot, query_info, context, read_from_table_stage, DEFAULT_BLOCK_SIZE, 1); auto data = std::make_unique(); diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index 2583f4f2753..3fe17a4bbfc 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -33,6 +33,7 @@ static const std::vector supported_functions{"any", "anyLast", "min", String DataTypeCustomSimpleAggregateFunction::getName() const { std::stringstream stream; + stream.exceptions(std::ios::failbit); stream << "SimpleAggregateFunction(" << function->getName(); if (!parameters.empty()) diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index 5aefd39fb16..bd4329f6f58 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -30,6 +30,7 @@ template std::string DataTypeDecimal::doGetName() const { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "Decimal(" << this->precision << ", " << this->scale << ")"; return ss.str(); } diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 4d7fcd4e248..83e70a25f87 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -95,6 +95,7 @@ String getObjectDefinitionFromCreateQuery(const ASTPtr & query) if (!create) { std::ostringstream query_stream; + query_stream.exceptions(std::ios::failbit); formatAST(*query, query_stream, true); throw Exception("Query '" + query_stream.str() + "' is not CREATE query", ErrorCodes::LOGICAL_ERROR); } @@ -121,6 +122,7 @@ String getObjectDefinitionFromCreateQuery(const ASTPtr & query) create->table = TABLE_WITH_UUID_NAME_PLACEHOLDER; std::ostringstream statement_stream; + statement_stream.exceptions(std::ios::failbit); formatAST(*create, statement_stream, false); statement_stream << '\n'; return statement_stream.str(); diff --git a/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp index e26f5c2fd52..d86462a41bc 100644 --- a/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp @@ -128,6 +128,7 @@ static String checkVariableAndGetVersion(const mysqlxx::Pool::Entry & connection bool first = true; std::stringstream error_message; + error_message.exceptions(std::ios::failbit); error_message << "Illegal MySQL variables, the MaterializeMySQL engine requires "; for (const auto & [variable_name, variable_error_message] : variables_error_message) { @@ -239,6 +240,7 @@ static inline BlockOutputStreamPtr getTableOutput(const String & database_name, const StoragePtr & storage = DatabaseCatalog::instance().getTable(StorageID(database_name, table_name), query_context); std::stringstream insert_columns_str; + insert_columns_str.exceptions(std::ios::failbit); const StorageInMemoryMetadata & storage_metadata = storage->getInMemoryMetadata(); const ColumnsDescription & storage_columns = storage_metadata.getColumns(); const NamesAndTypesList & insert_columns_names = insert_materialized ? 
storage_columns.getAllPhysical() : storage_columns.getOrdinary(); @@ -330,6 +332,7 @@ std::optional MaterializeMySQLSyncThread::prepareSynchroniz const auto & position_message = [&]() { std::stringstream ss; + ss.exceptions(std::ios::failbit); position.dump(ss); return ss.str(); }; @@ -372,6 +375,7 @@ void MaterializeMySQLSyncThread::flushBuffersData(Buffers & buffers, Materialize const auto & position_message = [&]() { std::stringstream ss; + ss.exceptions(std::ios::failbit); client.getPosition().dump(ss); return ss.str(); }; @@ -643,6 +647,7 @@ void MaterializeMySQLSyncThread::onEvent(Buffers & buffers, const BinlogEventPtr const auto & dump_event_message = [&]() { std::stringstream ss; + ss.exceptions(std::ios::failbit); receive_event->dump(ss); return ss.str(); }; diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.h b/src/Dictionaries/ComplexKeyCacheDictionary.h index e055d1a6b33..2663fee266d 100644 --- a/src/Dictionaries/ComplexKeyCacheDictionary.h +++ b/src/Dictionaries/ComplexKeyCacheDictionary.h @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include "DictionaryStructure.h" #include "IDictionary.h" diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp index fca56442e6f..4c7cc5b4118 100644 --- a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -231,6 +231,7 @@ std::string DictionaryStructure::getKeyDescription() const return "UInt64"; std::ostringstream out; + out.exceptions(std::ios::failbit); out << '('; diff --git a/src/Dictionaries/tests/gtest_dictionary_configuration.cpp b/src/Dictionaries/tests/gtest_dictionary_configuration.cpp index 453ce2b81f0..62422124bd8 100644 --- a/src/Dictionaries/tests/gtest_dictionary_configuration.cpp +++ b/src/Dictionaries/tests/gtest_dictionary_configuration.cpp @@ -19,6 +19,7 @@ static std::string configurationToString(const DictionaryConfigurationPtr & conf { const Poco::Util::XMLConfiguration * xml_config = dynamic_cast(config.get()); std::ostringstream oss; + oss.exceptions(std::ios::failbit); xml_config->save(oss); return oss.str(); } diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 4dc5b816420..9e04d717949 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -333,150 +333,6 @@ void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegm target = std::move(file_segmentation_engine); } -/// File Segmentation Engines for parallel reading - -void registerFileSegmentationEngineTabSeparated(FormatFactory & factory); -void registerFileSegmentationEngineCSV(FormatFactory & factory); -void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory); -void registerFileSegmentationEngineRegexp(FormatFactory & factory); -void registerFileSegmentationEngineJSONAsString(FormatFactory & factory); -void registerFileSegmentationEngineLineAsString(FormatFactory & factory); - -/// Formats for both input/output. 
- -void registerInputFormatNative(FormatFactory & factory); -void registerOutputFormatNative(FormatFactory & factory); - -void registerInputFormatProcessorNative(FormatFactory & factory); -void registerOutputFormatProcessorNative(FormatFactory & factory); -void registerInputFormatProcessorRowBinary(FormatFactory & factory); -void registerOutputFormatProcessorRowBinary(FormatFactory & factory); -void registerInputFormatProcessorTabSeparated(FormatFactory & factory); -void registerOutputFormatProcessorTabSeparated(FormatFactory & factory); -void registerInputFormatProcessorValues(FormatFactory & factory); -void registerOutputFormatProcessorValues(FormatFactory & factory); -void registerInputFormatProcessorCSV(FormatFactory & factory); -void registerOutputFormatProcessorCSV(FormatFactory & factory); -void registerInputFormatProcessorTSKV(FormatFactory & factory); -void registerOutputFormatProcessorTSKV(FormatFactory & factory); -void registerInputFormatProcessorJSONEachRow(FormatFactory & factory); -void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory); -void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory); -void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory); -void registerInputFormatProcessorProtobuf(FormatFactory & factory); -void registerOutputFormatProcessorProtobuf(FormatFactory & factory); -void registerInputFormatProcessorTemplate(FormatFactory & factory); -void registerOutputFormatProcessorTemplate(FormatFactory & factory); -void registerInputFormatProcessorMsgPack(FormatFactory & factory); -void registerOutputFormatProcessorMsgPack(FormatFactory & factory); -void registerInputFormatProcessorORC(FormatFactory & factory); -void registerOutputFormatProcessorORC(FormatFactory & factory); -void registerInputFormatProcessorParquet(FormatFactory & factory); -void registerOutputFormatProcessorParquet(FormatFactory & factory); -void registerInputFormatProcessorArrow(FormatFactory & factory); -void registerOutputFormatProcessorArrow(FormatFactory & factory); -void registerInputFormatProcessorAvro(FormatFactory & factory); -void registerOutputFormatProcessorAvro(FormatFactory & factory); -void registerInputFormatProcessorRawBLOB(FormatFactory & factory); -void registerOutputFormatProcessorRawBLOB(FormatFactory & factory); - -/// Output only (presentational) formats. - -void registerOutputFormatNull(FormatFactory & factory); - -void registerOutputFormatProcessorPretty(FormatFactory & factory); -void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory); -void registerOutputFormatProcessorPrettySpace(FormatFactory & factory); -void registerOutputFormatProcessorVertical(FormatFactory & factory); -void registerOutputFormatProcessorJSON(FormatFactory & factory); -void registerOutputFormatProcessorJSONCompact(FormatFactory & factory); -void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory); -void registerOutputFormatProcessorXML(FormatFactory & factory); -void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory); -void registerOutputFormatProcessorNull(FormatFactory & factory); -void registerOutputFormatProcessorMySQLWire(FormatFactory & factory); -void registerOutputFormatProcessorMarkdown(FormatFactory & factory); -void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory); - -/// Input only formats. 
- -void registerInputFormatProcessorRegexp(FormatFactory & factory); -void registerInputFormatProcessorJSONAsString(FormatFactory & factory); -void registerInputFormatProcessorLineAsString(FormatFactory & factory); -void registerInputFormatProcessorCapnProto(FormatFactory & factory); - -FormatFactory::FormatFactory() -{ - registerFileSegmentationEngineTabSeparated(*this); - registerFileSegmentationEngineCSV(*this); - registerFileSegmentationEngineJSONEachRow(*this); - registerFileSegmentationEngineRegexp(*this); - registerFileSegmentationEngineJSONAsString(*this); - registerFileSegmentationEngineLineAsString(*this); - - registerInputFormatNative(*this); - registerOutputFormatNative(*this); - - registerInputFormatProcessorNative(*this); - registerOutputFormatProcessorNative(*this); - registerInputFormatProcessorRowBinary(*this); - registerOutputFormatProcessorRowBinary(*this); - registerInputFormatProcessorTabSeparated(*this); - registerOutputFormatProcessorTabSeparated(*this); - registerInputFormatProcessorValues(*this); - registerOutputFormatProcessorValues(*this); - registerInputFormatProcessorCSV(*this); - registerOutputFormatProcessorCSV(*this); - registerInputFormatProcessorTSKV(*this); - registerOutputFormatProcessorTSKV(*this); - registerInputFormatProcessorJSONEachRow(*this); - registerOutputFormatProcessorJSONEachRow(*this); - registerInputFormatProcessorJSONCompactEachRow(*this); - registerOutputFormatProcessorJSONCompactEachRow(*this); - registerInputFormatProcessorProtobuf(*this); - registerOutputFormatProcessorProtobuf(*this); - registerInputFormatProcessorTemplate(*this); - registerOutputFormatProcessorTemplate(*this); - registerInputFormatProcessorMsgPack(*this); - registerOutputFormatProcessorMsgPack(*this); - registerInputFormatProcessorRawBLOB(*this); - registerOutputFormatProcessorRawBLOB(*this); - -#if !defined(ARCADIA_BUILD) - registerInputFormatProcessorORC(*this); - registerOutputFormatProcessorORC(*this); - registerInputFormatProcessorParquet(*this); - registerOutputFormatProcessorParquet(*this); - registerInputFormatProcessorArrow(*this); - registerOutputFormatProcessorArrow(*this); - registerInputFormatProcessorAvro(*this); - registerOutputFormatProcessorAvro(*this); -#endif - - registerOutputFormatNull(*this); - - registerOutputFormatProcessorPretty(*this); - registerOutputFormatProcessorPrettyCompact(*this); - registerOutputFormatProcessorPrettySpace(*this); - registerOutputFormatProcessorVertical(*this); - registerOutputFormatProcessorJSON(*this); - registerOutputFormatProcessorJSONCompact(*this); - registerOutputFormatProcessorJSONEachRowWithProgress(*this); - registerOutputFormatProcessorXML(*this); - registerOutputFormatProcessorODBCDriver2(*this); - registerOutputFormatProcessorNull(*this); - registerOutputFormatProcessorMySQLWire(*this); - registerOutputFormatProcessorMarkdown(*this); - registerOutputFormatProcessorPostgreSQLWire(*this); - - registerInputFormatProcessorRegexp(*this); - registerInputFormatProcessorJSONAsString(*this); - registerInputFormatProcessorLineAsString(*this); - -#if !defined(ARCADIA_BUILD) - registerInputFormatProcessorCapnProto(*this); -#endif -} FormatFactory & FormatFactory::instance() { diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index d49414e3944..dbf6a3d65b2 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -96,7 +96,6 @@ private: using FormatsDictionary = std::unordered_map; public: - static FormatFactory & instance(); BlockInputStreamPtr getInput( @@ -137,8 +136,6 @@ 
public: private: FormatsDictionary dict; - FormatFactory(); - const Creators & getCreators(const String & name) const; }; diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp new file mode 100644 index 00000000000..96b2c4ee384 --- /dev/null +++ b/src/Formats/registerFormats.cpp @@ -0,0 +1,160 @@ +#if !defined(ARCADIA_BUILD) +# include +#endif + +#include + + +namespace DB +{ + +/// File Segmentation Engines for parallel reading + +void registerFileSegmentationEngineTabSeparated(FormatFactory & factory); +void registerFileSegmentationEngineCSV(FormatFactory & factory); +void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory); +void registerFileSegmentationEngineRegexp(FormatFactory & factory); +void registerFileSegmentationEngineJSONAsString(FormatFactory & factory); +void registerFileSegmentationEngineLineAsString(FormatFactory & factory); + +/// Formats for both input/output. + +void registerInputFormatNative(FormatFactory & factory); +void registerOutputFormatNative(FormatFactory & factory); + +void registerInputFormatProcessorNative(FormatFactory & factory); +void registerOutputFormatProcessorNative(FormatFactory & factory); +void registerInputFormatProcessorRowBinary(FormatFactory & factory); +void registerOutputFormatProcessorRowBinary(FormatFactory & factory); +void registerInputFormatProcessorTabSeparated(FormatFactory & factory); +void registerOutputFormatProcessorTabSeparated(FormatFactory & factory); +void registerInputFormatProcessorValues(FormatFactory & factory); +void registerOutputFormatProcessorValues(FormatFactory & factory); +void registerInputFormatProcessorCSV(FormatFactory & factory); +void registerOutputFormatProcessorCSV(FormatFactory & factory); +void registerInputFormatProcessorTSKV(FormatFactory & factory); +void registerOutputFormatProcessorTSKV(FormatFactory & factory); +void registerInputFormatProcessorJSONEachRow(FormatFactory & factory); +void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory); +void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory); +void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory); +void registerInputFormatProcessorProtobuf(FormatFactory & factory); +void registerOutputFormatProcessorProtobuf(FormatFactory & factory); +void registerInputFormatProcessorTemplate(FormatFactory & factory); +void registerOutputFormatProcessorTemplate(FormatFactory & factory); +void registerInputFormatProcessorMsgPack(FormatFactory & factory); +void registerOutputFormatProcessorMsgPack(FormatFactory & factory); +void registerInputFormatProcessorORC(FormatFactory & factory); +void registerOutputFormatProcessorORC(FormatFactory & factory); +void registerInputFormatProcessorParquet(FormatFactory & factory); +void registerOutputFormatProcessorParquet(FormatFactory & factory); +void registerInputFormatProcessorArrow(FormatFactory & factory); +void registerOutputFormatProcessorArrow(FormatFactory & factory); +void registerInputFormatProcessorAvro(FormatFactory & factory); +void registerOutputFormatProcessorAvro(FormatFactory & factory); +void registerInputFormatProcessorRawBLOB(FormatFactory & factory); +void registerOutputFormatProcessorRawBLOB(FormatFactory & factory); + +/// Output only (presentational) formats. 
+ +void registerOutputFormatNull(FormatFactory & factory); + +void registerOutputFormatProcessorPretty(FormatFactory & factory); +void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory); +void registerOutputFormatProcessorPrettySpace(FormatFactory & factory); +void registerOutputFormatProcessorVertical(FormatFactory & factory); +void registerOutputFormatProcessorJSON(FormatFactory & factory); +void registerOutputFormatProcessorJSONCompact(FormatFactory & factory); +void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory); +void registerOutputFormatProcessorXML(FormatFactory & factory); +void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory); +void registerOutputFormatProcessorNull(FormatFactory & factory); +void registerOutputFormatProcessorMySQLWire(FormatFactory & factory); +void registerOutputFormatProcessorMarkdown(FormatFactory & factory); +void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory); + +/// Input only formats. + +void registerInputFormatProcessorRegexp(FormatFactory & factory); +void registerInputFormatProcessorJSONAsString(FormatFactory & factory); +void registerInputFormatProcessorLineAsString(FormatFactory & factory); +void registerInputFormatProcessorCapnProto(FormatFactory & factory); + + +void registerFormats() +{ + auto & factory = FormatFactory::instance(); + + registerFileSegmentationEngineTabSeparated(factory); + registerFileSegmentationEngineCSV(factory); + registerFileSegmentationEngineJSONEachRow(factory); + registerFileSegmentationEngineRegexp(factory); + registerFileSegmentationEngineJSONAsString(factory); + registerFileSegmentationEngineLineAsString(factory); + + registerInputFormatNative(factory); + registerOutputFormatNative(factory); + + registerInputFormatProcessorNative(factory); + registerOutputFormatProcessorNative(factory); + registerInputFormatProcessorRowBinary(factory); + registerOutputFormatProcessorRowBinary(factory); + registerInputFormatProcessorTabSeparated(factory); + registerOutputFormatProcessorTabSeparated(factory); + registerInputFormatProcessorValues(factory); + registerOutputFormatProcessorValues(factory); + registerInputFormatProcessorCSV(factory); + registerOutputFormatProcessorCSV(factory); + registerInputFormatProcessorTSKV(factory); + registerOutputFormatProcessorTSKV(factory); + registerInputFormatProcessorJSONEachRow(factory); + registerOutputFormatProcessorJSONEachRow(factory); + registerInputFormatProcessorJSONCompactEachRow(factory); + registerOutputFormatProcessorJSONCompactEachRow(factory); + registerInputFormatProcessorProtobuf(factory); + registerOutputFormatProcessorProtobuf(factory); + registerInputFormatProcessorTemplate(factory); + registerOutputFormatProcessorTemplate(factory); + registerInputFormatProcessorMsgPack(factory); + registerOutputFormatProcessorMsgPack(factory); + registerInputFormatProcessorRawBLOB(factory); + registerOutputFormatProcessorRawBLOB(factory); + +#if !defined(ARCADIA_BUILD) + registerInputFormatProcessorORC(factory); + registerOutputFormatProcessorORC(factory); + registerInputFormatProcessorParquet(factory); + registerOutputFormatProcessorParquet(factory); + registerInputFormatProcessorArrow(factory); + registerOutputFormatProcessorArrow(factory); + registerInputFormatProcessorAvro(factory); + registerOutputFormatProcessorAvro(factory); +#endif + + registerOutputFormatNull(factory); + + registerOutputFormatProcessorPretty(factory); + registerOutputFormatProcessorPrettyCompact(factory); + 
registerOutputFormatProcessorPrettySpace(factory); + registerOutputFormatProcessorVertical(factory); + registerOutputFormatProcessorJSON(factory); + registerOutputFormatProcessorJSONCompact(factory); + registerOutputFormatProcessorJSONEachRowWithProgress(factory); + registerOutputFormatProcessorXML(factory); + registerOutputFormatProcessorODBCDriver2(factory); + registerOutputFormatProcessorNull(factory); + registerOutputFormatProcessorMySQLWire(factory); + registerOutputFormatProcessorMarkdown(factory); + registerOutputFormatProcessorPostgreSQLWire(factory); + + registerInputFormatProcessorRegexp(factory); + registerInputFormatProcessorJSONAsString(factory); + registerInputFormatProcessorLineAsString(factory); + +#if !defined(ARCADIA_BUILD) + registerInputFormatProcessorCapnProto(factory); +#endif +} + +} + diff --git a/src/Formats/registerFormats.h b/src/Formats/registerFormats.h new file mode 100644 index 00000000000..e4ff79248d0 --- /dev/null +++ b/src/Formats/registerFormats.h @@ -0,0 +1,9 @@ +#pragma once + +namespace DB +{ + +void registerFormats(); + +} + diff --git a/src/Formats/ya.make b/src/Formats/ya.make index 8e797de39f8..2dc3adc021d 100644 --- a/src/Formats/ya.make +++ b/src/Formats/ya.make @@ -22,6 +22,7 @@ SRCS( ProtobufReader.cpp ProtobufSchemas.cpp ProtobufWriter.cpp + registerFormats.cpp verbosePrintString.cpp ) diff --git a/src/Functions/abtesting.cpp b/src/Functions/abtesting.cpp index c57119d1c34..051b1f6f0ef 100644 --- a/src/Functions/abtesting.cpp +++ b/src/Functions/abtesting.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #define STATS_ENABLE_STDVEC_WRAPPERS #include @@ -139,31 +139,29 @@ Variants bayesian_ab_test(String distribution, PODArray & xs, PODArray< String convertToJson(const PODArray & variant_names, const Variants & variants) { FormatSettings settings; - std::stringstream s; + WriteBufferFromOwnString buf; + + writeCString("{\"data\":[", buf); + for (size_t i = 0; i < variants.size(); ++i) { - WriteBufferFromOStream buf(s); - - writeCString("{\"data\":[", buf); - for (size_t i = 0; i < variants.size(); ++i) - { - writeCString("{\"variant_name\":", buf); - writeJSONString(variant_names[i], buf, settings); - writeCString(",\"x\":", buf); - writeText(variants[i].x, buf); - writeCString(",\"y\":", buf); - writeText(variants[i].y, buf); - writeCString(",\"beats_control\":", buf); - writeText(variants[i].beats_control, buf); - writeCString(",\"to_be_best\":", buf); - writeText(variants[i].best, buf); - writeCString("}", buf); - if (i != variant_names.size() -1) writeCString(",", buf); - } - writeCString("]}", buf); + writeCString("{\"variant_name\":", buf); + writeJSONString(variant_names[i], buf, settings); + writeCString(",\"x\":", buf); + writeText(variants[i].x, buf); + writeCString(",\"y\":", buf); + writeText(variants[i].y, buf); + writeCString(",\"beats_control\":", buf); + writeText(variants[i].beats_control, buf); + writeCString(",\"to_be_best\":", buf); + writeText(variants[i].best, buf); + writeCString("}", buf); + if (i != variant_names.size() -1) + writeCString(",", buf); } + writeCString("]}", buf); - return s.str(); + return buf.str(); } class FunctionBayesAB : public IFunction diff --git a/src/Functions/tests/gtest_abtesting.cpp b/src/Functions/tests/gtest_abtesting.cpp index b388a187479..e7ef5b5c3cf 100644 --- a/src/Functions/tests/gtest_abtesting.cpp +++ b/src/Functions/tests/gtest_abtesting.cpp @@ -10,39 +10,44 @@ Variants test_bayesab(std::string dist, PODArray xs, PODArray { Variants variants; - std::cout << 
std::fixed; + //std::cout << std::fixed; if (dist == "beta") { - std::cout << dist << "\nclicks: "; - for (auto x : xs) std::cout << x << " "; +/* std::cout << dist << "\nclicks: "; + for (auto x : xs) + std::cout << x << " "; std::cout <<"\tconversions: "; - for (auto y : ys) std::cout << y << " "; + for (auto y : ys) + std::cout << y << " "; - std::cout << "\n"; + std::cout << "\n";*/ variants = bayesian_ab_test(dist, xs, ys); } else if (dist == "gamma") { - std::cout << dist << "\nclicks: "; - for (auto x : xs) std::cout << x << " "; +/* std::cout << dist << "\nclicks: "; + for (auto x : xs) + std::cout << x << " "; std::cout <<"\tcost: "; - for (auto y : ys) std::cout << y << " "; + for (auto y : ys) + std::cout << y << " "; + + std::cout << "\n";*/ - std::cout << "\n"; variants = bayesian_ab_test(dist, xs, ys); } - for (size_t i = 0; i < variants.size(); ++i) +/* for (size_t i = 0; i < variants.size(); ++i) std::cout << i << " beats 0: " << variants[i].beats_control << std::endl; for (size_t i = 0; i < variants.size(); ++i) std::cout << i << " to be best: " << variants[i].best << std::endl; std::cout << convertToJson({"0", "1", "2"}, variants) << std::endl; - +*/ Float64 max_val = 0.0, min_val = 2.0; for (size_t i = 0; i < variants.size(); ++i) { diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 6b7f30cd9b6..04fec145775 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -20,6 +20,7 @@ # include # include # include +# include #endif #include @@ -68,27 +69,27 @@ namespace throw Exception("Unsupported scheme in URI '" + uri.toString() + "'", ErrorCodes::UNSUPPORTED_URI_SCHEME); } - HTTPSessionPtr makeHTTPSessionImpl(const std::string & host, UInt16 port, bool https, bool keep_alive, bool resolve_host=true) + HTTPSessionPtr makeHTTPSessionImpl(const std::string & host, UInt16 port, bool https, bool keep_alive, bool resolve_host = true) { HTTPSessionPtr session; if (https) + { #if USE_SSL - session = std::make_shared(); + /// Cannot resolve host in advance, otherwise SNI won't work in Poco. + session = std::make_shared(host, port); #else throw Exception("ClickHouse was built without HTTPS support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); #endif + } else - session = std::make_shared(); + { + String resolved_host = resolve_host ? DNSResolver::instance().resolveHost(host).toString() : host; + session = std::make_shared(resolved_host, port); + } ProfileEvents::increment(ProfileEvents::CreatedHTTPConnections); - if (resolve_host) - session->setHost(DNSResolver::instance().resolveHost(host).toString()); - else - session->setHost(host); - session->setPort(port); - /// doesn't work properly without patch #if defined(POCO_CLICKHOUSE_PATCH) session->setKeepAlive(keep_alive); @@ -239,6 +240,7 @@ void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPR if (!(status == Poco::Net::HTTPResponse::HTTP_OK || (isRedirect(status) && allow_redirects))) { std::stringstream error_message; + error_message.exceptions(std::ios::failbit); error_message << "Received error from remote server " << request.getURI() << ". 
HTTP status code: " << status << " " << response.getReason() << ", body: " << istr.rdbuf(); diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index 66764b1c805..db0abe8fc6e 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -13,6 +13,7 @@ #include + namespace Poco { namespace Net @@ -24,6 +25,7 @@ namespace Net namespace DB { + constexpr int HTTP_TOO_MANY_REQUESTS = 429; class SingleEndpointHTTPSessionPool : public PoolBase @@ -39,6 +41,7 @@ private: public: SingleEndpointHTTPSessionPool(const std::string & host_, UInt16 port_, bool https_, size_t max_pool_size_); }; + using PooledHTTPSessionPtr = SingleEndpointHTTPSessionPool::Entry; using HTTPSessionPtr = std::shared_ptr; @@ -59,5 +62,7 @@ bool isRedirect(const Poco::Net::HTTPResponse::HTTPStatus status); */ std::istream * receiveResponse( Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, bool allow_redirects); -void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects = false); + +void assertResponseIsOk( + const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects = false); } diff --git a/src/IO/MySQLPacketPayloadReadBuffer.cpp b/src/IO/MySQLPacketPayloadReadBuffer.cpp index 16b1cd5de19..f6f899e0ac7 100644 --- a/src/IO/MySQLPacketPayloadReadBuffer.cpp +++ b/src/IO/MySQLPacketPayloadReadBuffer.cpp @@ -27,20 +27,14 @@ bool MySQLPacketPayloadReadBuffer::nextImpl() in.readStrict(reinterpret_cast(&payload_length), 3); if (payload_length > MAX_PACKET_LENGTH) - { - std::ostringstream tmp; - tmp << "Received packet with payload larger than max_packet_size: " << payload_length; - throw Exception(tmp.str(), ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); - } + throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT, + "Received packet with payload larger than max_packet_size: {}", payload_length); size_t packet_sequence_id = 0; in.read(reinterpret_cast(packet_sequence_id)); if (packet_sequence_id != sequence_id) - { - std::ostringstream tmp; - tmp << "Received packet with wrong sequence-id: " << packet_sequence_id << ". Expected: " << static_cast(sequence_id) << '.'; - throw Exception(tmp.str(), ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); - } + throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT, + "Received packet with wrong sequence-id: {}. 
Expected: {}.", packet_sequence_id, static_cast(sequence_id)); sequence_id++; if (payload_length == 0) diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index acd0414ef5d..5c66c3209f6 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -67,7 +67,7 @@ ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) { - return offset() != buffer().size() || socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR); + return available() || socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR); } } diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp deleted file mode 100644 index 4d046bfe2c6..00000000000 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index ee6fcc58ab0..267800d8900 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -72,10 +72,7 @@ public: } else { - std::stringstream error_message; - error_message << "Too many redirects while trying to access " << initial_uri.toString(); - - throw Exception(error_message.str(), ErrorCodes::TOO_MANY_REDIRECTS); + throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, "Too many redirects while trying to access {}", initial_uri.toString()); } } diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index a649b76730b..49ccb6dc1b3 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -248,6 +248,7 @@ void PocoHTTPClient::makeRequestInternal( response->SetContentType(poco_response.getContentType()); std::stringstream headers_ss; + headers_ss.exceptions(std::ios::failbit); for (const auto & [header_name, header_value] : poco_response) { response->AddHeader(header_name, header_value); diff --git a/src/IO/tests/gtest_bit_io.cpp b/src/IO/tests/gtest_bit_io.cpp index f75abf92f30..dce146eaad7 100644 --- a/src/IO/tests/gtest_bit_io.cpp +++ b/src/IO/tests/gtest_bit_io.cpp @@ -77,6 +77,7 @@ std::string dumpContents(const T& container, { std::stringstream sstr; + sstr.exceptions(std::ios::failbit); dumpBuffer(std::begin(container), std::end(container), &sstr, col_sep, row_sep, cols_in_row); return sstr.str(); diff --git a/src/IO/tests/hashing_read_buffer.cpp b/src/IO/tests/hashing_read_buffer.cpp index dbec6b2374b..a1140160c32 100644 --- a/src/IO/tests/hashing_read_buffer.cpp +++ b/src/IO/tests/hashing_read_buffer.cpp @@ -23,6 +23,7 @@ static void test(size_t data_size) { std::cout << "block size " << read_buffer_block_size << std::endl; std::stringstream io; + io.exceptions(std::ios::failbit); DB::WriteBufferFromOStream out_impl(io); DB::HashingWriteBuffer out(out_impl); out.write(data, data_size); diff --git a/src/IO/tests/limit_read_buffer2.cpp b/src/IO/tests/limit_read_buffer2.cpp index 826fb048a0c..416eae0966b 100644 --- a/src/IO/tests/limit_read_buffer2.cpp +++ b/src/IO/tests/limit_read_buffer2.cpp @@ -21,6 +21,7 @@ try using namespace DB; std::stringstream s; + s.exceptions(std::ios::failbit); { std::string src = "1"; diff --git a/src/IO/tests/write_buffer.cpp b/src/IO/tests/write_buffer.cpp index 14beb6d0539..c0e9150d372 100644 --- a/src/IO/tests/write_buffer.cpp +++ b/src/IO/tests/write_buffer.cpp @@ -17,6 +17,7 @@ int main(int, char **) DB::String d = "'xyz\\"; std::stringstream s; + 
s.exceptions(std::ios::failbit); { DB::WriteBufferFromOStream out(s); diff --git a/src/IO/ya.make b/src/IO/ya.make index d20c01faaa3..f935a00f385 100644 --- a/src/IO/ya.make +++ b/src/IO/ya.make @@ -38,7 +38,6 @@ SRCS( ReadBufferFromMemory.cpp ReadBufferFromPocoSocket.cpp ReadHelpers.cpp - ReadWriteBufferFromHTTP.cpp SeekAvoidingReadBuffer.cpp UseSSL.cpp WriteBufferAIO.cpp diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 9c2766ae7d6..218502e7f43 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -46,6 +46,14 @@ inline bool isLocalImpl(const Cluster::Address & address, const Poco::Net::Socke return address.default_database.empty() && isLocalAddress(resolved_address, clickhouse_port); } +void concatInsertPath(std::string & insert_path, const std::string & dir_name) +{ + if (insert_path.empty()) + insert_path = dir_name; + else + insert_path += "," + dir_name; +} + } /// Implementation of Cluster::Address class @@ -358,9 +366,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false); - /// In case of internal_replication we will be appending names to dir_name_for_internal_replication - std::string dir_name_for_internal_replication; - std::string dir_name_for_internal_replication_with_local; + ShardInfoInsertPathForInternalReplication insert_paths; for (const auto & replica_key : replica_keys) { @@ -379,18 +385,20 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, if (internal_replication) { - auto dir_name = replica_addresses.back().toFullString(settings.use_compact_format_in_distributed_parts_names); - if (!replica_addresses.back().is_local) + /// use_compact_format=0 { - if (dir_name_for_internal_replication.empty()) - dir_name_for_internal_replication = dir_name; - else - dir_name_for_internal_replication += "," + dir_name; + auto dir_name = replica_addresses.back().toFullString(false /* use_compact_format */); + if (!replica_addresses.back().is_local) + concatInsertPath(insert_paths.prefer_localhost_replica, dir_name); + concatInsertPath(insert_paths.no_prefer_localhost_replica, dir_name); + } + /// use_compact_format=1 + { + auto dir_name = replica_addresses.back().toFullString(true /* use_compact_format */); + if (!replica_addresses.back().is_local) + concatInsertPath(insert_paths.prefer_localhost_replica_compact, dir_name); + concatInsertPath(insert_paths.no_prefer_localhost_replica_compact, dir_name); } - if (dir_name_for_internal_replication_with_local.empty()) - dir_name_for_internal_replication_with_local = dir_name; - else - dir_name_for_internal_replication_with_local += "," + dir_name; } } else @@ -425,8 +433,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size()); shards_info.push_back({ - std::move(dir_name_for_internal_replication), - std::move(dir_name_for_internal_replication_with_local), + std::move(insert_paths), current_shard_num, weight, std::move(shard_local_addresses), @@ -485,8 +492,7 @@ Cluster::Cluster(const Settings & settings, const std::vector; using AddressesWithFailover = std::vector; + /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication + /// + /// Contains a different path for each permutation of: + /// - prefer_localhost_replica + /// Note: with prefer_localhost_replica==0 it will contain local nodes.
+ /// - use_compact_format_in_distributed_parts_names + /// See toFullString() + /// + /// These are cached to avoid looping over replicas in insertPathForInternalReplication(). + struct ShardInfoInsertPathForInternalReplication + { + /// prefer_localhost_replica == 1 && use_compact_format_in_distributed_parts_names=0 + std::string prefer_localhost_replica; + /// prefer_localhost_replica == 0 && use_compact_format_in_distributed_parts_names=0 + std::string no_prefer_localhost_replica; + /// prefer_localhost_replica == 1 && use_compact_format_in_distributed_parts_names=1 + std::string prefer_localhost_replica_compact; + /// prefer_localhost_replica == 0 && use_compact_format_in_distributed_parts_names=1 + std::string no_prefer_localhost_replica_compact; + }; + struct ShardInfo { public: @@ -141,13 +162,10 @@ public: size_t getLocalNodeCount() const { return local_addresses.size(); } bool hasInternalReplication() const { return has_internal_replication; } /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication - const std::string & pathForInsert(bool prefer_localhost_replica) const; + const std::string & insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const; public: - /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication && prefer_localhost_replica - std::string dir_name_for_internal_replication; - /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication && !prefer_localhost_replica - std::string dir_name_for_internal_replication_with_local; + ShardInfoInsertPathForInternalReplication insert_path_for_internal_replication; /// Number of the shard, the indexation begins with 1 UInt32 shard_num = 0; UInt32 weight = 1; diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 7d99cbd1d43..9e64695d1a0 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -107,6 +107,7 @@ String formattedAST(const ASTPtr & ast) if (!ast) return {}; std::stringstream ss; + ss.exceptions(std::ios::failbit); formatAST(*ast, ss, false, true); return ss.str(); } diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index eb636395fd9..24d819ad2d7 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -4,9 +4,10 @@ #include #include #include -#include #include +#include #include +#include namespace DB @@ -81,16 +82,17 @@ Context updateSettingsForCluster(const Cluster & cluster, const Context & contex } Pipe executeQuery( - IStreamFactory & stream_factory, const ClusterPtr & cluster, Poco::Logger * log, - const ASTPtr & query_ast, const Context & context, const Settings & settings, const SelectQueryInfo & query_info) + IStreamFactory & stream_factory, Poco::Logger * log, + const ASTPtr & query_ast, const Context & context, const SelectQueryInfo & query_info) { assert(log); Pipes res; + const Settings & settings = context.getSettingsRef(); const std::string query = queryToString(query_ast); - Context new_context = updateSettingsForCluster(*cluster, context, settings, log); + Context new_context = updateSettingsForCluster(*query_info.cluster, context, settings, log); ThrottlerPtr user_level_throttler; if (auto * process_list_element = context.getProcessListElement()) @@ -109,7 +111,7 @@ Pipe executeQuery( else 
throttler = user_level_throttler; - for (const auto & shard_info : cluster->getShardsInfo()) + for (const auto & shard_info : query_info.cluster->getShardsInfo()) stream_factory.createForShard(shard_info, query, query_ast, new_context, throttler, query_info, res); return Pipe::unitePipes(std::move(res)); diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 3a3fca4d54c..e5d4aee2eb9 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -1,7 +1,6 @@ #pragma once #include -#include namespace DB { @@ -33,8 +32,7 @@ Context updateSettingsForCluster(const Cluster & cluster, const Context & contex /// `stream_factory` object encapsulates the logic of creating streams for a different type of query /// (currently SELECT, DESCRIBE). Pipe executeQuery( - IStreamFactory & stream_factory, const ClusterPtr & cluster, Poco::Logger * log, - const ASTPtr & query_ast, const Context & context, const Settings & settings, const SelectQueryInfo & query_info); + IStreamFactory & stream_factory, Poco::Logger * log, const ASTPtr & query_ast, const Context & context, const SelectQueryInfo & query_info); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index befb097faf7..a22eacd9a49 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -65,6 +65,7 @@ #include #include + namespace ProfileEvents { extern const Event ContextLock; @@ -153,7 +154,7 @@ public: } else if (it->second->key.first != context.client_info.current_user) { - throw Exception("Session belongs to a different user", ErrorCodes::LOGICAL_ERROR); + throw Exception("Session belongs to a different user", ErrorCodes::SESSION_IS_LOCKED); } /// Use existing session. 
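A recurring change throughout this patch is the line ss.exceptions(std::ios::failbit) added right after constructing a std::stringstream. A minimal standalone sketch of the effect, assuming only the standard library: once failbit exceptions are enabled, a failed stream operation throws std::ios_base::failure instead of silently leaving the stream in a fail state that later surfaces as an empty or truncated string.

    #include <iostream>
    #include <sstream>

    int main()
    {
        std::stringstream ss;
        ss.exceptions(std::ios::failbit);   // stream failures now throw std::ios_base::failure

        try
        {
            int value = 0;
            ss << "not a number";
            ss >> value;                    // extraction fails: throws instead of silently setting failbit
        }
        catch (const std::ios_base::failure & e)
        {
            std::cerr << "stream failure surfaced as an exception: " << e.what() << '\n';
        }
        return 0;
    }
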
@@ -596,7 +597,8 @@ VolumePtr Context::setTemporaryStorage(const String & path, const String & polic { StoragePolicyPtr tmp_policy = getStoragePolicySelector(lock)->get(policy_name); if (tmp_policy->getVolumes().size() != 1) - throw Exception("Policy " + policy_name + " is used temporary files, such policy should have exactly one volume", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception("Policy " + policy_name + " is used temporary files, such policy should have exactly one volume", + ErrorCodes::NO_ELEMENTS_IN_CONFIG); shared->tmp_volume = tmp_policy->getVolume(0); } @@ -1083,11 +1085,13 @@ String Context::getInitialQueryId() const void Context::setCurrentDatabaseNameInGlobalContext(const String & name) { if (global_context != this) - throw Exception("Cannot set current database for non global context, this method should be used during server initialization", ErrorCodes::LOGICAL_ERROR); + throw Exception("Cannot set current database for non global context, this method should be used during server initialization", + ErrorCodes::LOGICAL_ERROR); auto lock = getLock(); if (!current_database.empty()) - throw Exception("Default database name cannot be changed in global context without server restart", ErrorCodes::LOGICAL_ERROR); + throw Exception("Default database name cannot be changed in global context without server restart", + ErrorCodes::LOGICAL_ERROR); current_database = name; } @@ -1470,7 +1474,7 @@ DDLWorker & Context::getDDLWorker() const { auto lock = getLock(); if (!shared->ddl_worker) - throw Exception("DDL background thread is not initialized.", ErrorCodes::LOGICAL_ERROR); + throw Exception("DDL background thread is not initialized.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); return *shared->ddl_worker; } @@ -1962,6 +1966,7 @@ void Context::checkCanBeDropped(const String & database, const String & table, c String size_str = formatReadableSizeWithDecimalSuffix(size); String max_size_to_drop_str = formatReadableSizeWithDecimalSuffix(max_size_to_drop); std::stringstream ostr; + ostr.exceptions(std::ios::failbit); ostr << "Table or Partition in " << backQuoteIfNeed(database) << "." << backQuoteIfNeed(table) << " was not dropped.\n" << "Reason:\n" diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 4d006165767..aebf713a4ca 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -348,6 +348,7 @@ std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & colum std::string ExpressionActions::dumpActions() const { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "input:\n"; for (const auto & input_column : required_columns) @@ -657,6 +658,7 @@ void ExpressionActionsChain::finalize() std::string ExpressionActionsChain::dumpChain() const { std::stringstream ss; + ss.exceptions(std::ios::failbit); for (size_t i = 0; i < steps.size(); ++i) { diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 6a8bdbea1ec..286d5269a64 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -136,6 +136,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) { /// Currently, there are no database engines, that support any arguments. 
std::stringstream ostr; + ostr.exceptions(std::ios::failbit); formatAST(*create.storage, ostr, false, false); throw Exception("Unknown database engine: " + ostr.str(), ErrorCodes::UNKNOWN_DATABASE_ENGINE); } @@ -182,6 +183,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) create.if_not_exists = false; std::ostringstream statement_stream; + statement_stream.exceptions(std::ios::failbit); formatAST(create, statement_stream, false); statement_stream << '\n'; String statement = statement_stream.str(); diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index a0a63dfed08..ed791f0d592 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -223,6 +223,7 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl() MutableColumns res_columns = sample_block.cloneEmptyColumns(); std::stringstream ss; + ss.exceptions(std::ios::failbit); if (ast.getKind() == ASTExplainQuery::ParsedAST) { diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 74b060fa1b7..f7cc0ffb927 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -495,8 +495,10 @@ BlockIO InterpreterSelectQuery::execute() Block InterpreterSelectQuery::getSampleBlockImpl() { + query_info.query = query_ptr; + if (storage && !options.only_analyze) - from_stage = storage->getQueryProcessingStage(*context, options.to_stage, query_ptr); + from_stage = storage->getQueryProcessingStage(*context, options.to_stage, query_info); /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing. bool first_stage = from_stage < QueryProcessingStage::WithMergeableState @@ -1427,7 +1429,6 @@ void InterpreterSelectQuery::executeFetchColumns( if (max_streams > 1 && !is_remote) max_streams *= settings.max_streams_to_max_threads_ratio; - query_info.query = query_ptr; query_info.syntax_analyzer_result = syntax_analyzer_result; query_info.sets = query_analyzer->getPreparedSets(); diff --git a/src/Interpreters/InterpreterShowAccessQuery.cpp b/src/Interpreters/InterpreterShowAccessQuery.cpp index c9541b4f5bf..5f28c49c0bc 100644 --- a/src/Interpreters/InterpreterShowAccessQuery.cpp +++ b/src/Interpreters/InterpreterShowAccessQuery.cpp @@ -35,6 +35,7 @@ BlockInputStreamPtr InterpreterShowAccessQuery::executeImpl() const /// Build the result column. MutableColumnPtr column = ColumnString::create(); std::stringstream ss; + ss.exceptions(std::ios::failbit); for (const auto & query : queries) { ss.str(""); diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp index 8d5f27e116d..749a5811e13 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp @@ -239,6 +239,7 @@ BlockInputStreamPtr InterpreterShowCreateAccessEntityQuery::executeImpl() /// Build the result column. MutableColumnPtr column = ColumnString::create(); std::stringstream create_query_ss; + create_query_ss.exceptions(std::ios::failbit); for (const auto & create_query : create_queries) { formatAST(*create_query, create_query_ss, false, true); @@ -248,6 +249,7 @@ BlockInputStreamPtr InterpreterShowCreateAccessEntityQuery::executeImpl() /// Prepare description of the result column. 
std::stringstream desc_ss; + desc_ss.exceptions(std::ios::failbit); const auto & show_query = query_ptr->as(); formatAST(show_query, desc_ss, false, true); String desc = desc_ss.str(); diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index b14baaafbb9..8861914a68a 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -79,6 +79,7 @@ BlockInputStreamPtr InterpreterShowCreateQuery::executeImpl() } std::stringstream stream; + stream.exceptions(std::ios::failbit); formatAST(*create_query, stream, false, false); String res = stream.str(); diff --git a/src/Interpreters/InterpreterShowGrantsQuery.cpp b/src/Interpreters/InterpreterShowGrantsQuery.cpp index 45e065dcfd9..7de51b6a7ee 100644 --- a/src/Interpreters/InterpreterShowGrantsQuery.cpp +++ b/src/Interpreters/InterpreterShowGrantsQuery.cpp @@ -119,6 +119,7 @@ BlockInputStreamPtr InterpreterShowGrantsQuery::executeImpl() /// Build the result column. MutableColumnPtr column = ColumnString::create(); std::stringstream grant_ss; + grant_ss.exceptions(std::ios::failbit); for (const auto & grant_query : grant_queries) { grant_ss.str(""); @@ -128,6 +129,7 @@ BlockInputStreamPtr InterpreterShowGrantsQuery::executeImpl() /// Prepare description of the result column. std::stringstream desc_ss; + desc_ss.exceptions(std::ios::failbit); const auto & show_query = query_ptr->as(); formatAST(show_query, desc_ss, false, true); String desc = desc_ss.str(); diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index cb5db386f5a..8e67cecdd5e 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -33,6 +33,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() if (query.databases) { std::stringstream rewritten_query; + rewritten_query.exceptions(std::ios::failbit); rewritten_query << "SELECT name FROM system.databases"; if (!query.like.empty()) @@ -54,6 +55,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() if (query.clusters) { std::stringstream rewritten_query; + rewritten_query.exceptions(std::ios::failbit); rewritten_query << "SELECT DISTINCT cluster FROM system.clusters"; if (!query.like.empty()) @@ -73,6 +75,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() else if (query.cluster) { std::stringstream rewritten_query; + rewritten_query.exceptions(std::ios::failbit); rewritten_query << "SELECT * FROM system.clusters"; rewritten_query << " WHERE cluster = " << std::quoted(query.cluster_str, '\''); @@ -87,6 +90,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() DatabaseCatalog::instance().assertDatabaseExists(database); std::stringstream rewritten_query; + rewritten_query.exceptions(std::ios::failbit); rewritten_query << "SELECT name FROM system."; if (query.dictionaries) diff --git a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp index b940e4e0c95..2221b7b1588 100644 --- a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp +++ b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp @@ -12,7 +12,9 @@ #include #include #include -#include +#include +#include + using namespace DB; diff --git a/src/Interpreters/QueryAliasesVisitor.cpp b/src/Interpreters/QueryAliasesVisitor.cpp index 6eae5594810..9de1d04990d 100644 --- a/src/Interpreters/QueryAliasesVisitor.cpp +++ b/src/Interpreters/QueryAliasesVisitor.cpp @@ -21,6 +21,7 
@@ namespace ErrorCodes static String wrongAliasMessage(const ASTPtr & ast, const ASTPtr & prev_ast, const String & alias) { std::stringstream message; + message.exceptions(std::ios::failbit); message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":" << std::endl; formatAST(*ast, message, false, true); message << std::endl << "and" << std::endl; diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 13606e1d54c..907cbaee243 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -343,6 +343,7 @@ void Set::checkColumnsNumber(size_t num_key_columns) const if (data_types.size() != num_key_columns) { std::stringstream message; + message.exceptions(std::ios::failbit); message << "Number of columns in section IN doesn't match. " << num_key_columns << " at left, " << data_types.size() << " at right."; throw Exception(message.str(), ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index b45533f7a7b..8d3cb123955 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -31,6 +31,7 @@ #include #include +#include namespace DB { @@ -110,6 +111,27 @@ struct CustomizeFunctionsSuffixData char ifDistinct[] = "ifdistinct"; using CustomizeIfDistinctVisitor = InDepthNodeVisitor>, true>; +/// Used to rewrite all aggregate functions to add -OrNull suffix to them if setting `aggregate_functions_null_for_empty` is set. +struct CustomizeAggregateFunctionsSuffixData +{ + using TypeToVisit = ASTFunction; + + const String & customized_func_suffix; + + void visit(ASTFunction & func, ASTPtr &) + { + const auto & instance = AggregateFunctionFactory::instance(); + if (instance.isAggregateFunctionName(func.name) && !endsWith(func.name, customized_func_suffix)) + { + auto properties = instance.tryGetProperties(func.name); + if (properties && !properties->returns_default_when_only_null) + func.name = func.name + customized_func_suffix; + } + } +}; + +using CustomizeAggregateFunctionsOrNullVisitor = InDepthNodeVisitor, true>; + /// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form. /// Expand asterisks and qualified asterisks with column names. /// There would be columns in normal form & column aliases after translation. Column & column alias would be normalized in QueryNormalizer. 
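The CustomizeAggregateFunctionsSuffixData visitor introduced above rewrites aggregate function calls when the aggregate_functions_null_for_empty setting is enabled, so that, for example, sum(x) becomes sumOrNull(x) and an empty result set yields NULL instead of a type-default value. A minimal sketch of the renaming rule in isolation (addOrNullSuffix is a hypothetical helper; the real visitor additionally consults AggregateFunctionFactory so that only known aggregate functions without the returns_default_when_only_null property are renamed):

    #include <iostream>
    #include <string>

    // Append "OrNull" unless the name already ends with that suffix.
    static std::string addOrNullSuffix(const std::string & name)
    {
        static const std::string suffix = "OrNull";
        if (name.size() >= suffix.size()
            && name.compare(name.size() - suffix.size(), suffix.size(), suffix) == 0)
            return name;                    // already rewritten, e.g. "sumOrNull"
        return name + suffix;               // "sum" -> "sumOrNull"
    }

    int main()
    {
        std::cout << addOrNullSuffix("sum") << '\n';        // prints: sumOrNull
        std::cout << addOrNullSuffix("sumOrNull") << '\n';  // prints: sumOrNull (unchanged)
        return 0;
    }
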
@@ -531,6 +553,7 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select if (!unknown_required_source_columns.empty()) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "Missing columns:"; for (const auto & name : unknown_required_source_columns) ss << " '" << name << "'"; @@ -710,6 +733,13 @@ void TreeRewriter::normalize(ASTPtr & query, Aliases & aliases, const Settings & CustomizeGlobalNotInVisitor(data_global_not_null_in).visit(query); } + // Rewrite all aggregate functions to add -OrNull suffix to them + if (settings.aggregate_functions_null_for_empty) + { + CustomizeAggregateFunctionsOrNullVisitor::Data data_or_null{"OrNull"}; + CustomizeAggregateFunctionsOrNullVisitor(data_or_null).visit(query); + } + /// Creates a dictionary `aliases`: alias -> ASTPtr QueryAliasesVisitor(aliases).visit(query); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 8faccf7bc7b..cdb3d9b7d7b 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -779,6 +779,7 @@ static std::tuple executeQueryImpl( if (!internal && res.in) { std::stringstream log_str; + log_str.exceptions(std::ios::failbit); log_str << "Query pipeline:\n"; res.in->dumpTree(log_str); LOG_DEBUG(&Poco::Logger::get("executeQuery"), log_str.str()); diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index c2436806fcd..edf911fa61c 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include @@ -86,8 +88,7 @@ struct PartialSortingLessWithCollation } else if (isCollationRequired(elem.description)) { - const ColumnString & column_string = assert_cast<const ColumnString &>(*elem.column); - res = column_string.compareAtWithCollation(a, b, *elem.column, *elem.description.collator); + res = elem.column->compareAtWithCollation(a, b, *elem.column, elem.description.nulls_direction, *elem.description.collator); } else res = elem.column->compareAt(a, b, *elem.column, elem.description.nulls_direction); @@ -101,7 +102,6 @@ } }; - void sortBlock(Block & block, const SortDescription & description, UInt64 limit) { if (!block) @@ -120,14 +120,13 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) bool is_column_const = false; if (isCollationRequired(description[0])) { - /// it it's real string column, than we need sort - if (const ColumnString * column_string = checkAndGetColumn<ColumnString>(column)) - column_string->getPermutationWithCollation(*description[0].collator, reverse, limit, perm); - else if (checkAndGetColumnConstData<ColumnString>(column)) + if (!column->isCollationSupported()) + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION); + + if (isColumnConst(*column)) is_column_const = true; else - throw Exception("Collations could be specified only for String columns.", ErrorCodes::BAD_COLLATION); - + column->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); } else if (!isColumnConst(*column)) { @@ -163,8 +162,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) const IColumn * column = columns_with_sort_desc[i].column; if (isCollationRequired(description[i])) { - if (!checkAndGetColumn<ColumnString>(column) && !checkAndGetColumnConstData<ColumnString>(column)) - throw Exception("Collations could be specified only for 
String columns.", ErrorCodes::BAD_COLLATION); + if (!column->isCollationSupported()) + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION); need_collation = true; } @@ -187,10 +186,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) if (isCollationRequired(column.description)) { - const ColumnString & column_string = assert_cast(*column.column); - column_string.updatePermutationWithCollation( - *column.description.collator, - column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); + column.column->updatePermutationWithCollation( + *column.description.collator, column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); } else { diff --git a/src/Interpreters/tests/CMakeLists.txt b/src/Interpreters/tests/CMakeLists.txt index 4ab7da014e4..20aa73166fb 100644 --- a/src/Interpreters/tests/CMakeLists.txt +++ b/src/Interpreters/tests/CMakeLists.txt @@ -37,9 +37,6 @@ add_executable (in_join_subqueries_preprocessor in_join_subqueries_preprocessor. target_link_libraries (in_join_subqueries_preprocessor PRIVATE clickhouse_aggregate_functions dbms clickhouse_parsers) add_check(in_join_subqueries_preprocessor) -add_executable (users users.cpp) -target_link_libraries (users PRIVATE clickhouse_aggregate_functions dbms clickhouse_common_config) - if (OS_LINUX) add_executable (internal_iotop internal_iotop.cpp) target_link_libraries (internal_iotop PRIVATE dbms) diff --git a/src/Interpreters/tests/users.cpp b/src/Interpreters/tests/users.cpp deleted file mode 100644 index acd0cfd0519..00000000000 --- a/src/Interpreters/tests/users.cpp +++ /dev/null @@ -1,282 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace -{ - -namespace fs = std::filesystem; - -struct TestEntry -{ - std::string user_name; - std::string database_name; - bool is_allowed; -}; - -using TestEntries = std::vector; - -struct TestDescriptor -{ - const char * config_content; - TestEntries entries; -}; - -using TestSet = std::vector; - -/// Tests description. 
- -TestSet test_set = -{ - { - "" - " " - " " - " " - " defaultdefault" - " " - " default" - " test" - " " - " " - " " - " defaultdefault" - " " - " " - " " - "", - - { - { "default", "default", true }, - { "default", "test", true }, - { "default", "stats", false }, - { "web", "default", true }, - { "web", "test", true }, - { "web", "stats", true }, - { "analytics", "default", false }, - { "analytics", "test", false }, - { "analytics", "stats", false } - } - }, - - { - "" - " " - " " - " " - " defaultdefault" - " " - " default" - " " - " " - " " - " defaultdefault" - " " - " " - " " - "", - - { - { "default", "default", true }, - { "default", "test", false }, - { "default", "stats", false }, - { "web", "default", true }, - { "web", "test", true }, - { "web", "stats", true }, - { "analytics", "default", false }, - { "analytics", "test", false }, - { "analytics", "stats", false } - } - }, - - { - "" - " " - " " - " " - " defaultdefault" - " " - " " - " " - " " - " defaultdefault" - " " - " " - " " - "", - - { - { "default", "default", true }, - { "default", "test", true }, - { "default", "stats", true }, - { "web", "default", true }, - { "web", "test", true }, - { "web", "stats", true }, - { "analytics", "default", false }, - { "analytics", "test", false }, - { "analytics", "stats", false } - } - }, - - { - "" - " " - " " - " " - " defaultdefault" - " " - " default" - " " - " " - " " - " defaultdefault" - " " - " test" - " " - " " - " " - " " - "", - - { - { "default", "default", true }, - { "default", "test", false }, - { "default", "stats", false }, - { "web", "default", false }, - { "web", "test", true }, - { "web", "stats", false }, - { "analytics", "default", false }, - { "analytics", "test", false }, - { "analytics", "stats", false } - } - } -}; - -std::string createTmpPath(const std::string & filename) -{ - char pattern[] = "/tmp/fileXXXXXX"; - char * dir = mkdtemp(pattern); - if (dir == nullptr) - throw std::runtime_error("Could not create directory"); - - return std::string(dir) + "/" + filename; -} - -void createFile(const std::string & filename, const char * data) -{ - std::ofstream ofs(filename.c_str()); - if (!ofs.is_open()) - throw std::runtime_error("Could not open file " + filename); - ofs << data; -} - -void runOneTest(const TestDescriptor & test_descriptor) -{ - const auto path_name = createTmpPath("users.xml"); - createFile(path_name, test_descriptor.config_content); - - DB::ConfigurationPtr config; - - try - { - config = DB::ConfigProcessor(path_name).loadConfig().configuration; - } - catch (const Poco::Exception & ex) - { - std::ostringstream os; - os << "Error: " << ex.what() << ": " << ex.displayText(); - throw std::runtime_error(os.str()); - } - - DB::AccessControlManager acl_manager; - - try - { - acl_manager.setUsersConfig(*config); - } - catch (const Poco::Exception & ex) - { - std::ostringstream os; - os << "Error: " << ex.what() << ": " << ex.displayText(); - throw std::runtime_error(os.str()); - } - - for (const auto & entry : test_descriptor.entries) - { - bool res; - - try - { - res = acl_manager.read(entry.user_name)->access.isGranted(DB::AccessType::ALL, entry.database_name); - } - catch (const Poco::Exception &) - { - res = false; - } - - if (res != entry.is_allowed) - { - auto to_string = [](bool access){ return (access ? 
"'granted'" : "'denied'"); }; - std::ostringstream os; - os << "(user=" << entry.user_name << ", database=" << entry.database_name << "): "; - os << "Expected " << to_string(entry.is_allowed) << " but got " << to_string(res); - throw std::runtime_error(os.str()); - } - } - - fs::remove_all(fs::path(path_name).parent_path().string()); -} - -auto runTestSet() -{ - size_t test_num = 1; - size_t failure_count = 0; - - for (const auto & test_descriptor : test_set) - { - try - { - runOneTest(test_descriptor); - std::cout << "Test " << test_num << " passed\n"; - } - catch (const std::runtime_error & ex) - { - std::cerr << "Test " << test_num << " failed with reason: " << ex.what() << "\n"; - ++failure_count; - } - catch (...) - { - std::cerr << "Test " << test_num << " failed with unknown reason\n"; - ++failure_count; - } - - ++test_num; - } - - return std::make_tuple(test_set.size(), failure_count); -} - -} - -int main() -{ - size_t test_count; - size_t failure_count; - - std::tie(test_count, failure_count) = runTestSet(); - - std::cout << (test_count - failure_count) << " test(s) passed out of " << test_count << "\n"; - - return (failure_count == 0) ? 0 : EXIT_FAILURE; -} diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index 730e892f8f7..27ece3e18c2 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -55,7 +55,11 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta if (type) { settings.ostr << ' '; - type->formatImpl(settings, state, frame); + + FormatStateStacked type_frame = frame; + type_frame.indent = 0; + + type->formatImpl(settings, state, type_frame); } if (null_modifier) diff --git a/src/Parsers/ASTCreateRowPolicyQuery.cpp b/src/Parsers/ASTCreateRowPolicyQuery.cpp index 640b030b6cf..6224b534851 100644 --- a/src/Parsers/ASTCreateRowPolicyQuery.cpp +++ b/src/Parsers/ASTCreateRowPolicyQuery.cpp @@ -63,6 +63,7 @@ namespace { std::vector> conditions_as_strings; std::stringstream temp_sstream; + temp_sstream.exceptions(std::ios::failbit); IAST::FormatSettings temp_settings(temp_sstream, settings); for (const auto & [condition_type, condition] : conditions) { diff --git a/src/Parsers/DumpASTNode.h b/src/Parsers/DumpASTNode.h index 430e70de8da..01447850c74 100644 --- a/src/Parsers/DumpASTNode.h +++ b/src/Parsers/DumpASTNode.h @@ -95,6 +95,7 @@ public: DebugASTLog() : log(nullptr) { + ss.exceptions(std::ios::failbit); if constexpr (_enable) log = &Poco::Logger::get("AST"); } diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 8ee4154541b..d716a796b77 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -90,6 +90,7 @@ size_t IAST::checkDepthImpl(size_t max_depth, size_t level) const std::string IAST::formatForErrorMessage() const { std::stringstream ss; + ss.exceptions(std::ios::failbit); format(FormatSettings(ss, true /* one line */)); return ss.str(); } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index c88c80021d6..cc9e593d7cb 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -243,6 +243,7 @@ template std::string IAST::formatForErrorMessage(const AstArray & array) { std::stringstream ss; + ss.exceptions(std::ios::failbit); for (size_t i = 0; i < array.size(); ++i) { if (i > 0) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 3c20446eb15..fbdc308d5bc 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -144,8 +144,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, 
ASTPtr & node, E /// doesn't check that parsed string is existing data type. In this way /// REMOVE keyword can be parsed as data type and further parsing will fail. /// So we just check this keyword and in case of success return column - /// column declaration with name only. - if (s_remove.checkWithoutMoving(pos, expected)) + /// declaration with name only. + if (!require_type && s_remove.checkWithoutMoving(pos, expected)) { if (!check_keywords_after_name) return false; @@ -165,11 +165,12 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr codec_expression; ASTPtr ttl_expression; - if (!s_default.checkWithoutMoving(pos, expected) && - !s_materialized.checkWithoutMoving(pos, expected) && - !s_alias.checkWithoutMoving(pos, expected) && - !s_comment.checkWithoutMoving(pos, expected) && - !s_codec.checkWithoutMoving(pos, expected)) + if (!s_default.checkWithoutMoving(pos, expected) + && !s_materialized.checkWithoutMoving(pos, expected) + && !s_alias.checkWithoutMoving(pos, expected) + && (require_type + || (!s_comment.checkWithoutMoving(pos, expected) + && !s_codec.checkWithoutMoving(pos, expected)))) { if (!type_parser.parse(pos, type, expected)) return false; diff --git a/src/Parsers/formatAST.cpp b/src/Parsers/formatAST.cpp index fca5130cb89..e19dc715d51 100644 --- a/src/Parsers/formatAST.cpp +++ b/src/Parsers/formatAST.cpp @@ -16,6 +16,7 @@ void formatAST(const IAST & ast, std::ostream & s, bool hilite, bool one_line) String serializeAST(const IAST & ast, bool one_line) { std::stringstream ss; + ss.exceptions(std::ios::failbit); formatAST(ast, ss, false, one_line); return ss.str(); } diff --git a/src/Parsers/queryToString.cpp b/src/Parsers/queryToString.cpp index d214468c2a9..44ea721485f 100644 --- a/src/Parsers/queryToString.cpp +++ b/src/Parsers/queryToString.cpp @@ -12,6 +12,7 @@ namespace DB String queryToString(const IAST & query) { std::ostringstream out; + out.exceptions(std::ios::failbit); formatAST(query, out, false, true); return out.str(); } diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp b/src/Parsers/tests/gtest_dictionary_parser.cpp index c2bde5fa8f1..c418759aa21 100644 --- a/src/Parsers/tests/gtest_dictionary_parser.cpp +++ b/src/Parsers/tests/gtest_dictionary_parser.cpp @@ -18,6 +18,7 @@ using namespace DB; static String astToString(IAST * ast) { std::ostringstream oss; + oss.exceptions(std::ios::failbit); dumpAST(*ast, oss); return oss.str(); } diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index cf7a020ee0b..8a416ade740 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -161,6 +161,7 @@ static void insertNumber(IColumn & column, WhichDataType type, T value) static std::string nodeToJson(avro::NodePtr root_node) { std::ostringstream ss; + ss.exceptions(std::ios::failbit); root_node->printJson(ss, 0); return ss.str(); } diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index 628a90beefb..96a458eb49f 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -168,6 +168,11 @@ void PrettyBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind) std::stringstream middle_values_separator; std::stringstream bottom_separator; + top_separator.exceptions(std::ios::failbit); + middle_names_separator.exceptions(std::ios::failbit); + 
middle_values_separator.exceptions(std::ios::failbit); + bottom_separator.exceptions(std::ios::failbit); + top_separator << grid_symbols.bold_left_top_corner; middle_names_separator << grid_symbols.bold_left_separator; middle_values_separator << grid_symbols.left_separator; diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index b81ba84c732..9320b159836 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -134,6 +134,7 @@ void PrettyCompactBlockOutputFormat::writeBottom(const Widths & max_widths) ascii_grid_symbols; /// Create delimiters std::stringstream bottom_separator; + bottom_separator.exceptions(std::ios::failbit); bottom_separator << grid_symbols.left_bottom_corner; for (size_t i = 0; i < max_widths.size(); ++i) diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp index 697ac9496b5..e7a7200ac34 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp @@ -42,6 +42,7 @@ void CollapsingSortedAlgorithm::reportIncorrectData() return; std::stringstream s; + s.exceptions(std::ios::failbit); auto & sort_columns = *last_row.sort_columns; for (size_t i = 0, size = sort_columns.size(); i < size; ++i) { diff --git a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h index 054aec94464..531b2636747 100644 --- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h @@ -1,9 +1,11 @@ #pragma once + #include #include #include #include + namespace DB { diff --git a/src/Processors/QueryPlan/ReadFromStorageStep.cpp b/src/Processors/QueryPlan/ReadFromStorageStep.cpp index fd38dd9218b..dfc2151f836 100644 --- a/src/Processors/QueryPlan/ReadFromStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromStorageStep.cpp @@ -19,7 +19,7 @@ ReadFromStorageStep::ReadFromStorageStep( std::shared_ptr quota, StoragePtr storage, const Names & required_columns, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, std::shared_ptr context, QueryProcessingStage::Enum processing_stage, size_t max_block_size, diff --git a/src/Processors/QueryPlan/ReadFromStorageStep.h b/src/Processors/QueryPlan/ReadFromStorageStep.h index 0f0ac98a556..c803d8af63f 100644 --- a/src/Processors/QueryPlan/ReadFromStorageStep.h +++ b/src/Processors/QueryPlan/ReadFromStorageStep.h @@ -31,7 +31,7 @@ public: std::shared_ptr quota, StoragePtr storage, const Names & required_columns, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, std::shared_ptr context, QueryProcessingStage::Enum processing_stage, size_t max_block_size, diff --git a/src/Processors/Sources/SinkToOutputStream.cpp b/src/Processors/Sources/SinkToOutputStream.cpp index 9727b637d8b..7612ba10fb3 100644 --- a/src/Processors/Sources/SinkToOutputStream.cpp +++ b/src/Processors/Sources/SinkToOutputStream.cpp @@ -1,6 +1,7 @@ #include #include + namespace DB { diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index 1aa5c10afd7..bf7a3b8ab52 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -37,6 +37,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request bool 
ok = true; std::stringstream message; + message.exceptions(std::ios::failbit); auto databases = DatabaseCatalog::instance().getDatabases(); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 1b0cbc69b29..4dceb0aa905 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -437,6 +437,7 @@ bool TCPHandler::readDataNext(const size_t & poll_interval, const int & receive_ if (elapsed > receive_timeout) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "Timeout exceeded while receiving data from client."; ss << " Waited for " << static_cast(elapsed) << " seconds,"; ss << " timeout is " << receive_timeout << " seconds."; diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index f08cdf76cbf..ef7c02163bb 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -148,6 +148,7 @@ void DistributedBlockOutputStream::writeAsync(const Block & block) std::string DistributedBlockOutputStream::getCurrentStateDescription() { std::stringstream buffer; + buffer.exceptions(std::ios::failbit); const auto & addresses = cluster->getShardsAddresses(); buffer << "Insertion status:\n"; @@ -523,7 +524,14 @@ void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, const siz /// Prefer insert into current instance directly writeToLocal(block, shard_info.getLocalNodeCount()); else - writeToShard(block, {shard_info.pathForInsert(settings.prefer_localhost_replica)}); + { + const auto & path = shard_info.insertPathForInternalReplication( + settings.prefer_localhost_replica, + settings.use_compact_format_in_distributed_parts_names); + if (path.empty()) + throw Exception("Directory name for async inserts is empty", ErrorCodes::LOGICAL_ERROR); + writeToShard(block, {path}); + } } else { diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 50b36ced19c..d2305f6416e 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -83,7 +83,7 @@ TableExclusiveLockHolder IStorage::lockExclusively(const String & query_id, cons Pipe IStorage::read( const Names & /*column_names*/, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, @@ -100,7 +100,7 @@ void IStorage::read( SizeLimits & leaf_limits, std::shared_ptr quota, const Names & column_names, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, std::shared_ptr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index ef337465a54..5fa2a915978 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -55,6 +54,7 @@ using StoragePolicyPtr = std::shared_ptr; struct StreamLocalLimits; class EnabledQuota; +struct SelectQueryInfo; struct ColumnSize { @@ -212,15 +212,12 @@ public: * * SelectQueryInfo is required since the stage can depends on the query * (see Distributed() engine and optimize_skip_unused_shards). + * And to store optimized cluster (after optimize_skip_unused_shards). * * QueryProcessingStage::Enum required for Distributed over Distributed, * since it cannot return Complete for intermediate queries never. 
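*
* A minimal sketch of the intended call pattern (hypothetical caller code,
* not part of this patch):
*
*     SelectQueryInfo query_info;
*     query_info.query = select_ast;
*     auto stage = storage->getQueryProcessingStage(context, QueryProcessingStage::Complete, query_info);
*     /// For StorageDistributed, query_info.cluster now holds the (possibly
*     /// optimized) cluster that the subsequent read() call reuses.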
*/ - QueryProcessingStage::Enum getQueryProcessingStage(const Context & context) const - { - return getQueryProcessingStage(context, QueryProcessingStage::Complete, {}); - } - virtual QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const ASTPtr &) const + virtual QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, SelectQueryInfo &) const { return QueryProcessingStage::FetchColumns; } @@ -278,7 +275,7 @@ public: virtual Pipe read( const Names & /*column_names*/, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, @@ -294,7 +291,7 @@ public: SizeLimits & leaf_limits, std::shared_ptr quota, const Names & column_names, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, std::shared_ptr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index a63a4309775..03eef37802f 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -247,6 +247,7 @@ Names StorageKafka::parseTopics(String topic_list) String StorageKafka::getDefaultClientId(const StorageID & table_id_) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << VERSION_NAME << "-" << getFQDNOrHostName() << "-" << table_id_.database_name << "-" << table_id_.table_name; return ss.str(); } @@ -255,7 +256,7 @@ String StorageKafka::getDefaultClientId(const StorageID & table_id_) Pipe StorageKafka::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /* query_info */, + SelectQueryInfo & /* query_info */, const Context & context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, @@ -400,6 +401,7 @@ ConsumerBufferPtr StorageKafka::createReadBuffer(const size_t consumer_number) if (num_consumers > 1) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << client_id << "-" << consumer_number; conf.set("client.id", ss.str()); } diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 52d8651b064..4257f1c1854 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -46,7 +46,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index 37861b55568..4ad0ffb93ca 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -33,12 +33,12 @@ public: bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const ASTPtr &) const override { return to_stage; } + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, SelectQueryInfo &) const override { return to_stage; } Pipe read( const Names & /*column_names*/, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & 
/*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 33fb1f4e2fa..bdf82307b07 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -423,7 +423,7 @@ void StorageLiveView::refresh() Pipe StorageLiveView::read( const Names & /*column_names*/, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 940d901e833..9c843dc71de 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -129,7 +129,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 39ee4684af9..e6880250c64 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,7 @@ namespace ErrorCodes String Range::toString() const { std::stringstream str; + str.exceptions(std::ios::failbit); if (!left_bounded) str << "(-inf, "; @@ -485,6 +487,29 @@ static std::pair applyFunctionForFieldOfUnknownType( } +/// Same as above but for binary operators +static std::pair applyBinaryFunctionForFieldOfUnknownType( + const FunctionOverloadResolverPtr & func, + const DataTypePtr & arg_type, + const Field & arg_value, + const DataTypePtr & arg_type2, + const Field & arg_value2) +{ + ColumnsWithTypeAndName arguments{ + {arg_type->createColumnConst(1, arg_value), arg_type, "x"}, {arg_type2->createColumnConst(1, arg_value2), arg_type2, "y"}}; + + FunctionBasePtr func_base = func->build(arguments); + + DataTypePtr return_type = func_base->getResultType(); + + auto col = func_base->execute(arguments, return_type, 1); + + Field result = (*col)[0]; + + return {std::move(result), std::move(return_type)}; +} + + static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field) { /// Fallback for fields without block reference. @@ -568,7 +593,15 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( if (!sample_block.has(expr_name)) return false; + /// TODO Nullable index is not yet landed. + if (out_value.isNull()) + return false; + bool found_transformation = false; + auto input_column = sample_block.getByName(expr_name); + auto const_column = out_type->createColumnConst(1, out_value); + out_value = (*castColumn({const_column, out_type, "c"}, input_column.type))[0]; + out_type = input_column.type; for (const auto & action : key_expr->getActions()) { /** The key functional expression constraint may be inferred from a plain column in the expression. 
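For illustration, the cast-then-fold step added above can be read in isolation as the following sketch (simplified; value, type, column_type and function_builder are stand-in names, not identifiers from this patch):

    // Normalize the constant from the predicate to the native type of the key
    // column, then fold it through a deterministic function of the key
    // expression, e.g. date = '2020-09-30'  ->  toYYYYMM(date) = 202009.
    auto const_column = type->createColumnConst(1, value);
    value = (*castColumn({const_column, type, "c"}, column_type))[0];
    type = column_type;
    std::tie(value, type) = applyFunctionForFieldOfUnknownType(function_builder, type, value);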
@@ -616,6 +649,76 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( return found_transformation; } +/// Looking for possible transformation of `column = constant` into `partition_expr = function(constant)` +bool KeyCondition::canConstantBeWrappedByFunctions( + const ASTPtr & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type) +{ + if (strict) + return false; + + String expr_name = node->getColumnName(); + const auto & sample_block = key_expr->getSampleBlock(); + if (!sample_block.has(expr_name)) + return false; + + /// TODO Nullable index is not yet landed. + if (out_value.isNull()) + return false; + + bool found_transformation = false; + auto input_column = sample_block.getByName(expr_name); + auto const_column = out_type->createColumnConst(1, out_value); + out_value = (*castColumn({const_column, out_type, "c"}, input_column.type))[0]; + out_type = input_column.type; + Block transform({{input_column.type->createColumn(), input_column.type, input_column.name}}); + for (const auto & action : key_expr->getActions()) + { + if (action.node->type == ActionsDAG::Type::FUNCTION) + { + if (!action.node->function_base->isDeterministic()) + return false; + if (action.node->children.size() == 1 && action.node->children[0]->result_name == expr_name) + { + std::tie(out_value, out_type) = applyFunctionForFieldOfUnknownType(action.node->function_builder, out_type, out_value); + } + else if (action.node->children.size() == 2) + { + if (!transform.has(action.node->children[0]->result_name) || !transform.has(action.node->children[1]->result_name)) + return false; + auto left = transform.getByName(action.node->children[0]->result_name); + auto right = transform.getByName(action.node->children[1]->result_name); + if (isColumnConst(*left.column)) + { + auto left_arg_type = left.type; + auto left_arg_value = (*left.column)[0]; + std::tie(out_value, out_type) = applyBinaryFunctionForFieldOfUnknownType( + action.node->function_builder, left_arg_type, left_arg_value, out_type, out_value); + } + else if (isColumnConst(*right.column)) + { + auto right_arg_type = right.type; + auto right_arg_value = (*right.column)[0]; + std::tie(out_value, out_type) = applyBinaryFunctionForFieldOfUnknownType( + action.node->function_builder, out_type, out_value, right_arg_type, right_arg_value); + } + } + + expr_name = action.node->result_name; + auto it = key_columns.find(expr_name); + if (key_columns.end() != it) + { + out_key_column_num = it->second; + out_key_column_type = sample_block.getByName(it->first).type; + found_transformation = true; + break; + } + } + action.execute(transform, true); + } + + return found_transformation; +} + bool KeyCondition::tryPrepareSetIndex( const ASTs & args, const Context & context, @@ -862,33 +965,57 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, const Context & cont bool is_set_const = false; bool is_constant_transformed = false; - if (functionIsInOrGlobalInOperator(func_name) - && tryPrepareSetIndex(args, context, out, key_column_num)) + if (functionIsInOrGlobalInOperator(func_name)) { - key_arg_pos = 0; - is_set_const = true; + if (tryPrepareSetIndex(args, context, out, key_column_num)) + { + key_arg_pos = 0; + is_set_const = true; + } + else + return false; } - else if (getConstant(args[1], block_with_constants, const_value, const_type) - && isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)) + else if (getConstant(args[1], block_with_constants, 
const_value, const_type)) { - key_arg_pos = 0; + if (isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)) + { + key_arg_pos = 0; + } + else if (canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type)) + { + key_arg_pos = 0; + is_constant_transformed = true; + } + else if ( + single_point && func_name == "equals" + && canConstantBeWrappedByFunctions(args[0], key_column_num, key_expr_type, const_value, const_type)) + { + key_arg_pos = 0; + is_constant_transformed = true; + } + else + return false; } - else if (getConstant(args[1], block_with_constants, const_value, const_type) - && canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type)) + else if (getConstant(args[0], block_with_constants, const_value, const_type)) { - key_arg_pos = 0; - is_constant_transformed = true; - } - else if (getConstant(args[0], block_with_constants, const_value, const_type) - && isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain)) - { - key_arg_pos = 1; - } - else if (getConstant(args[0], block_with_constants, const_value, const_type) - && canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type)) - { - key_arg_pos = 1; - is_constant_transformed = true; + if (isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain)) + { + key_arg_pos = 1; + } + else if (canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type)) + { + key_arg_pos = 1; + is_constant_transformed = true; + } + else if ( + single_point && func_name == "equals" + && canConstantBeWrappedByFunctions(args[1], key_column_num, key_expr_type, const_value, const_type)) + { + key_arg_pos = 0; + is_constant_transformed = true; + } + else + return false; } else return false; @@ -1443,6 +1570,7 @@ String KeyCondition::RPNElement::toString() const }; std::ostringstream ss; + ss.exceptions(std::ios::failbit); switch (function) { case FUNCTION_AND: diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 909ec01bf9f..265bc01be49 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -402,6 +402,9 @@ private: Field & out_value, DataTypePtr & out_type); + bool canConstantBeWrappedByFunctions( + const ASTPtr & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type); + /// If it's possible to make an RPNElement /// that will filter values (possibly tuples) by the content of 'prepared_set', /// do it and return true. 
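Gating the new rewrite on single_point and func_name == "equals" is what keeps it sound: a general partition expression is not monotonic, so only a point predicate can be mapped through it. A small illustration with a hypothetical table (not from this patch):

    // CREATE TABLE t (id UInt64, v String) ENGINE = MergeTree
    //     PARTITION BY id % 10 ORDER BY id;
    //
    // WHERE id = 42  =>  (id % 10) = 2    -- a single point, safe for pruning
    // WHERE id > 42  =>  (id % 10) > 2    -- unsound: id = 50 satisfies id > 42,
    //                                        yet 50 % 10 = 0 fails (id % 10) > 2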
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 2583b52ded3..43dfdfe7beb 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3206,6 +3206,7 @@ void MergeTreeData::Transaction::rollbackPartsToTemporaryState() if (!isEmpty()) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << " Rollbacking parts state to temporary and removing from working set:"; for (const auto & part : precommitted_parts) ss << " " << part->relative_path; @@ -3224,6 +3225,7 @@ void MergeTreeData::Transaction::rollback() if (!isEmpty()) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << " Removing parts:"; for (const auto & part : precommitted_parts) ss << " " << part->relative_path; @@ -3759,6 +3761,7 @@ bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, S || settings.min_rows_for_compact_part != 0 || settings.min_bytes_for_compact_part != 0)) { std::ostringstream message; + message.exceptions(std::ios::failbit); message << "Table can't create parts with adaptive granularity, but settings" << " min_rows_for_wide_part = " << settings.min_rows_for_wide_part << ", min_bytes_for_wide_part = " << settings.min_bytes_for_wide_part diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index c75970f6cc1..58bdbcdcdcd 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -443,6 +443,7 @@ void MinimalisticDataPartChecksums::checkEqualImpl(const MinimalisticDataPartChe if (num_compressed_files != rhs.num_compressed_files || num_uncompressed_files != rhs.num_uncompressed_files) { std::stringstream error_msg; + error_msg.exceptions(std::ios::failbit); error_msg << "Different number of files: " << rhs.num_compressed_files << " compressed (expected " << num_compressed_files << ")" << " and " << rhs.num_uncompressed_files << " uncompressed ones (expected " << num_uncompressed_files << ")"; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 12ca88ae274..de7d06a7471 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -82,6 +83,17 @@ static Block getBlockWithPartColumn(const MergeTreeData::DataPartsVector & parts return Block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "_part")}; } +/// Check if ORDER BY clause of the query has some expression. +static bool sortingDescriptionHasExpressions(const SortDescription & sort_description, const StorageMetadataPtr & metadata_snapshot) +{ + auto all_columns = metadata_snapshot->getColumns(); + for (const auto & sort_column : sort_description) + { + if (!all_columns.has(sort_column.column_name)) + return true; + } + return false; +} size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead( const MergeTreeData::DataPartsVector & parts, @@ -217,6 +229,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts( if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue()) { std::stringstream exception_message; + exception_message.exceptions(std::ios::failbit); exception_message << "Primary key ("; for (size_t i = 0, size = primary_key_columns.size(); i < size; ++i) exception_message << (i == 0 ? 
"" : ", ") << primary_key_columns[i]; @@ -226,13 +239,15 @@ Pipe MergeTreeDataSelectExecutor::readFromParts( } std::optional minmax_idx_condition; + std::optional partition_pruner; if (data.minmax_idx_expr) { minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr); + partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info, context, false /* strict */); - if (settings.force_index_by_date && minmax_idx_condition->alwaysUnknownOrTrue()) + if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless())) { - String msg = "MinMax index by columns ("; + String msg = "Neither MinMax index by columns ("; bool first = true; for (const String & col : data.minmax_idx_columns) { @@ -242,7 +257,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts( msg += ", "; msg += col; } - msg += ") is not used and setting 'force_index_by_date' is set"; + msg += ") nor partition expr is used and setting 'force_index_by_date' is set"; throw Exception(msg, ErrorCodes::INDEX_NOT_USED); } @@ -266,6 +281,12 @@ Pipe MergeTreeDataSelectExecutor::readFromParts( part->minmax_idx.hyperrectangle, data.minmax_idx_column_types).can_be_true) continue; + if (partition_pruner) + { + if (partition_pruner->canBePruned(part)) + continue; + } + if (max_block_numbers_to_read) { auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id); @@ -1076,6 +1097,7 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( const size_t min_marks_per_stream = (sum_marks - 1) / num_streams + 1; bool need_preliminary_merge = (parts.size() > settings.read_in_order_two_level_merge_threshold); + size_t max_output_ports = 0; for (size_t i = 0; i < num_streams && !parts.empty(); ++i) { @@ -1185,25 +1207,43 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( }); } - if (pipe.numOutputPorts() > 1 && need_preliminary_merge) + max_output_ports = std::max(pipe.numOutputPorts(), max_output_ports); + res.emplace_back(std::move(pipe)); + } + + if (need_preliminary_merge) + { + /// If ORDER BY clause of the query contains some expression, + /// then those new columns should be added for the merge step, + /// and this should be done always, if there is at least one pipe that + /// has multiple output ports. 
+ bool sorting_key_has_expression = sortingDescriptionHasExpressions(input_order_info->order_key_prefix_descr, metadata_snapshot); + bool force_sorting_key_transform = res.size() > 1 && max_output_ports > 1 && sorting_key_has_expression; + + for (auto & pipe : res) { SortDescription sort_description; - for (size_t j = 0; j < input_order_info->order_key_prefix_descr.size(); ++j) - sort_description.emplace_back(metadata_snapshot->getSortingKey().column_names[j], - input_order_info->direction, 1); - /// Drop temporary columns, added by 'sorting_key_prefix_expr' - out_projection = createProjection(pipe); - pipe.addSimpleTransform([sorting_key_prefix_expr](const Block & header) + if (pipe.numOutputPorts() > 1 || force_sorting_key_transform) { - return std::make_shared(header, sorting_key_prefix_expr); - }); + for (size_t j = 0; j < input_order_info->order_key_prefix_descr.size(); ++j) + sort_description.emplace_back(metadata_snapshot->getSortingKey().column_names[j], + input_order_info->direction, 1); - pipe.addTransform(std::make_shared( - pipe.getHeader(), pipe.numOutputPorts(), sort_description, max_block_size)); + /// Drop temporary columns, added by 'sorting_key_prefix_expr' + out_projection = createProjection(pipe); + pipe.addSimpleTransform([sorting_key_prefix_expr](const Block & header) + { + return std::make_shared(header, sorting_key_prefix_expr); + }); + } + + if (pipe.numOutputPorts() > 1) + { + pipe.addTransform(std::make_shared( + pipe.getHeader(), pipe.numOutputPorts(), sort_description, max_block_size)); + } } - - res.emplace_back(std::move(pipe)); } return Pipe::unitePipes(std::move(res)); diff --git a/src/Storages/MergeTree/PartitionPruner.cpp b/src/Storages/MergeTree/PartitionPruner.cpp new file mode 100644 index 00000000000..8888367ebe5 --- /dev/null +++ b/src/Storages/MergeTree/PartitionPruner.cpp @@ -0,0 +1,25 @@ +#include + +namespace DB +{ + +bool PartitionPruner::canBePruned(const DataPartPtr & part) +{ + if (part->isEmpty()) + return true; + const auto & partition_id = part->info.partition_id; + bool is_valid; + if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end()) + is_valid = it->second; + else + { + const auto & partition_value = part->partition.value; + std::vector index_value(partition_value.begin(), partition_value.end()); + is_valid = partition_condition.mayBeTrueInRange( + partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types); + partition_filter_map.emplace(partition_id, is_valid); + } + return !is_valid; +} + +} diff --git a/src/Storages/MergeTree/PartitionPruner.h b/src/Storages/MergeTree/PartitionPruner.h new file mode 100644 index 00000000000..74b02d671bb --- /dev/null +++ b/src/Storages/MergeTree/PartitionPruner.h @@ -0,0 +1,37 @@ +#pragma once + +#include + +#include +#include +#include + +namespace DB +{ + +/// Pruning partitions in verbatim way using KeyCondition +class PartitionPruner +{ +private: + std::unordered_map partition_filter_map; + const KeyDescription & partition_key; + KeyCondition partition_condition; + bool useless; + using DataPart = IMergeTreeDataPart; + using DataPartPtr = std::shared_ptr; + +public: + PartitionPruner(const KeyDescription & partition_key_, const SelectQueryInfo & query_info, const Context & context, bool strict) + : partition_key(partition_key_) + , partition_condition( + query_info, context, partition_key.column_names, partition_key.expression, true /* single_point */, strict) + , useless(partition_condition.alwaysUnknownOrTrue()) + { + } + + bool 
canBePruned(const DataPartPtr & part); + + bool isUseless() const { return useless; } +}; + +} diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 4aa186473a9..597ff5e8fee 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -942,6 +942,7 @@ size_t ReplicatedMergeTreeQueue::getConflictsCountForRange( if (out_description) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "Can't execute command for range " << range.getPartName() << " (entry " << entry.znode_name << "). "; ss << "There are " << conflicts.size() << " currently executing entries blocking it: "; for (const auto & conflict : conflicts) @@ -1693,6 +1694,7 @@ std::vector ReplicatedMergeTreeQueue::getMutationsStatu for (const MutationCommand & command : entry.commands) { std::stringstream ss; + ss.exceptions(std::ios::failbit); formatAST(*command.ast, ss, false, true); result.push_back(MergeTreeMutationStatus { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 378b1b284a3..48f05b50675 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -19,6 +19,7 @@ static String formattedAST(const ASTPtr & ast) if (!ast) return ""; std::stringstream ss; + ss.exceptions(std::ios::failbit); formatAST(*ast, ss, false, true); return ss.str(); } diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index c13f540ad34..aa24ddcf33c 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -21,7 +21,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index ba998dd5951..53c9b50cb9d 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -127,6 +127,7 @@ std::shared_ptr MutationCommands::ast() const void MutationCommands::writeText(WriteBuffer & out) const { std::stringstream commands_ss; + commands_ss.exceptions(std::ios::failbit); formatAST(*ast(), commands_ss, /* hilite = */ false, /* one_line = */ true); out << escape << commands_ss.str(); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 9735c4d7fd3..f8128e6223a 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -187,6 +187,7 @@ AMQP::ExchangeType StorageRabbitMQ::defineExchangeType(String exchange_type_) String StorageRabbitMQ::getTableBasedName(String name, const StorageID & table_id) { std::stringstream ss; + ss.exceptions(std::ios::failbit); if (name.empty()) ss << table_id.database_name << "_" << table_id.table_name; @@ -530,7 +531,7 @@ void StorageRabbitMQ::unbindExchange() Pipe StorageRabbitMQ::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /* query_info */, + SelectQueryInfo & /* query_info */, const Context & context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, diff --git 
a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index d7891aed0a7..37627178e8b 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -38,7 +38,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 9f1d1c10a20..00f20ac3893 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -82,6 +82,8 @@ using TreeRewriterResultPtr = std::shared_ptr; class ReadInOrderOptimizer; using ReadInOrderOptimizerPtr = std::shared_ptr; +class Cluster; +using ClusterPtr = std::shared_ptr; /** Query along with some additional data, * that can be used during query processing @@ -92,13 +94,17 @@ struct SelectQueryInfo ASTPtr query; ASTPtr view_query; /// Optimized VIEW query + /// For optimize_skip_unused_shards. + /// Can be modified in getQueryProcessingStage() + ClusterPtr cluster; + TreeRewriterResultPtr syntax_analyzer_result; PrewhereInfoPtr prewhere_info; ReadInOrderOptimizerPtr order_optimizer; - /// We can modify it while reading from storage - mutable InputOrderInfoPtr input_order_info; + /// Can be modified while reading from storage + InputOrderInfoPtr input_order_info; /// Prepared sets are used for indices by storage engine. /// Example: x IN (1, 2, 3) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 9a24623d789..412dadab5bb 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -130,7 +130,7 @@ private: }; -QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const +QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & query_info) const { if (destination_id) { @@ -139,7 +139,7 @@ QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context if (destination.get() == this) throw Exception("Destination table is myself. 
Read will cause infinite loop.", ErrorCodes::INFINITE_LOOP); - return destination->getQueryProcessingStage(context, to_stage, query_ptr); + return destination->getQueryProcessingStage(context, to_stage, query_info); } return QueryProcessingStage::FetchColumns; @@ -149,7 +149,7 @@ QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context Pipe StorageBuffer::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index b18b574ec6c..455560e22da 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -54,12 +54,12 @@ public: std::string getName() const override { return "Buffer"; } - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const ASTPtr &) const override; + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, SelectQueryInfo &) const override; Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index e859baa702e..ee08dd5a824 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -82,6 +82,7 @@ NamesAndTypesList StorageDictionary::getNamesAndTypes(const DictionaryStructure String StorageDictionary::generateNamesAndTypesDescription(const NamesAndTypesList & list) { std::stringstream ss; + ss.exceptions(std::ios::failbit); bool first = true; for (const auto & name_and_type : list) { @@ -132,7 +133,7 @@ void StorageDictionary::checkTableCanBeDropped() const Pipe StorageDictionary::read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index 5c7beb88d88..576cc2de064 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -19,7 +19,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 6e1b613fab9..4a98bc4673f 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -176,6 +176,7 @@ UInt64 getMaximumFileNumber(const std::string & dir_path) std::string makeFormattedListOfShards(const ClusterPtr & cluster) { std::ostringstream os; + os.exceptions(std::ios::failbit); bool head = true; os << "["; @@ -419,11 +420,32 @@ StorageDistributed::StorageDistributed( remote_table_function_ptr = std::move(remote_table_function_ptr_); } -QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Context &context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const +QueryProcessingStage::Enum 
StorageDistributed::getQueryProcessingStage( + const Context & context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & query_info) const { const auto & settings = context.getSettingsRef(); auto metadata_snapshot = getInMemoryMetadataPtr(); + ClusterPtr cluster = getCluster(); + query_info.cluster = cluster; + + /// Always calculate optimized cluster here, to avoid conditions during read() + /// (Anyway it will be calculated in the read()) + if (settings.optimize_skip_unused_shards) + { + ClusterPtr optimized_cluster = getOptimizedCluster(context, metadata_snapshot, query_info.query); + if (optimized_cluster) + { + LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}", makeFormattedListOfShards(optimized_cluster)); + cluster = optimized_cluster; + query_info.cluster = cluster; + } + else + { + LOG_DEBUG(log, "Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the cluster{}", has_sharding_key ? "" : " (no sharding key)"); + } + } + if (settings.distributed_group_by_no_merge) { if (settings.distributed_group_by_no_merge == DISTRIBUTED_GROUP_BY_NO_MERGE_AFTER_AGGREGATION) @@ -437,14 +459,6 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Con if (to_stage == QueryProcessingStage::WithMergeableState) return QueryProcessingStage::WithMergeableState; - ClusterPtr cluster = getCluster(); - if (settings.optimize_skip_unused_shards) - { - ClusterPtr optimized_cluster = getOptimizedCluster(context, metadata_snapshot, query_ptr); - if (optimized_cluster) - cluster = optimized_cluster; - } - /// If there is only one node, the query can be fully processed by the /// shard, initiator will work as a proxy only. if (getClusterQueriedNodes(settings, cluster) == 1) @@ -456,7 +470,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Con (settings.allow_nondeterministic_optimize_skip_unused_shards || sharding_key_is_deterministic)) { Block sharding_key_block = sharding_key_expr->getSampleBlock(); - auto stage = getOptimizedQueryProcessingStage(query_ptr, settings.extremes, sharding_key_block); + auto stage = getOptimizedQueryProcessingStage(query_info.query, settings.extremes, sharding_key_block); if (stage) { LOG_DEBUG(log, "Force processing stage to {}", QueryProcessingStage::toString(*stage)); @@ -470,29 +484,12 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Con Pipe StorageDistributed::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, const size_t /*max_block_size*/, const unsigned /*num_streams*/) { - const auto & settings = context.getSettingsRef(); - - ClusterPtr cluster = getCluster(); - if (settings.optimize_skip_unused_shards) - { - ClusterPtr optimized_cluster = getOptimizedCluster(context, metadata_snapshot, query_info.query); - if (optimized_cluster) - { - LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}", makeFormattedListOfShards(optimized_cluster)); - cluster = optimized_cluster; - } - else - { - LOG_DEBUG(log, "Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the cluster{}", has_sharding_key ? 
"" : " (no sharding key)"); - } - } - const auto & modified_query_ast = rewriteSelectQuery( query_info.query, remote_database, remote_table, remote_table_function_ptr); @@ -511,8 +508,7 @@ Pipe StorageDistributed::read( : ClusterProxy::SelectStreamFactory( header, processed_stage, StorageID{remote_database, remote_table}, scalars, has_virtual_shard_num_column, context.getExternalTables()); - return ClusterProxy::executeQuery(select_stream_factory, cluster, log, - modified_query_ast, context, context.getSettingsRef(), query_info); + return ClusterProxy::executeQuery(select_stream_factory, log, modified_query_ast, context, query_info); } @@ -749,6 +745,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(const Context & context, cons if (force) { std::stringstream exception_message; + exception_message.exceptions(std::ios::failbit); if (!has_sharding_key) exception_message << "No sharding key"; else if (!sharding_key_is_usable) diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 5adc4ca5627..c4f56f1c2a1 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -51,12 +51,12 @@ public: bool isRemote() const override { return true; } - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum to_stage, const ASTPtr &) const override; + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, SelectQueryInfo &) const override; Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 0cc3aa807c4..db27d2072c0 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -395,7 +395,7 @@ private: Pipe StorageFile::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index eb6f0cb4d87..de35881f297 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -27,7 +27,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 62d8259f705..f1e822be2d1 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -441,7 +441,7 @@ void registerStorageGenerateRandom(StorageFactory & factory) Pipe StorageGenerateRandom::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/StorageGenerateRandom.h b/src/Storages/StorageGenerateRandom.h index e0f037f9a08..965c5b3a9d3 100644 --- a/src/Storages/StorageGenerateRandom.h +++ b/src/Storages/StorageGenerateRandom.h @@ -18,7 +18,7 @@ public: Pipe read( const Names & column_names, 
const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageHDFS.cpp b/src/Storages/StorageHDFS.cpp index 1d72107bc7b..0a4729b1c06 100644 --- a/src/Storages/StorageHDFS.cpp +++ b/src/Storages/StorageHDFS.cpp @@ -264,7 +264,7 @@ Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, c Pipe StorageHDFS::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/StorageHDFS.h b/src/Storages/StorageHDFS.h index fdeaf4ae1b3..4172bce1cd1 100644 --- a/src/Storages/StorageHDFS.h +++ b/src/Storages/StorageHDFS.h @@ -22,7 +22,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index f410fa34f59..5cc435f91fa 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -428,6 +428,7 @@ namespace String listOfColumns(const NamesAndTypesList & available_columns) { std::stringstream ss; + ss.exceptions(std::ios::failbit); for (auto it = available_columns.begin(); it != available_columns.end(); ++it) { if (it != available_columns.begin()) diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index 5e525210548..1f881bccf07 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -61,7 +61,7 @@ void StorageInput::setInputStream(BlockInputStreamPtr input_stream_) Pipe StorageInput::read( const Names & /*column_names*/, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, diff --git a/src/Storages/StorageInput.h b/src/Storages/StorageInput.h index c19b19e4703..3cb64993d45 100644 --- a/src/Storages/StorageInput.h +++ b/src/Storages/StorageInput.h @@ -20,7 +20,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index c6d85174e68..8cf170b04ea 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -451,7 +451,7 @@ private: Pipe StorageJoin::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 857f3646441..a202327e037 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -40,7 +40,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + 
SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 944dc0e5804..86cc6afe33f 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -613,7 +613,7 @@ const StorageLog::Marks & StorageLog::getMarksWithRealRowCount(const StorageMeta Pipe StorageLog::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 306383a1235..51fd334d882 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -27,7 +27,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageMaterializeMySQL.cpp b/src/Storages/StorageMaterializeMySQL.cpp index a582b83c5a2..7c9ec35f3c5 100644 --- a/src/Storages/StorageMaterializeMySQL.cpp +++ b/src/Storages/StorageMaterializeMySQL.cpp @@ -34,7 +34,7 @@ StorageMaterializeMySQL::StorageMaterializeMySQL(const StoragePtr & nested_stora Pipe StorageMaterializeMySQL::read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageMaterializeMySQL.h b/src/Storages/StorageMaterializeMySQL.h index 236fbedb421..19532ec9c89 100644 --- a/src/Storages/StorageMaterializeMySQL.h +++ b/src/Storages/StorageMaterializeMySQL.h @@ -24,7 +24,7 @@ public: StorageMaterializeMySQL(const StoragePtr & nested_storage_, const DatabaseMaterializeMySQL * database_); Pipe read( - const Names & column_names, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, + const Names & column_names, const StorageMetadataPtr & metadata_snapshot, SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; BlockOutputStreamPtr write(const ASTPtr &, const StorageMetadataPtr &, const Context &) override { throwNotAllowed(); } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 6287e2ad278..c66cb7869c9 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -101,15 +101,15 @@ StorageMaterializedView::StorageMaterializedView( DatabaseCatalog::instance().addDependency(select.select_table_id, getStorageID()); } -QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const +QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & query_info) const { - return getTargetTable()->getQueryProcessingStage(context, to_stage, query_ptr); + return getTargetTable()->getQueryProcessingStage(context, to_stage, query_info); } Pipe StorageMaterializedView::read( const Names & column_names, const StorageMetadataPtr & 
/*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, const size_t max_block_size, diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index c5188108c18..fe96261e974 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -65,7 +65,7 @@ public: void checkTableCanBeDropped() const override; void checkPartitionCanBeDropped(const ASTPtr & partition) override; - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const ASTPtr &) const override; + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, SelectQueryInfo &) const override; StoragePtr getTargetTable() const; StoragePtr tryGetTargetTable() const; @@ -75,7 +75,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index f16df6983ed..ab8ce1306ca 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -128,7 +128,7 @@ StorageMemory::StorageMemory(const StorageID & table_id_, ColumnsDescription col Pipe StorageMemory::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 25f23d131b6..2e08bd7d65b 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -32,7 +32,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 12588654872..9b15ae00fc8 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -132,7 +132,7 @@ bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, cons } -QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const +QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & query_info) const { auto stage_in_source_tables = QueryProcessingStage::FetchColumns; @@ -146,7 +146,7 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & if (table && table.get() != this) { ++selected_table_size; - stage_in_source_tables = std::max(stage_in_source_tables, table->getQueryProcessingStage(context, to_stage, query_ptr)); + stage_in_source_tables = std::max(stage_in_source_tables, table->getQueryProcessingStage(context, to_stage, query_info)); } iterator->next(); @@ -159,7 +159,7 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & Pipe StorageMerge::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & 
query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, const size_t max_block_size, @@ -259,7 +259,7 @@ Pipe StorageMerge::read( Pipe StorageMerge::createSources( const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const QueryProcessingStage::Enum & processed_stage, const UInt64 max_block_size, const Block & header, @@ -293,7 +293,7 @@ Pipe StorageMerge::createSources( return pipe; } - auto storage_stage = storage->getQueryProcessingStage(*modified_context, QueryProcessingStage::Complete, modified_query_info.query); + auto storage_stage = storage->getQueryProcessingStage(*modified_context, QueryProcessingStage::Complete, modified_query_info); if (processed_stage <= storage_stage) { /// If there are only virtual columns in query, you must request at least one other column. diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 9da71745e2c..cdd8778d69f 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -27,12 +27,12 @@ public: bool supportsFinal() const override { return true; } bool supportsIndexForIn() const override { return true; } - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const ASTPtr &) const override; + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, SelectQueryInfo &) const override; Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, @@ -78,7 +78,7 @@ protected: Pipe createSources( const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const QueryProcessingStage::Enum & processed_stage, const UInt64 max_block_size, const Block & header, diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 11e8859e76c..4ec6d748738 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -21,6 +21,7 @@ #include #include #include +#include <Storages/MergeTree/PartitionPruner.h> #include #include #include @@ -174,7 +175,7 @@ StorageMergeTree::~StorageMergeTree() Pipe StorageMergeTree::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, @@ -192,31 +193,14 @@ std::optional<UInt64> StorageMergeTree::totalRows() const std::optional<UInt64> StorageMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const { auto metadata_snapshot = getInMemoryMetadataPtr(); - const auto & partition_key = metadata_snapshot->getPartitionKey(); - Names partition_key_columns = partition_key.column_names; - KeyCondition key_condition( - query_info, context, partition_key_columns, partition_key.expression, true /* single_point */, true /* strict */); - if (key_condition.alwaysUnknownOrTrue()) + PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context, true /* strict */); + if (partition_pruner.isUseless()) return {}; - std::unordered_map<String, bool> partition_filter_map; size_t res = 0; auto lock = lockParts(); for (const auto & part : getDataPartsStateRange(DataPartState::Committed)) { - if
(part->isEmpty()) - continue; - const auto & partition_id = part->info.partition_id; - bool is_valid; - if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end()) - is_valid = it->second; - else - { - const auto & partition_value = part->partition.value; - std::vector<FieldRef> index_value(partition_value.begin(), partition_value.end()); - is_valid = key_condition.mayBeTrueInRange(partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types); - partition_filter_map.emplace(partition_id, is_valid); - } - if (is_valid) + if (!partition_pruner.canBePruned(part)) res += part->rows_count; } return res; @@ -555,6 +539,7 @@ std::vector<MergeTreeMutationStatus> StorageMergeTree::getMutationsStatus() const for (const MutationCommand & command : entry.commands) { std::stringstream ss; + ss.exceptions(std::ios::failbit); formatAST(*command.ast, ss, false, true); result.push_back(MergeTreeMutationStatus { @@ -1030,6 +1015,7 @@ bool StorageMergeTree::optimize( if (!merge(true, partition_id, true, deduplicate, &disable_reason)) { std::stringstream message; + message.exceptions(std::ios::failbit); message << "Cannot OPTIMIZE table"; if (!disable_reason.empty()) message << ": " << disable_reason; @@ -1052,6 +1038,7 @@ bool StorageMergeTree::optimize( if (!merge(true, partition_id, final, deduplicate, &disable_reason)) { std::stringstream message; + message.exceptions(std::ios::failbit); message << "Cannot OPTIMIZE table"; if (!disable_reason.empty()) message << ": " << disable_reason; diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 71a32fbd203..fd24df468cb 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -40,7 +40,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 210dc09ba86..b9ac2443472 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -56,7 +56,7 @@ StorageMongoDB::StorageMongoDB( Pipe StorageMongoDB::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/StorageMongoDB.h b/src/Storages/StorageMongoDB.h index 7503792a479..3ed03576478 100644 --- a/src/Storages/StorageMongoDB.h +++ b/src/Storages/StorageMongoDB.h @@ -36,7 +36,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index afbca0d9430..e5c59a794e1 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -67,7 +67,7 @@ StorageMySQL::StorageMySQL( Pipe StorageMySQL::read( const Names & column_names_, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info_, + SelectQueryInfo & query_info_, const Context & context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size_, diff --git a/src/Storages/StorageMySQL.h b/src/Storages/StorageMySQL.h index
a7f98c4379b..acab8f9290e 100644 --- a/src/Storages/StorageMySQL.h +++ b/src/Storages/StorageMySQL.h @@ -40,7 +40,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index 44dbda3b15d..2bf698a6288 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -24,7 +24,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo &, + SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processing_stage*/, size_t, diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index e2a6438ecfe..f50235b7e7c 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -31,7 +31,9 @@ public: ColumnSizeByName getColumnSizes() const override { return getNested()->getColumnSizes(); } NamesAndTypesList getVirtuals() const override { return getNested()->getVirtuals(); } - QueryProcessingStage::Enum getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const ASTPtr & ast) const override + + QueryProcessingStage::Enum getQueryProcessingStage( + const Context & context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & ast) const override { return getNested()->getQueryProcessingStage(context, to_stage, ast); } @@ -50,7 +52,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7412031c595..17de704ba40 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -17,6 +17,7 @@ #include #include #include +#include <Storages/MergeTree/PartitionPruner.h> #include #include #include @@ -460,6 +461,7 @@ void StorageReplicatedMergeTree::waitMutationToFinishOnReplicas( if (!inactive_replicas.empty()) { std::stringstream exception_message; + exception_message.exceptions(std::ios::failbit); exception_message << "Mutation is not finished because"; if (!inactive_replicas.empty()) @@ -1017,6 +1019,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) parts_to_fetch_blocks += get_blocks_count_in_data_part(name); std::stringstream sanity_report; + sanity_report.exceptions(std::ios::failbit); sanity_report << "There are " << unexpected_parts.size() << " unexpected parts with " << unexpected_parts_rows << " rows (" << unexpected_parts_nonnew << " of them is not just-written with " << unexpected_parts_rows << " rows), " @@ -1041,6 +1044,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) if (insane && !skip_sanity_checks) { std::stringstream why; + why.exceptions(std::ios::failbit); why << "The local set of parts of table " << getStorageID().getNameForLogs() << " doesn't look like the set of parts " << "in ZooKeeper: " << formatReadableQuantity(unexpected_parts_rows) << " rows of " << formatReadableQuantity(total_rows_on_filesystem) @@ -1342,6 +1346,7 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) // Log source part names just in case { std::stringstream source_parts_msg; +
source_parts_msg.exceptions(std::ios::failbit); for (auto i : ext::range(0, entry.source_parts.size())) source_parts_msg << (i != 0 ? ", " : "") << entry.source_parts[i]; @@ -3613,7 +3618,7 @@ ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock StorageReplicatedMerg Pipe StorageReplicatedMergeTree::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, @@ -3662,28 +3667,13 @@ std::optional<UInt64> StorageReplicatedMergeTree::totalRows() const std::optional<UInt64> StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const { auto metadata_snapshot = getInMemoryMetadataPtr(); - const auto & partition_key = metadata_snapshot->getPartitionKey(); - Names partition_key_columns = partition_key.column_names; - KeyCondition key_condition( - query_info, context, partition_key_columns, partition_key.expression, true /* single_point */, true /* strict */); - if (key_condition.alwaysUnknownOrTrue()) + PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context, true /* strict */); + if (partition_pruner.isUseless()) return {}; - std::unordered_map<String, bool> partition_filter_map; size_t res = 0; foreachCommittedParts([&](auto & part) { - const auto & partition_id = part->info.partition_id; - bool is_valid; - if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end()) - is_valid = it->second; - else - { - const auto & partition_value = part->partition.value; - std::vector<FieldRef> index_value(partition_value.begin(), partition_value.end()); - is_valid = key_condition.mayBeTrueInRange(partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types); - partition_filter_map.emplace(partition_id, is_valid); - } - if (is_valid) + if (!partition_pruner.canBePruned(part)) res += part->rows_count; }); return res; @@ -3824,6 +3814,7 @@ bool StorageReplicatedMergeTree::optimize( if (!selected) { std::stringstream message; + message.exceptions(std::ios::failbit); message << "Cannot select parts for optimization"; if (!disable_reason.empty()) message << ": " << disable_reason; diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 92e17412ecc..9cff2ccd9b3 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -90,7 +90,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index e4228c0d4ec..ce9ebbd53b3 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -255,6 +255,7 @@ Strings listFilesWithRegexpMatching(Aws::S3::S3Client & client, const S3::URI & if (!outcome.IsSuccess()) { std::ostringstream message; + message.exceptions(std::ios::failbit); message << "Could not list objects in bucket " << quoteString(request.GetBucket()) << " with prefix " << quoteString(request.GetPrefix()); @@ -287,7 +288,7 @@ Strings listFilesWithRegexpMatching(Aws::S3::S3Client & client, const S3::URI & Pipe StorageS3::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo &
/*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 5a702aa8785..1ecc9409671 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -44,7 +44,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index d24c171caa5..a265df856c1 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -299,7 +299,7 @@ void StorageStripeLog::rename(const String & new_path_to_table_data, const Stora Pipe StorageStripeLog::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index ea2b1693169..1f30ddc8d8b 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -28,7 +28,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 9b698cb3954..e0488ecf06e 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -72,7 +72,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index a59480f0a0d..81eec735c8a 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -427,7 +427,7 @@ void StorageTinyLog::rename(const String & new_path_to_table_data, const Storage Pipe StorageTinyLog::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, diff --git a/src/Storages/StorageTinyLog.h b/src/Storages/StorageTinyLog.h index 39c2994aaed..7d2b7473a21 100644 --- a/src/Storages/StorageTinyLog.h +++ b/src/Storages/StorageTinyLog.h @@ -27,7 +27,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 55c16496ba5..6afff8fb01b 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -196,7 +196,7 @@ std::function IStorageURLBase::getReadPOSTDataCallback( Pipe IStorageURLBase::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & 
query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 7983ad71520..e4ce87af550 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -22,7 +22,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index 140a3c62eae..500deac5f25 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -23,7 +23,7 @@ StorageValues::StorageValues( Pipe StorageValues::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, diff --git a/src/Storages/StorageValues.h b/src/Storages/StorageValues.h index 8a1a06eeb54..5729f245149 100644 --- a/src/Storages/StorageValues.h +++ b/src/Storages/StorageValues.h @@ -18,7 +18,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 4b7733c1cd2..f710a1289aa 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -50,7 +50,7 @@ StorageView::StorageView( Pipe StorageView::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 682c7424b98..56c9b0b4b1f 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -24,7 +24,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 3350a4352db..0b4251bc912 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -87,7 +87,7 @@ std::function StorageXDBC::getReadPOSTDataCallback( Pipe StorageXDBC::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index 7f93cbcd320..8524a03503a 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -18,7 +18,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/IStorageSystemOneBlock.h 
b/src/Storages/System/IStorageSystemOneBlock.h index 81650d669dc..d83a71c2592 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -35,7 +35,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 17378f8d6bf..b0371fa12de 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -242,7 +242,7 @@ private: Pipe StorageSystemColumns::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, diff --git a/src/Storages/System/StorageSystemColumns.h b/src/Storages/System/StorageSystemColumns.h index d90cec763c9..c4f35485612 100644 --- a/src/Storages/System/StorageSystemColumns.h +++ b/src/Storages/System/StorageSystemColumns.h @@ -20,7 +20,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index bc5b3593991..f96566026b1 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -32,7 +32,7 @@ StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_i Pipe StorageSystemDetachedParts::read( const Names & /* column_names */, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemDetachedParts.h b/src/Storages/System/StorageSystemDetachedParts.h index c0f1db51642..4c6970dadd6 100644 --- a/src/Storages/System/StorageSystemDetachedParts.h +++ b/src/Storages/System/StorageSystemDetachedParts.h @@ -25,7 +25,7 @@ protected: Pipe read( const Names & /* column_names */, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 4e9a654025a..fbbee51e34e 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -29,7 +29,7 @@ StorageSystemDisks::StorageSystemDisks(const StorageID & table_id_) Pipe StorageSystemDisks::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemDisks.h b/src/Storages/System/StorageSystemDisks.h index d2075c3c784..cff05242019 100644 --- a/src/Storages/System/StorageSystemDisks.h +++ 
b/src/Storages/System/StorageSystemDisks.h @@ -23,7 +23,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemDistributionQueue.cpp b/src/Storages/System/StorageSystemDistributionQueue.cpp index 39ccea64e26..c8d8c88ec08 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.cpp +++ b/src/Storages/System/StorageSystemDistributionQueue.cpp @@ -10,6 +10,77 @@ #include #include +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +} + + +namespace +{ + +using namespace DB; + +/// Drop "password" from the path. +/// +/// In case of use_compact_format_in_distributed_parts_names=0 the path has the format: +/// +/// user[:password]@host:port#default_database +/// +/// and the password should be masked out. +/// +/// See: +/// - Cluster::Address::fromFullString() +/// - Cluster::Address::toFullString() +std::string maskDataPath(const std::string & path) +{ + std::string masked_path = path; + + if (!masked_path.ends_with('/')) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid path format"); + + masked_path.pop_back(); + + size_t node_pos = masked_path.rfind('/'); + /// Loop through each node; nodes are separated by commas + while (node_pos != std::string::npos) + { + ++node_pos; + + size_t user_pw_end = masked_path.find('@', node_pos); + if (user_pw_end == std::string::npos) + { + /// Likely the new format (use_compact_format_in_distributed_parts_names=1) + return path; + } + + size_t pw_start = masked_path.find(':', node_pos); + if (pw_start > user_pw_end) + { + /// No password in path + return path; + } + ++pw_start; + + size_t pw_length = user_pw_end - pw_start; + /// Replace with a single '*' to hide even the password length. + masked_path.replace(pw_start, pw_length, 1, '*'); + + /// "," cannot be in the node specification since it will be encoded in hex.
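+ /// The current node now reads e.g. "user:*@host:port#default_database".
+ /// Advance to the next comma-separated node; find() returns npos after the last one, ending the loop.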
+ node_pos = masked_path.find(',', node_pos); + } + + masked_path.push_back('/'); + + return masked_path; +} + +} namespace DB { @@ -103,7 +174,7 @@ void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, cons size_t col_num = 0; res_columns[col_num++]->insert(database); res_columns[col_num++]->insert(table); - res_columns[col_num++]->insert(status.path); + res_columns[col_num++]->insert(maskDataPath(status.path)); res_columns[col_num++]->insert(status.is_blocked); res_columns[col_num++]->insert(status.error_count); res_columns[col_num++]->insert(status.files_count); diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index af8073e06dc..677e0c02400 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -125,7 +125,7 @@ StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool mult Pipe StorageSystemNumbers::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo &, + SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemNumbers.h b/src/Storages/System/StorageSystemNumbers.h index f907f3d5f93..d12c28c1ce2 100644 --- a/src/Storages/System/StorageSystemNumbers.h +++ b/src/Storages/System/StorageSystemNumbers.h @@ -32,7 +32,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index 57b87e165a9..c456b22e97b 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -23,7 +23,7 @@ StorageSystemOne::StorageSystemOne(const StorageID & table_id_) Pipe StorageSystemOne::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo &, + SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemOne.h b/src/Storages/System/StorageSystemOne.h index 3469d6ccb29..8228ce465e0 100644 --- a/src/Storages/System/StorageSystemOne.h +++ b/src/Storages/System/StorageSystemOne.h @@ -24,7 +24,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index d10346af89f..5a3d7027dd4 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -225,7 +225,7 @@ StoragesInfo StoragesInfoStream::next() Pipe StorageSystemPartsBase::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 2cb19f8f17d..eec6d5ab331 100644 --- 
a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -58,7 +58,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index ab54d760873..0af67ab6986 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -59,7 +59,7 @@ StorageSystemReplicas::StorageSystemReplicas(const StorageID & table_id_) Pipe StorageSystemReplicas::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemReplicas.h b/src/Storages/System/StorageSystemReplicas.h index c198cc29ddc..d9e364a28c0 100644 --- a/src/Storages/System/StorageSystemReplicas.h +++ b/src/Storages/System/StorageSystemReplicas.h @@ -21,7 +21,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index 86ce9de081c..7a10b986c11 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -38,7 +38,7 @@ StorageSystemStoragePolicies::StorageSystemStoragePolicies(const StorageID & tab Pipe StorageSystemStoragePolicies::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemStoragePolicies.h b/src/Storages/System/StorageSystemStoragePolicies.h index 15e5e497785..afd5e672d66 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.h +++ b/src/Storages/System/StorageSystemStoragePolicies.h @@ -23,7 +23,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 78a6ef9b1e0..a17c10c84c9 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -487,7 +487,7 @@ private: Pipe StorageSystemTables::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, diff --git a/src/Storages/System/StorageSystemTables.h b/src/Storages/System/StorageSystemTables.h index 259eb096ea7..2e0b3386f8c 100644 --- a/src/Storages/System/StorageSystemTables.h +++ b/src/Storages/System/StorageSystemTables.h @@ -21,7 +21,7 @@ public: Pipe read( 
const Names & column_names, const StorageMetadataPtr & /*metadata_*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index 9f0b81263e3..70c67683b25 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -97,6 +97,7 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, const Context & auth_params_json.set("server", authentication.getServerName()); std::ostringstream oss; + oss.exceptions(std::ios::failbit); Poco::JSON::Stringifier::stringify(auth_params_json, oss); const auto str = oss.str(); diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index 270dcc81cc1..ed5ab93369a 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -93,7 +93,7 @@ StorageSystemZeros::StorageSystemZeros(const StorageID & table_id_, bool multith Pipe StorageSystemZeros::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo &, + SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemZeros.h b/src/Storages/System/StorageSystemZeros.h index 41de3ce6246..04733f550c1 100644 --- a/src/Storages/System/StorageSystemZeros.h +++ b/src/Storages/System/StorageSystemZeros.h @@ -23,7 +23,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index 8de14b53471..821fbc4b279 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -10,8 +10,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -114,10 +116,12 @@ std::string readData(DB::StoragePtr & table, const DB::Context & context) Names column_names; column_names.push_back("a"); - QueryProcessingStage::Enum stage = table->getQueryProcessingStage(context); + SelectQueryInfo query_info; + QueryProcessingStage::Enum stage = table->getQueryProcessingStage( + context, QueryProcessingStage::Complete, query_info); QueryPipeline pipeline; - pipeline.init(table->read(column_names, metadata_snapshot, {}, context, stage, 8192, 1)); + pipeline.init(table->read(column_names, metadata_snapshot, query_info, context, stage, 8192, 1)); BlockInputStreamPtr in = std::make_shared(std::move(pipeline)); Block sample; @@ -127,7 +131,10 @@ std::string readData(DB::StoragePtr & table, const DB::Context & context) sample.insert(std::move(col)); } + tryRegisterFormats(); + std::ostringstream ss; + ss.exceptions(std::ios::failbit); WriteBufferFromOStream out_buf(ss); BlockOutputStreamPtr output = FormatFactory::instance().getOutput("Values", out_buf, sample, context); diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 31fc49582ad..48811c1c86a 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ 
b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include using namespace DB; diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 2556cd10648..3148ab1112a 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -221,6 +221,7 @@ String transformQueryForExternalDatabase( dropAliases(select_ptr); std::stringstream out; + out.exceptions(std::ios::failbit); IAST::FormatSettings settings(out, true); settings.identifier_quoting_style = identifier_quoting_style; settings.always_quote_identifiers = identifier_quoting_style != IdentifierQuotingStyle::None; diff --git a/src/Storages/ya.make b/src/Storages/ya.make index 0f14826d859..e0c6cab602f 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -85,6 +85,7 @@ SRCS( MergeTree/MergeType.cpp MergeTree/MergedBlockOutputStream.cpp MergeTree/MergedColumnOnlyOutputStream.cpp + MergeTree/PartitionPruner.cpp MergeTree/ReplicatedFetchList.cpp MergeTree/ReplicatedMergeTreeAddress.cpp MergeTree/ReplicatedMergeTreeAltersSequence.cpp diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index 2e34e82ce36..22a07a4d284 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -243,6 +243,7 @@ TableFunctionRemote::TableFunctionRemote(const std::string & name_, bool secure_ is_cluster_function = (name == "cluster" || name == "clusterAllReplicas"); std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "Table function '" << name + "' requires from 2 to " << (is_cluster_function ? 3 : 5) << " parameters" << ": , , " << (is_cluster_function ? 
"" : ", [username, [password]]."); diff --git a/tests/config/config.d/test_cluster_with_incorrect_pw.xml b/tests/config/config.d/test_cluster_with_incorrect_pw.xml new file mode 100644 index 00000000000..109e35afc37 --- /dev/null +++ b/tests/config/config.d/test_cluster_with_incorrect_pw.xml @@ -0,0 +1,21 @@ + + + + + true + + 127.0.0.1 + 9000 + + foo + + + 127.0.0.2 + 9000 + + foo + + + + + diff --git a/tests/config/install.sh b/tests/config/install.sh index ff96e46c947..f6fae181ac8 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -27,6 +27,7 @@ ln -sf $SRC_PATH/config.d/secure_ports.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/clusters.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/graphite.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/test_cluster_with_incorrect_pw.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/readonly.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/access_management.xml $DEST_SERVER_PATH/users.d/ diff --git a/tests/integration/test_distributed_format/test.py b/tests/integration/test_distributed_format/test.py index 607154e37f1..22054077544 100644 --- a/tests/integration/test_distributed_format/test.py +++ b/tests/integration/test_distributed_format/test.py @@ -47,10 +47,12 @@ def test_single_file(started_cluster, cluster): def test_two_files(started_cluster, cluster): node.query( "create table test.distr_2 (x UInt64, s String) engine = Distributed('{}', database, table)".format(cluster)) - node.query("insert into test.distr_2 values (0, '_'), (1, 'a')", - settings={"use_compact_format_in_distributed_parts_names": "1"}) - node.query("insert into test.distr_2 values (2, 'bb'), (3, 'ccc')", - settings={"use_compact_format_in_distributed_parts_names": "1"}) + node.query("insert into test.distr_2 values (0, '_'), (1, 'a')", settings={ + "use_compact_format_in_distributed_parts_names": "1", + }) + node.query("insert into test.distr_2 values (2, 'bb'), (3, 'ccc')", settings={ + "use_compact_format_in_distributed_parts_names": "1", + }) query = "select * from file('/var/lib/clickhouse/data/test/distr_2/shard1_replica1/{1,2,3,4}.bin', 'Distributed') order by x" out = node.exec_in_container(['/usr/bin/clickhouse', 'local', '--stacktrace', '-q', query]) @@ -70,7 +72,9 @@ def test_two_files(started_cluster, cluster): def test_single_file_old(started_cluster, cluster): node.query( "create table test.distr_3 (x UInt64, s String) engine = Distributed('{}', database, table)".format(cluster)) - node.query("insert into test.distr_3 values (1, 'a'), (2, 'bb'), (3, 'ccc')") + node.query("insert into test.distr_3 values (1, 'a'), (2, 'bb'), (3, 'ccc')", settings={ + "use_compact_format_in_distributed_parts_names": "0", + }) query = "select * from file('/var/lib/clickhouse/data/test/distr_3/default@not_existing:9000/1.bin', 'Distributed')" out = node.exec_in_container(['/usr/bin/clickhouse', 'local', '--stacktrace', '-q', query]) diff --git a/tests/integration/test_distributed_storage_configuration/test.py b/tests/integration/test_distributed_storage_configuration/test.py index d293b96399d..976fc5211a7 100644 --- a/tests/integration/test_distributed_storage_configuration/test.py +++ b/tests/integration/test_distributed_storage_configuration/test.py @@ -48,7 +48,9 @@ def test_insert(start_cluster): # manual only (but only for remote node) node.query('SYSTEM STOP DISTRIBUTED 
SENDS test.dist_foo') - node.query('INSERT INTO test.dist_foo SELECT * FROM numbers(100)') + node.query('INSERT INTO test.dist_foo SELECT * FROM numbers(100)', settings={ + 'use_compact_format_in_distributed_parts_names': '0', + }) assert _files_in_dist_mon(node, 'disk1', 'dist_foo') == 1 assert _files_in_dist_mon(node, 'disk2', 'dist_foo') == 0 @@ -61,7 +63,9 @@ def test_insert(start_cluster): # node.query('RENAME TABLE test.dist_foo TO test.dist2_foo') - node.query('INSERT INTO test.dist2_foo SELECT * FROM numbers(100)') + node.query('INSERT INTO test.dist2_foo SELECT * FROM numbers(100)', settings={ + 'use_compact_format_in_distributed_parts_names': '0', + }) assert _files_in_dist_mon(node, 'disk1', 'dist2_foo') == 0 assert _files_in_dist_mon(node, 'disk2', 'dist2_foo') == 1 diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/social.proto b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/social.proto new file mode 100644 index 00000000000..3bf82737fa5 --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/social.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + +message User { + string username = 1; + int32 timestamp = 2; +} \ No newline at end of file diff --git a/tests/integration/test_storage_kafka/social_pb2.py b/tests/integration/test_storage_kafka/social_pb2.py new file mode 100644 index 00000000000..eeba5efc8b1 --- /dev/null +++ b/tests/integration/test_storage_kafka/social_pb2.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: social.proto + +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='social.proto', + package='', + syntax='proto3', + serialized_options=None, + serialized_pb=b'\n\x0csocial.proto\"+\n\x04User\x12\x10\n\x08username\x18\x01 \x01(\t\x12\x11\n\ttimestamp\x18\x02 \x01(\x05\x62\x06proto3' +) + + + + +_USER = _descriptor.Descriptor( + name='User', + full_name='User', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='username', full_name='User.username', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='timestamp', full_name='User.timestamp', index=1, + number=2, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=16, + serialized_end=59, +) + +DESCRIPTOR.message_types_by_name['User'] = _USER +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +User = _reflection.GeneratedProtocolMessageType('User', (_message.Message,), { + 'DESCRIPTOR' : _USER, + '__module__' : 'social_pb2' + # @@protoc_insertion_point(class_scope:User) + }) +_sym_db.RegisterMessage(User) + + +# 
@@protoc_insertion_point(module_scope) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 6ef37c1e231..5d943361414 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -30,6 +30,8 @@ libprotoc 3.0.0 protoc --python_out=. kafka.proto """ from . import kafka_pb2 +from . import social_pb2 + # TODO: add test for run-time offset update in CH, if we manually update it on Kafka side. # TODO: add test for SELECT LIMIT is working. @@ -115,6 +117,20 @@ def kafka_produce_protobuf_messages_no_delimeters(topic, start_index, num_messag producer.flush() print("Produced {} messages for topic {}".format(num_messages, topic)) +def kafka_produce_protobuf_social(topic, start_index, num_messages): + data = b'' + for i in range(start_index, start_index + num_messages): + msg = social_pb2.User() + msg.username='John Doe {}'.format(i) + msg.timestamp=1000000+i + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + producer = KafkaProducer(bootstrap_servers="localhost:9092", value_serializer=producer_serializer) + producer.send(topic=topic, value=data) + producer.flush() + print(("Produced {} messages for topic {}".format(num_messages, topic))) + + def avro_confluent_message(schema_registry_client, value): # type: (CachedSchemaRegistryClient, dict) -> str @@ -982,6 +998,84 @@ def test_kafka_protobuf(kafka_cluster): kafka_check_result(result, True) +@pytest.mark.timeout(180) +def test_kafka_string_field_on_first_position_in_protobuf(kafka_cluster): +# https://github.com/ClickHouse/ClickHouse/issues/12615 + + instance.query(''' +CREATE TABLE test.kafka ( + username String, + timestamp Int32 + ) ENGINE = Kafka() +SETTINGS + kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'string_field_on_first_position_in_protobuf', + kafka_group_name = 'string_field_on_first_position_in_protobuf', + kafka_format = 'Protobuf', + kafka_schema = 'social:User'; + + SELECT * FROM test.kafka; + ''') + + kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 0, 20) + kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 20, 1) + kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 21, 29) + + result = instance.query('SELECT * FROM test.kafka', ignore_error=True) + expected = '''\ +John Doe 0 1000000 +John Doe 1 1000001 +John Doe 2 1000002 +John Doe 3 1000003 +John Doe 4 1000004 +John Doe 5 1000005 +John Doe 6 1000006 +John Doe 7 1000007 +John Doe 8 1000008 +John Doe 9 1000009 +John Doe 10 1000010 +John Doe 11 1000011 +John Doe 12 1000012 +John Doe 13 1000013 +John Doe 14 1000014 +John Doe 15 1000015 +John Doe 16 1000016 +John Doe 17 1000017 +John Doe 18 1000018 +John Doe 19 1000019 +John Doe 20 1000020 +John Doe 21 1000021 +John Doe 22 1000022 +John Doe 23 1000023 +John Doe 24 1000024 +John Doe 25 1000025 +John Doe 26 1000026 +John Doe 27 1000027 +John Doe 28 1000028 +John Doe 29 1000029 +John Doe 30 1000030 +John Doe 31 1000031 +John Doe 32 1000032 +John Doe 33 1000033 +John Doe 34 1000034 +John Doe 35 1000035 +John Doe 36 1000036 +John Doe 37 1000037 +John Doe 38 1000038 +John Doe 39 1000039 +John Doe 40 1000040 +John Doe 41 1000041 +John Doe 42 1000042 +John Doe 43 1000043 +John Doe 44 1000044 +John Doe 45 1000045 +John Doe 46 1000046 +John Doe 47 1000047 +John Doe 48 1000048 +John Doe 49 1000049 +''' + assert TSV(result) == TSV(expected) + @pytest.mark.timeout(30) def 
test_kafka_protobuf_no_delimiter(kafka_cluster): instance.query(''' @@ -2117,7 +2211,7 @@ def test_kafka_duplicates_when_commit_failed(kafka_cluster): kafka_format = 'JSONEachRow', kafka_max_block_size = 20, kafka_flush_interval_ms = 1000; - + SELECT * FROM test.kafka LIMIT 1; /* do subscription & assignment in advance (it can take different time, test rely on timing, so can flap otherwise) */ ''') diff --git a/tests/performance/consistent_hashes.xml b/tests/performance/consistent_hashes.xml index 087187497ed..3610579f545 100644 --- a/tests/performance/consistent_hashes.xml +++ b/tests/performance/consistent_hashes.xml @@ -18,7 +18,4 @@ SELECT {hash_func}(number, {buckets}) FROM numbers(10000000) FORMAT Null - - - SELECT sumburConsistentHash(toUInt32(number), 2) FROM numbers(10000000) FORMAT Null diff --git a/tests/performance/quantile_merge.xml b/tests/performance/quantile_merge.xml index 7f4d85a254c..0ddb688d8eb 100644 --- a/tests/performance/quantile_merge.xml +++ b/tests/performance/quantile_merge.xml @@ -1,3 +1,3 @@ - SELECT quantileMerge(arrayJoin(arrayMap(x -> state, range(1000000)))) FROM (SELECT quantileState(rand()) AS state FROM numbers(10000)) + SELECT quantileMerge(arrayJoin(arrayMap(x -> state, range(500000)))) FROM (SELECT quantileState(rand()) AS state FROM numbers(10000)) diff --git a/tests/queries/0_stateless/01304_direct_io.sh b/tests/queries/0_stateless/01304_direct_io.sh index dcf2adbd64f..244e4c6e02d 100755 --- a/tests/queries/0_stateless/01304_direct_io.sh +++ b/tests/queries/0_stateless/01304_direct_io.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT --multiquery --query " INSERT INTO bug SELECT rand64(), '2020-06-07' FROM numbers(50000000); OPTIMIZE TABLE bug FINAL;" -$CLICKHOUSE_BENCHMARK --database "$CLICKHOUSE_DATABASE" --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>"$CLICKHOUSE_TMP"/err +$CLICKHOUSE_BENCHMARK --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>"$CLICKHOUSE_TMP"/err cat "$CLICKHOUSE_TMP"/err | grep Exception cat "$CLICKHOUSE_TMP"/err | grep Loaded diff --git a/tests/queries/0_stateless/01458_named_tuple_millin.reference b/tests/queries/0_stateless/01458_named_tuple_millin.reference index b826566c74b..d6d6d7ae8d4 100644 --- a/tests/queries/0_stateless/01458_named_tuple_millin.reference +++ b/tests/queries/0_stateless/01458_named_tuple_millin.reference @@ -1,12 +1,12 @@ CREATE TABLE default.tuple ( - `j` Tuple( a Int8, b String) + `j` Tuple(a Int8, b String) ) ENGINE = Memory j Tuple(a Int8, b String) CREATE TABLE default.tuple ( - `j` Tuple( a Int8, b String) + `j` Tuple(a Int8, b String) ) ENGINE = Memory j Tuple(a Int8, b String) diff --git a/tests/queries/0_stateless/01458_named_tuple_millin.sql b/tests/queries/0_stateless/01458_named_tuple_millin.sql index 7687dd4c158..ea730e65bb7 100644 --- a/tests/queries/0_stateless/01458_named_tuple_millin.sql +++ b/tests/queries/0_stateless/01458_named_tuple_millin.sql @@ -10,9 +10,7 @@ SHOW CREATE TABLE tuple FORMAT TSVRaw; DESC tuple; DROP TABLE tuple; -CREATE TABLE tuple -ENGINE = Memory AS -SELECT CAST((1, 'Test'), 'Tuple(a Int8, b String)') AS j; +CREATE TABLE tuple ENGINE = Memory AS SELECT CAST((1, 'Test'), 'Tuple(a Int8, b String)') AS j; SHOW CREATE TABLE tuple FORMAT TSVRaw; DESC tuple; diff --git a/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.reference 
b/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.reference new file mode 100644 index 00000000000..9c6ae9c65ab --- /dev/null +++ b/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.reference @@ -0,0 +1,16 @@ +0 +\N +0 +\N +\N +\N +0 +\N +45 +45 +10 +10 +45 +45 +10 +10 diff --git a/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.sql b/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.sql new file mode 100644 index 00000000000..e76ce667bbc --- /dev/null +++ b/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.sql @@ -0,0 +1,36 @@ +DROP TABLE IF EXISTS defaults; + +CREATE TABLE defaults +( + n Int8 +)ENGINE = Memory(); + +SELECT sum(n) FROM defaults; +SELECT sumOrNull(n) FROM defaults; +SELECT count(n) FROM defaults; +SELECT countOrNull(n) FROM defaults; + +SET aggregate_functions_null_for_empty=1; + +SELECT sum(n) FROM defaults; +SELECT sumOrNull(n) FROM defaults; +SELECT count(n) FROM defaults; +SELECT countOrNull(n) FROM defaults; + +INSERT INTO defaults SELECT * FROM numbers(10); + +SET aggregate_functions_null_for_empty=0; + +SELECT sum(n) FROM defaults; +SELECT sumOrNull(n) FROM defaults; +SELECT count(n) FROM defaults; +SELECT countOrNull(n) FROM defaults; + +SET aggregate_functions_null_for_empty=1; + +SELECT sum(n) FROM defaults; +SELECT sumOrNull(n) FROM defaults; +SELECT count(n) FROM defaults; +SELECT countOrNull(n) FROM defaults; + +DROP TABLE defaults; diff --git a/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference b/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference new file mode 100644 index 00000000000..fbffea8df5a --- /dev/null +++ b/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference @@ -0,0 +1,64 @@ +Order by without collate +1 Ё +2 А +2 Я +1 а +2 я +1 ё +Order by with collate +1 а +2 А +1 ё +1 Ё +2 я +2 Я +Order by tuple without collate +1 Ё +1 а +1 ё +2 А +2 Я +2 я +Order by tuple with collate +1 а +1 ё +1 Ё +2 А +2 я +2 Я +Order by without collate +1 Ё +2 А +2 Я +1 а +2 я +1 ё +1 \N +2 \N +Order by with collate +1 а +2 А +1 ё +1 Ё +2 я +2 Я +1 \N +2 \N +Order by tuple without collate +1 Ё +1 а +1 ё +1 \N +2 А +2 Я +2 я +2 \N +Order by tuple with collate +1 а +1 ё +1 Ё +1 \N +2 А +2 я +2 Я +2 \N diff --git a/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql b/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql new file mode 100644 index 00000000000..b6fba26eb2d --- /dev/null +++ b/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS test_collate; +DROP TABLE IF EXISTS test_collate_null; + +CREATE TABLE test_collate (x UInt32, s LowCardinality(String)) ENGINE=Memory(); +CREATE TABLE test_collate_null (x UInt32, s LowCardinality(Nullable(String))) ENGINE=Memory(); + +INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я'); +INSERT INTO test_collate_null VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я'), (1, null), (2, null); + + +SELECT 'Order by without collate'; +SELECT * FROM test_collate ORDER BY s; +SELECT 'Order by with collate'; +SELECT * FROM test_collate ORDER BY s COLLATE 'ru'; + +SELECT 'Order by tuple without collate'; +SELECT * FROM test_collate ORDER BY x, s; +SELECT 'Order by tuple with collate'; +SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru'; + +SELECT 'Order by without collate'; +SELECT * FROM test_collate_null ORDER BY s; +SELECT 'Order by with 
diff --git a/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference b/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference
new file mode 100644
index 00000000000..fbffea8df5a
--- /dev/null
+++ b/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference
@@ -0,0 +1,64 @@
+Order by without collate
+1 Ё
+2 А
+2 Я
+1 а
+2 я
+1 ё
+Order by with collate
+1 а
+2 А
+1 ё
+1 Ё
+2 я
+2 Я
+Order by tuple without collate
+1 Ё
+1 а
+1 ё
+2 А
+2 Я
+2 я
+Order by tuple with collate
+1 а
+1 ё
+1 Ё
+2 А
+2 я
+2 Я
+Order by without collate
+1 Ё
+2 А
+2 Я
+1 а
+2 я
+1 ё
+1 \N
+2 \N
+Order by with collate
+1 а
+2 А
+1 ё
+1 Ё
+2 я
+2 Я
+1 \N
+2 \N
+Order by tuple without collate
+1 Ё
+1 а
+1 ё
+1 \N
+2 А
+2 Я
+2 я
+2 \N
+Order by tuple with collate
+1 а
+1 ё
+1 Ё
+1 \N
+2 А
+2 я
+2 Я
+2 \N
diff --git a/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql b/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql
new file mode 100644
index 00000000000..b6fba26eb2d
--- /dev/null
+++ b/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql
@@ -0,0 +1,33 @@
+DROP TABLE IF EXISTS test_collate;
+DROP TABLE IF EXISTS test_collate_null;
+
+CREATE TABLE test_collate (x UInt32, s LowCardinality(String)) ENGINE=Memory();
+CREATE TABLE test_collate_null (x UInt32, s LowCardinality(Nullable(String))) ENGINE=Memory();
+
+INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я');
+INSERT INTO test_collate_null VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я'), (1, null), (2, null);
+
+
+SELECT 'Order by without collate';
+SELECT * FROM test_collate ORDER BY s;
+SELECT 'Order by with collate';
+SELECT * FROM test_collate ORDER BY s COLLATE 'ru';
+
+SELECT 'Order by tuple without collate';
+SELECT * FROM test_collate ORDER BY x, s;
+SELECT 'Order by tuple with collate';
+SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru';
+
+SELECT 'Order by without collate';
+SELECT * FROM test_collate_null ORDER BY s;
+SELECT 'Order by with collate';
+SELECT * FROM test_collate_null ORDER BY s COLLATE 'ru';
+
+SELECT 'Order by tuple without collate';
+SELECT * FROM test_collate_null ORDER BY x, s;
+SELECT 'Order by tuple with collate';
+SELECT * FROM test_collate_null ORDER BY x, s COLLATE 'ru';
+
+
+DROP TABLE test_collate;
+DROP TABLE test_collate_null;
diff --git a/tests/queries/0_stateless/01533_collate_in_nullable.reference b/tests/queries/0_stateless/01533_collate_in_nullable.reference
new file mode 100644
index 00000000000..6bb06cbc8b5
--- /dev/null
+++ b/tests/queries/0_stateless/01533_collate_in_nullable.reference
@@ -0,0 +1,36 @@
+Order by without collate
+1 Ё
+2 А
+2 Я
+1 а
+2 я
+1 ё
+1 \N
+2 \N
+Order by with collate
+1 а
+2 А
+1 ё
+1 Ё
+2 я
+2 Я
+1 \N
+2 \N
+Order by tuple without collate
+1 Ё
+1 а
+1 ё
+1 \N
+2 А
+2 Я
+2 я
+2 \N
+Order by tuple with collate
+1 а
+1 ё
+1 Ё
+1 \N
+2 А
+2 я
+2 Я
+2 \N
diff --git a/tests/queries/0_stateless/01533_collate_in_nullable.sql b/tests/queries/0_stateless/01533_collate_in_nullable.sql
new file mode 100644
index 00000000000..40b48bee465
--- /dev/null
+++ b/tests/queries/0_stateless/01533_collate_in_nullable.sql
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS test_collate;
+
+CREATE TABLE test_collate (x UInt32, s Nullable(String)) ENGINE=Memory();
+
+INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (1, null), (2, 'А'), (2, 'я'), (2, 'Я'), (2, null);
+
+SELECT 'Order by without collate';
+SELECT * FROM test_collate ORDER BY s;
+SELECT 'Order by with collate';
+SELECT * FROM test_collate ORDER BY s COLLATE 'ru';
+
+SELECT 'Order by tuple without collate';
+SELECT * FROM test_collate ORDER BY x, s;
+SELECT 'Order by tuple with collate';
+SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru';
+
+DROP TABLE test_collate;
+
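Judging by the reference files above, COLLATE 'ru' sorts case pairs of the same letter together (lowercase first) and places NULLs last, which the new Nullable and LowCardinality support has to preserve. A standalone sketch (illustrative only):

    -- expected order under the collation: а, А, ё, Ё, я, then NULL last
    SELECT c
    FROM (SELECT arrayJoin(['я', 'Ё', NULL, 'а', 'А', 'ё']) AS c)
    ORDER BY c COLLATE 'ru';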
diff --git a/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference b/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference
new file mode 100644
index 00000000000..90888a9eaf5
--- /dev/null
+++ b/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference
@@ -0,0 +1,3 @@
+2 3
+9 5
+8 4
diff --git a/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql b/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql
new file mode 100644
index 00000000000..2ef9c9e8917
--- /dev/null
+++ b/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql
@@ -0,0 +1,23 @@
+drop table if exists xy;
+
+create table xy(x int, y int) engine MergeTree partition by intHash64(x) % 2 order by y settings index_granularity = 1;
+
+-- intHash64(0) % 2 = 0
+-- intHash64(2) % 2 = 1
+-- intHash64(8) % 2 = 0
+-- intHash64(9) % 2 = 1
+insert into xy values (0, 2), (2, 3), (8, 4), (9, 5);
+
+-- Now we have two partitions: 0 and 1, each of which contains 2 values.
+-- minmax index for the first partition is 0 <= x <= 8
+-- minmax index for the second partition is 2 <= x <= 9
+
+SET max_rows_to_read = 2;
+
+select * from xy where intHash64(x) % 2 = intHash64(2) % 2;
+
+-- Equality is another special operator: for a deterministic partition expression it can be treated as if the expression were monotonic.
+-- The minmax index alone is not enough here.
+select * from xy where x = 8;
+
+drop table if exists xy;
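The comments in 01540 carry the key reasoning: intHash64(x) % 2 is not monotonic in x, so the per-partition minmax index alone cannot prune for x = 8; but because the predicate is an equality and the partition expression is deterministic, the engine can evaluate the expression at the literal and keep only the matching partition. Roughly (illustrative only):

    -- the partition ID for x = 8 can be computed directly:
    SELECT intHash64(8) % 2;  -- 0, so only the partition with ID 0 needs to be read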
diff --git a/tests/queries/0_stateless/01542_collate_in_array.reference b/tests/queries/0_stateless/01542_collate_in_array.reference
new file mode 100644
index 00000000000..2c5a23066f3
--- /dev/null
+++ b/tests/queries/0_stateless/01542_collate_in_array.reference
@@ -0,0 +1,50 @@
+1 ['а']
+2 ['А']
+1 ['ё']
+1 ['ё','а']
+2 ['ё','а','а']
+1 ['ё','я']
+1 ['Ё']
+2 ['я','а']
+2 ['Я']
+
+1 ['а']
+1 ['ё']
+1 ['ё','а']
+1 ['ё','я']
+1 ['Ё']
+2 ['А']
+2 ['ё','а','а']
+2 ['я','а']
+2 ['Я']
+
+1 ['а']
+2 ['А']
+1 ['ё']
+1 ['ё','а']
+2 ['ё','а','а',NULL]
+1 ['ё',NULL,'я']
+1 ['Ё']
+2 ['я']
+2 [NULL,'Я']
+
+1 ['а']
+1 ['ё']
+1 ['ё','а']
+1 ['ё',NULL,'я']
+1 ['Ё']
+2 ['А']
+2 ['ё','а','а',NULL]
+2 ['я']
+2 [NULL,'Я']
+
+2 [['а','а'],['я','ё']]
+1 [['а','Ё'],['ё','я']]
+1 [['а','я'],['а','ё']]
+2 [['ё']]
+
+1 [['а','Ё'],['ё','я']]
+1 [['а','я'],['а','ё']]
+2 [['а','а'],['я','ё']]
+2 [['ё']]
+
diff --git a/tests/queries/0_stateless/01542_collate_in_array.sql b/tests/queries/0_stateless/01542_collate_in_array.sql
new file mode 100644
index 00000000000..dd0ec769e7d
--- /dev/null
+++ b/tests/queries/0_stateless/01542_collate_in_array.sql
@@ -0,0 +1,34 @@
+DROP TABLE IF EXISTS collate_test1;
+DROP TABLE IF EXISTS collate_test2;
+DROP TABLE IF EXISTS collate_test3;
+
+CREATE TABLE collate_test1 (x UInt32, s Array(String)) ENGINE=Memory();
+CREATE TABLE collate_test2 (x UInt32, s Array(LowCardinality(Nullable(String)))) ENGINE=Memory();
+CREATE TABLE collate_test3 (x UInt32, s Array(Array(String))) ENGINE=Memory();
+
+INSERT INTO collate_test1 VALUES (1, ['Ё']), (1, ['ё']), (1, ['а']), (2, ['А']), (2, ['я', 'а']), (2, ['Я']), (1, ['ё','а']), (1, ['ё', 'я']), (2, ['ё', 'а', 'а']);
+INSERT INTO collate_test2 VALUES (1, ['Ё']), (1, ['ё']), (1, ['а']), (2, ['А']), (2, ['я']), (2, [null, 'Я']), (1, ['ё','а']), (1, ['ё', null, 'я']), (2, ['ё', 'а', 'а', null]);
+INSERT INTO collate_test3 VALUES (1, [['а', 'я'], ['а', 'ё']]), (1, [['а', 'Ё'], ['ё', 'я']]), (2, [['ё']]), (2, [['а', 'а'], ['я', 'ё']]);
+
+SELECT * FROM collate_test1 ORDER BY s COLLATE 'ru';
+SELECT '';
+
+SELECT * FROM collate_test1 ORDER BY x, s COLLATE 'ru';
+SELECT '';
+
+SELECT * FROM collate_test2 ORDER BY s COLLATE 'ru';
+SELECT '';
+
+SELECT * FROM collate_test2 ORDER BY x, s COLLATE 'ru';
+SELECT '';
+
+SELECT * FROM collate_test3 ORDER BY s COLLATE 'ru';
+SELECT '';
+
+SELECT * FROM collate_test3 ORDER BY x, s COLLATE 'ru';
+SELECT '';
+
+DROP TABLE collate_test1;
+DROP TABLE collate_test2;
+DROP TABLE collate_test3;
+
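Per the reference file above, COLLATE now also reaches inside Array columns: arrays compare lexicographically, with the collation applied to their String elements. A standalone sketch (illustrative only):

    -- expected order: ['а'], ['ё','я'], ['Ё'], ['я']
    -- ('ё' sorts before 'Ё' at the case level, so ['ё','я'] precedes ['Ё'])
    SELECT a
    FROM (SELECT arrayJoin([['я'], ['Ё'], ['а'], ['ё','я']]) AS a)
    ORDER BY a COLLATE 'ru';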
"$CURDIR"/../shell_config.sh + +echo "CREATE TABLE test(a Int64, b NESTED(a Int64)) ENGINE=TinyLog" | $CLICKHOUSE_FORMAT + +echo "CREATE TABLE test(a Int64, b TUPLE(a Int64)) ENGINE=TinyLog" | $CLICKHOUSE_FORMAT \ No newline at end of file diff --git a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference new file mode 100644 index 00000000000..fc10b4707a9 --- /dev/null +++ b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference @@ -0,0 +1,11 @@ +(Expression) +ExpressionTransform + (Expression) + ExpressionTransform + (Aggregating) + FinalizingSimpleTransform + AggregatingSortedTransform 3 → 1 + AggregatingInOrderTransform × 3 + (Expression) + ExpressionTransform × 3 + (ReadFromStorage) diff --git a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql new file mode 100644 index 00000000000..831a7282861 --- /dev/null +++ b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS data_01551; + +CREATE TABLE data_01551 +( + key UInt32 +) engine=AggregatingMergeTree() +PARTITION BY key%2 +ORDER BY (key, key/2) +SETTINGS index_granularity=10; + +INSERT INTO data_01551 SELECT number FROM numbers(100000); +SET max_threads=3; +SET merge_tree_min_rows_for_concurrent_read=10000; +SET optimize_aggregation_in_order=1; +SET read_in_order_two_level_merge_threshold=1; +EXPLAIN PIPELINE SELECT key FROM data_01551 GROUP BY key, key/2; diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference new file mode 100644 index 00000000000..bd0eac10816 --- /dev/null +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference @@ -0,0 +1,4 @@ +masked +3,"default:*@127%2E0%2E0%2E1:9000,default:*@127%2E0%2E0%2E2:9000" +no masking +1,"default@localhost:9000" diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql new file mode 100644 index 00000000000..0143b8e46ed --- /dev/null +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql @@ -0,0 +1,36 @@ +-- force data path with the user/pass in it +set use_compact_format_in_distributed_parts_names=0; +-- use async send even for localhost +set prefer_localhost_replica=0; + +drop table if exists dist_01555; +drop table if exists data_01555; +create table data_01555 (key Int) Engine=Null(); + +-- +-- masked +-- +SELECT 'masked'; +create table dist_01555 (key Int) Engine=Distributed(test_cluster_with_incorrect_pw, currentDatabase(), data_01555, key); + +insert into dist_01555 values (1)(2); +-- since test_cluster_with_incorrect_pw contains incorrect password ignore error +system flush distributed dist_01555; -- { serverError 516; } +select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1') from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; + +drop table dist_01555; + +-- +-- no masking +-- +SELECT 'no masking'; +create table dist_01555 (key Int) Engine=Distributed(test_shard_localhost, currentDatabase(), data_01555, key); + +insert into dist_01555 values (1)(2); +-- since test_cluster_with_incorrect_pw contains incorrect password ignore error +system flush distributed dist_01555; +select length(splitByChar('*', 
diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference
new file mode 100644
index 00000000000..bd0eac10816
--- /dev/null
+++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference
@@ -0,0 +1,4 @@
+masked
+3,"default:*@127%2E0%2E0%2E1:9000,default:*@127%2E0%2E0%2E2:9000"
+no masking
+1,"default@localhost:9000"
diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql
new file mode 100644
index 00000000000..0143b8e46ed
--- /dev/null
+++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql
@@ -0,0 +1,36 @@
+-- force data path with the user/pass in it
+set use_compact_format_in_distributed_parts_names=0;
+-- use async send even for localhost
+set prefer_localhost_replica=0;
+
+drop table if exists dist_01555;
+drop table if exists data_01555;
+create table data_01555 (key Int) Engine=Null();
+
+--
+-- masked
+--
+SELECT 'masked';
+create table dist_01555 (key Int) Engine=Distributed(test_cluster_with_incorrect_pw, currentDatabase(), data_01555, key);
+
+insert into dist_01555 values (1)(2);
+-- test_cluster_with_incorrect_pw contains an incorrect password, so ignore the error
+system flush distributed dist_01555; -- { serverError 516; }
+select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1') from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV;
+
+drop table dist_01555;
+
+--
+-- no masking
+--
+SELECT 'no masking';
+create table dist_01555 (key Int) Engine=Distributed(test_shard_localhost, currentDatabase(), data_01555, key);
+
+insert into dist_01555 values (1)(2);
+-- test_shard_localhost has no password, so the flush succeeds
+system flush distributed dist_01555;
+select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1') from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV;
+
+-- cleanup
+drop table dist_01555;
+drop table data_01555;
diff --git a/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference
new file mode 100644
index 00000000000..d5bdb816bf2
--- /dev/null
+++ b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference
@@ -0,0 +1 @@
+Unknown data type family: CODEC
diff --git a/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sh b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sh
new file mode 100755
index 00000000000..9904b6388d6
--- /dev/null
+++ b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+. "$CURDIR"/../shell_config.sh
+
+${CLICKHOUSE_CLIENT} --query "CREATE TABLE t (c CODEC(NONE)) ENGINE = Memory" 2>&1 | grep -oF 'Unknown data type family: CODEC' | uniq
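The 01559 diagnostic fires because the column definition jumps straight to CODEC without a data type, so the parser tries to resolve CODEC as a type. For contrast, a well-formed declaration of the same idea (illustrative only; the table name is made up):

    -- fails with 'Unknown data type family: CODEC', since the type is missing:
    --   CREATE TABLE codec_demo (c CODEC(NONE)) ENGINE = Memory;
    -- correct: give the column a type, then the codec:
    CREATE TABLE codec_demo (c UInt8 CODEC(NONE)) ENGINE = MergeTree ORDER BY c;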
diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt
index 900cc82b33f..4c4f7b29d66 100644
--- a/tests/queries/0_stateless/arcadia_skip_list.txt
+++ b/tests/queries/0_stateless/arcadia_skip_list.txt
@@ -155,8 +155,13 @@
 01509_dictionary_preallocate
 01526_max_untracked_memory
 01530_drop_database_atomic_sync
+01532_collate_in_low_cardinality
+01533_collate_in_nullable
+01542_collate_in_array
+01543_collate_in_tuple
 01546_log_queries_min_query_duration_ms
 01547_query_log_current_database
 01548_query_log_query_execution_ms
 01552_dict_fixedstring
+01555_system_distribution_queue_mask
 01557_max_parallel_replicas_no_sample.sql
diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh
index 9249fbc0411..0b5b0940cd7 100644
--- a/tests/queries/shell_config.sh
+++ b/tests/queries/shell_config.sh
@@ -7,6 +7,7 @@ export CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL
 [ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_CLIENT_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} "
 [ -v CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL ] && CLICKHOUSE_CLIENT_OPT0+=" --send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL} "
 [ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_CLIENT_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
+[ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_BENCHMARK_OPT0+=" --database=${CLICKHOUSE_DATABASE} "

 export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"}
 [ -x "$CLICKHOUSE_BINARY-client" ] && CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:=$CLICKHOUSE_BINARY-client}
@@ -17,7 +18,7 @@ export CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:="$CLICKHOUSE_CLIENT_BINARY ${CLICK
 [ -x "${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:="${CLICKHOUSE_BINARY} local"}
 export CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:="${CLICKHOUSE_BINARY}-local"}
 export CLICKHOUSE_OBFUSCATOR=${CLICKHOUSE_OBFUSCATOR:="${CLICKHOUSE_BINARY}-obfuscator"}
-export CLICKHOUSE_BENCHMARK=${CLICKHOUSE_BENCHMARK:="${CLICKHOUSE_BINARY}-benchmark"}
+export CLICKHOUSE_BENCHMARK=${CLICKHOUSE_BENCHMARK:="${CLICKHOUSE_BINARY}-benchmark ${CLICKHOUSE_BENCHMARK_OPT0:-}"}
 export CLICKHOUSE_CONFIG=${CLICKHOUSE_CONFIG:="/etc/clickhouse-server/config.xml"}
 export CLICKHOUSE_CONFIG_CLIENT=${CLICKHOUSE_CONFIG_CLIENT:="/etc/clickhouse-client/config.xml"}
diff --git a/tests/testflows/rbac/regression.py b/tests/testflows/rbac/regression.py
index c0307664061..a53de0178eb 100755
--- a/tests/testflows/rbac/regression.py
+++ b/tests/testflows/rbac/regression.py
@@ -19,6 +19,7 @@ issue_14674 = "https://github.com/ClickHouse/ClickHouse/issues/14674"
 issue_14810 = "https://github.com/ClickHouse/ClickHouse/issues/14810"
 issue_15165 = "https://github.com/ClickHouse/ClickHouse/issues/15165"
 issue_15980 = "https://github.com/ClickHouse/ClickHouse/issues/15980"
+issue_16403 = "https://github.com/ClickHouse/ClickHouse/issues/16403"

 xfails = {
     "syntax/show create quota/I show create quota current":
@@ -89,6 +90,12 @@ xfails = {
         [(Fail, ".inner table is not created as expected")],
     "views/materialized view/select from source table privilege granted directly or via role/select from implicit target table, privilege granted through a role":
         [(Fail, ".inner table is not created as expected")],
+    "privileges/alter move/:/:/:/:/move partition to implicit target table of a materialized view":
+        [(Fail, ".inner table is not created as expected")],
+    "privileges/alter move/:/:/:/:/user without ALTER MOVE PARTITION privilege/":
+        [(Fail, issue_16403)],
+    "privileges/alter move/:/:/:/:/user with revoked ALTER MOVE PARTITION privilege/":
+        [(Fail, issue_16403)],
 }

 xflags = {
diff --git a/tests/testflows/rbac/tests/privileges/feature.py b/tests/testflows/rbac/tests/privileges/feature.py
index 81af29a70e1..bc63824d322 100755
--- a/tests/testflows/rbac/tests/privileges/feature.py
+++ b/tests/testflows/rbac/tests/privileges/feature.py
@@ -7,7 +7,7 @@ from rbac.helper.common import *
 def feature(self):
     tasks = []
-    pool = Pool(10)
+    pool = Pool(16)

     try:
         try:
@@ -21,6 +21,12 @@ def feature(self):
             run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_constraint", "feature"), flags=TE), {})
             run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_ttl", "feature"), flags=TE), {})
             run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_settings", "feature"), flags=TE), {})
+            run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_update", "feature"), flags=TE), {})
+            run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_delete", "feature"), flags=TE), {})
+            run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_freeze", "feature"), flags=TE), {})
+            run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_fetch", "feature"), flags=TE), {})
+            run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_move", "feature"), flags=TE), {})
+            run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.grant_option", "feature"), flags=TE), {})
         finally:
             join(tasks)
     finally: