Merge branch 'master' into actions-dag-f14

This commit is contained in:
Nikolai Kochetov 2020-11-09 14:57:48 +03:00
commit 6717c7a0af
276 changed files with 2557 additions and 1185 deletions

View File

@ -14,6 +14,11 @@ unset (_current_dir_name)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w")
if (SANITIZE STREQUAL "undefined")
# 3rd-party libraries usually not intended to work with UBSan.
add_compile_options(-fno-sanitize=undefined)
endif()
set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)
add_subdirectory (boost-cmake)
@ -157,9 +162,6 @@ if(USE_INTERNAL_SNAPPY_LIBRARY)
add_subdirectory(snappy)
set (SNAPPY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/snappy")
if(SANITIZE STREQUAL "undefined")
target_compile_options(${SNAPPY_LIBRARY} PRIVATE -fno-sanitize=undefined)
endif()
endif()
if (USE_INTERNAL_PARQUET_LIBRARY)

2
contrib/poco vendored

@ -1 +1 @@
Subproject commit 757d947235b307675cff964f29b19d388140a9eb
Subproject commit f49c6ab8d3aa71828bd1b411485c21722e8c9d82

View File

@ -240,6 +240,10 @@ TESTS_TO_SKIP=(
01354_order_by_tuple_collate_const
01355_ilike
01411_bayesian_ab_testing
01532_collate_in_low_cardinality
01533_collate_in_nullable
01542_collate_in_array
01543_collate_in_tuple
_orc_
arrow
avro

View File

@ -123,6 +123,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--stacktrace` If specified, also print the stack trace if an exception occurs.
- `--config-file` The name of the configuration file.
- `--secure` If specified, will connect to server over secure connection.
- `--history_file` — Path to a file containing command history.
- `--param_<name>` — Value for a [query with parameters](#cli-queries-with-parameters).
### Configuration Files {#configuration_files}

View File

@ -36,6 +36,7 @@ toc_title: Adopters
| <a href="https://www.criteo.com/" class="favicon">Criteo</a> | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) |
| <a href="https://www.chinatelecomglobal.com/" class="favicon">Dataliance for China Telecom</a> | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
| <a href="https://db.com" class="favicon">Deutsche Bank</a> | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
| <a href="https://deeplay.io/eng/" class="favicon">Deeplay</a> | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) |
| <a href="https://www.diva-e.com" class="favicon">Diva-e</a> | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
| <a href="https://www.ecwid.com/" class="favicon">Ecwid</a> | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) |
| <a href="https://www.ebay.com/" class="favicon">eBay</a> | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) |
@ -45,6 +46,7 @@ toc_title: Adopters
| <a href="https://fun.co/rp" class="favicon">FunCorp</a> | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
| <a href="https://geniee.co.jp" class="favicon">Geniee</a> | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
| <a href="https://www.huya.com/" class="favicon">HUYA</a> | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
| <a href="https://www.the-ica.com/" class="favicon">ICA</a> | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) |
| <a href="https://www.idealista.com" class="favicon">Idealista</a> | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
| <a href="https://www.infovista.com/" class="favicon">Infovista</a> | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) |
| <a href="https://www.innogames.com" class="favicon">InnoGames</a> | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) |
@ -68,6 +70,7 @@ toc_title: Adopters
| <a href="https://www.nuna.com/" class="favicon">Nuna Inc.</a> | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) |
| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
| <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
| <a href="https://www.percona.com/" class="favicon">Percona</a> | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) |
| <a href="https://plausible.io/" class="favicon">Plausible</a> | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) |
| <a href="https://posthog.com/" class="favicon">PostHog</a> | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) |
| <a href="https://postmates.com/" class="favicon">Postmates</a> | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |

View File

@ -571,7 +571,7 @@ For more information, see the MergeTreeSettings.h header file.
Fine tuning for tables in the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
This setting has higher priority.
This setting has a higher priority.
For more information, see the MergeTreeSettings.h header file.

View File

@ -1765,6 +1765,23 @@ Default value: `0`.
- [Distributed Table Engine](../../engines/table-engines/special/distributed.md#distributed)
- [Managing Distributed Tables](../../sql-reference/statements/system.md#query-language-system-distributed)
## use_compact_format_in_distributed_parts_names {#use_compact_format_in_distributed_parts_names}
Uses compact format for storing blocks for async (`insert_distributed_sync`) INSERT into tables with `Distributed` engine.
Possible values:
- 0 — Uses `user[:password]@host:port#default_database` directory format.
- 1 — Uses `[shard{shard_index}[_replica{replica_index}]]` directory format.
Default value: `1`.
!!! note "Note"
- with `use_compact_format_in_distributed_parts_names=0` changes from cluster definition will not be applied for async INSERT.
- with `use_compact_format_in_distributed_parts_names=1` changing the order of the nodes in the cluster definition, will change the `shard_index`/`replica_index` so be aware.
## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size}
Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at the ClickHouse server start and cant be changed in a user session.

View File

@ -4,6 +4,6 @@ toc_priority: 140
# sumWithOverflow {#sumwithoverflowx}
Computes the sum of the numbers, using the same data type for the result as for the input parameters. If the sum exceeds the maximum value for this data type, the function returns an error.
Computes the sum of the numbers, using the same data type for the result as for the input parameters. If the sum exceeds the maximum value for this data type, it is calculated with overflow.
Only works for numbers.

View File

@ -0,0 +1,91 @@
---
toc_priority: 46
toc_title: Polygon Dictionaries With Grids
---
# Polygon dictionaries {#polygon-dictionaries}
Polygon dictionaries allow you to efficiently search for the polygon containing specified points.
For example: defining a city area by geographical coordinates.
Example configuration:
``` xml
<dictionary>
<structure>
<key>
<name>key</name>
<type>Array(Array(Array(Array(Float64))))</type>
</key>
<attribute>
<name>name</name>
<type>String</type>
<null_value></null_value>
</attribute>
<attribute>
<name>value</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
<layout>
<polygon />
</layout>
</dictionary>
```
Tne corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md#create-dictionary-query):
``` sql
CREATE DICTIONARY polygon_dict_name (
key Array(Array(Array(Array(Float64)))),
name String,
value UInt64
)
PRIMARY KEY key
LAYOUT(POLYGON())
...
```
When configuring the polygon dictionary, the key must have one of two types:
- A simple polygon. It is an array of points.
- MultiPolygon. It is an array of polygons. Each polygon is a two-dimensional array of points. The first element of this array is the outer boundary of the polygon, and subsequent elements specify areas to be excluded from it.
Points can be specified as an array or a tuple of their coordinates. In the current implementation, only two-dimensional points are supported.
The user can [upload their own data](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) in all formats supported by ClickHouse.
There are 3 types of [in-memory storage](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) available:
- POLYGON_SIMPLE. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes.
- POLYGON_INDEX_EACH. A separate index is built for each polygon, which allows you to quickly check whether it belongs in most cases (optimized for geographical regions).
Also, a grid is superimposed on the area under consideration, which significantly narrows the number of polygons under consideration.
The grid is created by recursively dividing the cell into 16 equal parts and is configured with two parameters.
The division stops when the recursion depth reaches MAX_DEPTH or when the cell crosses no more than MIN_INTERSECTIONS polygons.
To respond to the query, there is a corresponding cell, and the index for the polygons stored in it is accessed alternately.
- POLYGON_INDEX_CELL. This placement also creates the grid described above. The same options are available. For each sheet cell, an index is built on all pieces of polygons that fall into it, which allows you to quickly respond to a request.
- POLYGON. Synonym to POLYGON_INDEX_CELL.
Dictionary queries are carried out using standard [functions](../../../sql-reference/functions/ext-dict-functions.md) for working with external dictionaries.
An important difference is that here the keys will be the points for which you want to find the polygon containing them.
Example of working with the dictionary defined above:
``` sql
CREATE TABLE points (
x Float64,
y Float64
)
...
SELECT tuple(x, y) AS key, dictGet(dict_name, 'name', key), dictGet(dict_name, 'value', key) FROM points ORDER BY x, y;
```
As a result of executing the last command for each point in the 'points' table, a minimum area polygon containing this point will be found, and the requested attributes will be output.

View File

@ -221,3 +221,85 @@ returns
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```
## OFFSET FETCH Clause {#offset-fetch}
`OFFSET` and `FETCH` allow you to retrieve data by portions. They specify a row block which you want to get by a single query.
``` sql
OFFSET offset_row_count {ROW | ROWS}] [FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES}]
```
The `offset_row_count` or `fetch_row_count` value can be a number or a literal constant. You can omit `fetch_row_count`; by default, it equals 1.
`OFFSET` specifies the number of rows to skip before starting to return rows from the query.
The `FETCH` specifies the maximum number of rows that can be in the result of a query.
The `ONLY` option is used to return rows that immediately follow the rows omitted by the `OFFSET`. In this case the `FETCH` is an alternative to the [LIMIT](../../../sql-reference/statements/select/limit.md) clause. For example, the following query
``` sql
SELECT * FROM test_fetch ORDER BY a OFFSET 1 ROW FETCH FIRST 3 ROWS ONLY;
```
is identical to the query
``` sql
SELECT * FROM test_fetch ORDER BY a LIMIT 3 OFFSET 1;
```
The `WITH TIES` option is used to return any additional rows that tie for the last place in the result set according to the `ORDER BY` clause. For example, if `fetch_row_count` is set to 5 but two additional rows match the values of the `ORDER BY` columns in the fifth row, the result set will contain seven rows.
!!! note "Note"
According to the standard, the `OFFSET` clause must come before the `FETCH` clause if both are present.
### Examples {#examples}
Input table:
``` text
┌─a─┬─b─┐
│ 1 │ 1 │
│ 2 │ 1 │
│ 3 │ 4 │
│ 1 │ 3 │
│ 5 │ 4 │
│ 0 │ 6 │
│ 5 │ 7 │
└───┴───┘
```
Usage of the `ONLY` option:
``` sql
SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS ONLY;
```
Result:
``` text
┌─a─┬─b─┐
│ 2 │ 1 │
│ 3 │ 4 │
│ 5 │ 4 │
└───┴───┘
```
Usage of the `WITH TIES` option:
``` sql
SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS WITH TIES;
```
Result:
``` text
┌─a─┬─b─┐
│ 2 │ 1 │
│ 3 │ 4 │
│ 5 │ 4 │
│ 5 │ 7 │
└───┴───┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/order-by/) <!--hide-->

View File

@ -64,6 +64,6 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10
Таблицы типа Buffer используются в тех случаях, когда от большого количества серверов поступает слишком много INSERT-ов в единицу времени, и нет возможности заранее самостоятельно буферизовать данные перед вставкой, в результате чего, INSERT-ы не успевают выполняться.
Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел «Производительность»).
Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел [«Производительность»](../../../introduction/performance/).
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/buffer/) <!--hide-->

View File

@ -555,6 +555,22 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
</merge_tree>
```
## replicated\_merge\_tree {#server_configuration_parameters-replicated_merge_tree}
Тонкая настройка таблиц в [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
Эта настройка имеет более высокий приоритет.
Подробнее смотрите в заголовочном файле MergeTreeSettings.h.
**Пример**
``` xml
<replicated_merge_tree>
<max_suspicious_broken_parts>5</max_suspicious_broken_parts>
</replicated_merge_tree>
```
## openSSL {#server_configuration_parameters-openssl}
Настройки клиента/сервера SSL.

View File

@ -1,4 +1,4 @@
# Cловари полигонов {#slovari-polygonov}
# Cловари полигонов {#polygon-dictionaries}
Словари полигонов позволяют эффективно искать полигон, в который попадают данные точки, среди множества полигонов.
Для примера: определение района города по географическим координатам.

View File

@ -214,3 +214,85 @@ ORDER BY
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```
## Секция OFFSET FETCH {#offset-fetch}
`OFFSET` и `FETCH` позволяют извлекать данные по частям. Они указывают строки, которые вы хотите получить в результате запроса.
``` sql
OFFSET offset_row_count {ROW | ROWS}] [FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES}]
```
`offset_row_count` или `fetch_row_count` может быть числом или литеральной константой. Если вы не используете `fetch_row_count`, то его значение равно 1.
`OFFSET` указывает количество строк, которые необходимо пропустить перед началом возврата строк из запроса.
`FETCH` указывает максимальное количество строк, которые могут быть получены в результате запроса.
Опция `ONLY` используется для возврата строк, которые следуют сразу же за строками, пропущенными секцией `OFFSET`. В этом случае `FETCH` — это альтернатива [LIMIT](../../../sql-reference/statements/select/limit.md). Например, следующий запрос
``` sql
SELECT * FROM test_fetch ORDER BY a OFFSET 1 ROW FETCH FIRST 3 ROWS ONLY;
```
идентичен запросу
``` sql
SELECT * FROM test_fetch ORDER BY a LIMIT 3 OFFSET 1;
```
Опция `WITH TIES` используется для возврата дополнительных строк, которые привязываются к последней в результате запроса. Например, если `fetch_row_count` имеет значение 5 и существуют еще 2 строки с такими же значениями столбцов, указанных в `ORDER BY`, что и у пятой строки результата, то финальный набор будет содержать 7 строк.
!!! note "Примечание"
Секция `OFFSET` должна находиться перед секцией `FETCH`, если обе присутствуют.
### Примеры {#examples}
Входная таблица:
``` text
┌─a─┬─b─┐
│ 1 │ 1 │
│ 2 │ 1 │
│ 3 │ 4 │
│ 1 │ 3 │
│ 5 │ 4 │
│ 0 │ 6 │
│ 5 │ 7 │
└───┴───┘
```
Использование опции `ONLY`:
``` sql
SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS ONLY;
```
Результат:
``` text
┌─a─┬─b─┐
│ 2 │ 1 │
│ 3 │ 4 │
│ 5 │ 4 │
└───┴───┘
```
Использование опции `WITH TIES`:
``` sql
SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS WITH TIES;
```
Результат:
``` text
┌─a─┬─b─┐
│ 2 │ 1 │
│ 3 │ 4 │
│ 5 │ 4 │
│ 5 │ 7 │
└───┴───┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/order-by/) <!--hide-->

View File

@ -227,9 +227,6 @@ else ()
install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-git-import DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-git-import)
endif ()
if(ENABLE_CLICKHOUSE_ODBC_BRIDGE)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge)
endif()
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)

View File

@ -75,6 +75,7 @@
#include <Common/InterruptListener.h>
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Formats/registerFormats.h>
#include <Common/Config/configReadClient.h>
#include <Storages/ColumnsDescription.h>
#include <common/argsToConfig.h>
@ -463,6 +464,7 @@ private:
{
UseSSL use_ssl;
registerFormats();
registerFunctions();
registerAggregateFunctions();
@ -2329,6 +2331,7 @@ public:
("query-fuzzer-runs", po::value<int>()->default_value(0), "query fuzzer runs")
("opentelemetry-traceparent", po::value<std::string>(), "OpenTelemetry traceparent header as described by W3C Trace Context recommendation")
("opentelemetry-tracestate", po::value<std::string>(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation")
("history_file", po::value<std::string>(), "path to history file")
;
Settings cmd_settings;
@ -2485,6 +2488,8 @@ public:
config().setInt("suggestion_limit", options["suggestion_limit"].as<int>());
if (options.count("highlight"))
config().setBool("highlight", options["highlight"].as<bool>());
if (options.count("history_file"))
config().setString("history_file", options["history_file"].as<std::string>());
if ((query_fuzzer_runs = options["query-fuzzer-runs"].as<int>()))
{

View File

@ -1,6 +1,7 @@
#include "ClusterCopierApp.h"
#include <Common/StatusFile.h>
#include <Common/TerminalSize.h>
#include <Formats/registerFormats.h>
#include <unistd.h>
@ -122,6 +123,7 @@ void ClusterCopierApp::mainImpl()
registerStorages();
registerDictionaries();
registerDisks();
registerFormats();
static const std::string default_database = "_local";
DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared<DatabaseMemory>(default_database, *context));

View File

@ -33,6 +33,7 @@
#include <Storages/registerStorages.h>
#include <Dictionaries/registerDictionaries.h>
#include <Disks/registerDisks.h>
#include <Formats/registerFormats.h>
#include <boost/program_options/options_description.hpp>
#include <boost/program_options.hpp>
#include <common/argsToConfig.h>
@ -224,6 +225,7 @@ try
registerStorages();
registerDictionaries();
registerDisks();
registerFormats();
/// Maybe useless
if (config().has("macros"))

View File

@ -23,6 +23,7 @@
#include <Common/HashTable/HashMap.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Formats/registerFormats.h>
#include <Core/Block.h>
#include <common/StringRef.h>
#include <common/DateLUT.h>
@ -1050,6 +1051,8 @@ try
using namespace DB;
namespace po = boost::program_options;
registerFormats();
po::options_description description = createOptionsDescription("Options", getTerminalWidth());
description.add_options()
("help", "produce help message")

View File

@ -10,19 +10,8 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
PingHandler.cpp
SchemaAllowedHandler.cpp
validateODBCConnectionString.cpp
odbc-bridge.cpp
)
set (CLICKHOUSE_ODBC_BRIDGE_LINK
PRIVATE
clickhouse_parsers
clickhouse_aggregate_functions
daemon
dbms
Poco::Data
PUBLIC
Poco::Data::ODBC
)
clickhouse_program_add_library(odbc-bridge)
if (OS_LINUX)
# clickhouse-odbc-bridge is always a separate binary.
@ -30,10 +19,17 @@ if (OS_LINUX)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()
add_executable(clickhouse-odbc-bridge odbc-bridge.cpp)
set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
clickhouse_program_link_split_binary(odbc-bridge)
target_link_libraries(clickhouse-odbc-bridge PRIVATE
daemon
dbms
clickhouse_parsers
Poco::Data
Poco::Data::ODBC
)
set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
if (USE_GDB_ADD_INDEX)
add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM)

View File

@ -18,11 +18,13 @@
#include <Common/Exception.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/config.h>
#include <Formats/registerFormats.h>
#include <common/logger_useful.h>
#include <ext/scope_guard.h>
#include <ext/range.h>
#include <Common/SensitiveDataMasker.h>
namespace DB
{
namespace ErrorCodes
@ -160,6 +162,8 @@ int ODBCBridge::main(const std::vector<std::string> & /*args*/)
if (is_help)
return Application::EXIT_OK;
registerFormats();
LOG_INFO(log, "Starting up");
Poco::Net::ServerSocket socket;
auto address = socketBindListen(socket, hostname, port, log);

View File

@ -1,3 +1,2 @@
add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp)
clickhouse_target_link_split_lib(validate-odbc-connection-string odbc-bridge)
add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp ../validateODBCConnectionString.cpp)
target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse_common_io)

View File

@ -51,6 +51,7 @@
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Functions/registerFunctions.h>
#include <TableFunctions/registerTableFunctions.h>
#include <Formats/registerFormats.h>
#include <Storages/registerStorages.h>
#include <Dictionaries/registerDictionaries.h>
#include <Disks/registerDisks.h>
@ -266,6 +267,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
registerStorages();
registerDictionaries();
registerDisks();
registerFormats();
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision());
CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());

View File

@ -0,0 +1 @@
../../../tests/config/config.d/test_cluster_with_incorrect_pw.xml

View File

@ -198,6 +198,7 @@ namespace
/// Serialize the list of ATTACH queries to a string.
std::stringstream ss;
ss.exceptions(std::ios::failbit);
for (const ASTPtr & query : queries)
ss << *query << ";\n";
String file_contents = std::move(ss).str();
@ -353,6 +354,7 @@ String DiskAccessStorage::getStorageParamsJSON() const
if (readonly)
json.set("readonly", readonly.load());
std::ostringstream oss;
oss.exceptions(std::ios::failbit);
Poco::JSON::Stringifier::stringify(json, oss);
return oss.str();
}

View File

@ -151,6 +151,7 @@ String LDAPAccessStorage::getStorageParamsJSON() const
params_json.set("roles", default_role_names);
std::ostringstream oss;
oss.exceptions(std::ios::failbit);
Poco::JSON::Stringifier::stringify(params_json, oss);
return oss.str();

View File

@ -461,6 +461,7 @@ String UsersConfigAccessStorage::getStorageParamsJSON() const
if (!path.empty())
json.set("path", path);
std::ostringstream oss;
oss.exceptions(std::ios::failbit);
Poco::JSON::Stringifier::stringify(json, oss);
return oss.str();
}

View File

@ -245,6 +245,7 @@ public:
{
DB::writeIntBinary<size_t>(this->data(place).total_values, buf);
std::ostringstream rng_stream;
rng_stream.exceptions(std::ios::failbit);
rng_stream << this->data(place).rng;
DB::writeStringBinary(rng_stream.str(), buf);
}
@ -275,6 +276,7 @@ public:
std::string rng_string;
DB::readStringBinary(rng_string, buf);
std::istringstream rng_stream(rng_string);
rng_stream.exceptions(std::ios::failbit);
rng_stream >> this->data(place).rng;
}
@ -564,6 +566,7 @@ public:
{
DB::writeIntBinary<size_t>(data(place).total_values, buf);
std::ostringstream rng_stream;
rng_stream.exceptions(std::ios::failbit);
rng_stream << data(place).rng;
DB::writeStringBinary(rng_stream.str(), buf);
}
@ -598,6 +601,7 @@ public:
std::string rng_string;
DB::readStringBinary(rng_string, buf);
std::istringstream rng_stream(rng_string);
rng_stream.exceptions(std::ios::failbit);
rng_stream >> data(place).rng;
}

View File

@ -191,6 +191,7 @@ public:
std::string rng_string;
DB::readStringBinary(rng_string, buf);
std::istringstream rng_stream(rng_string);
rng_stream.exceptions(std::ios::failbit);
rng_stream >> rng;
for (size_t i = 0; i < samples.size(); ++i)
@ -205,6 +206,7 @@ public:
DB::writeIntBinary<size_t>(total_values, buf);
std::ostringstream rng_stream;
rng_stream.exceptions(std::ios::failbit);
rng_stream << rng;
DB::writeStringBinary(rng_stream.str(), buf);

View File

@ -223,6 +223,7 @@ std::string MultiplexedConnections::dumpAddressesUnlocked() const
{
bool is_first = true;
std::ostringstream os;
os.exceptions(std::ios::failbit);
for (const ReplicaState & state : replica_states)
{
const Connection * connection = state.connection;

View File

@ -324,8 +324,7 @@ void ColumnArray::popBack(size_t n)
offsets_data.resize_assume_reserved(offsets_data.size() - n);
}
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator) const
{
const ColumnArray & rhs = assert_cast<const ColumnArray &>(rhs_);
@ -334,8 +333,15 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir
size_t rhs_size = rhs.sizeAt(m);
size_t min_size = std::min(lhs_size, rhs_size);
for (size_t i = 0; i < min_size; ++i)
if (int res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint))
{
int res;
if (collator)
res = getData().compareAtWithCollation(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint, *collator);
else
res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint);
if (res)
return res;
}
return lhs_size < rhs_size
? -1
@ -344,6 +350,16 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir
: 1);
}
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
{
return compareAtImpl(n, m, rhs_, nan_direction_hint);
}
int ColumnArray::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs_, nan_direction_hint, &collator);
}
void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
@ -352,27 +368,26 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
compare_results, direction, nan_direction_hint);
}
namespace
template <bool positive>
struct ColumnArray::Cmp
{
template <bool positive>
struct Less
const ColumnArray & parent;
int nan_direction_hint;
const Collator * collator;
Cmp(const ColumnArray & parent_, int nan_direction_hint_, const Collator * collator_=nullptr)
: parent(parent_), nan_direction_hint(nan_direction_hint_), collator(collator_) {}
int operator()(size_t lhs, size_t rhs) const
{
const ColumnArray & parent;
int nan_direction_hint;
Less(const ColumnArray & parent_, int nan_direction_hint_)
: parent(parent_), nan_direction_hint(nan_direction_hint_) {}
bool operator()(size_t lhs, size_t rhs) const
{
if (positive)
return parent.compareAt(lhs, rhs, parent, nan_direction_hint) < 0;
else
return parent.compareAt(lhs, rhs, parent, nan_direction_hint) > 0;
}
};
}
int res;
if (collator)
res = parent.compareAtWithCollation(lhs, rhs, parent, nan_direction_hint, *collator);
else
res = parent.compareAt(lhs, rhs, parent, nan_direction_hint);
return positive ? res : -res;
}
};
void ColumnArray::reserve(size_t n)
{
@ -753,7 +768,8 @@ ColumnPtr ColumnArray::indexImpl(const PaddedPODArray<T> & indexes, size_t limit
INSTANTIATE_INDEX_IMPL(ColumnArray)
void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
template <typename Comparator>
void ColumnArray::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const
{
size_t s = size();
if (limit >= s)
@ -763,23 +779,16 @@ void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_h
for (size_t i = 0; i < s; ++i)
res[i] = i;
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
if (limit)
{
if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<false>(*this, nan_direction_hint));
else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<true>(*this, nan_direction_hint));
}
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
else
{
if (reverse)
std::sort(res.begin(), res.end(), Less<false>(*this, nan_direction_hint));
else
std::sort(res.begin(), res.end(), Less<true>(*this, nan_direction_hint));
}
std::sort(res.begin(), res.end(), less);
}
void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
template <typename Comparator>
void ColumnArray::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const
{
if (equal_range.empty())
return;
@ -792,20 +801,19 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
if (limit)
--number_of_ranges;
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
EqualRanges new_ranges;
for (size_t i = 0; i < number_of_ranges; ++i)
{
const auto & [first, last] = equal_range[i];
if (reverse)
std::sort(res.begin() + first, res.begin() + last, Less<false>(*this, nan_direction_hint));
else
std::sort(res.begin() + first, res.begin() + last, Less<true>(*this, nan_direction_hint));
std::sort(res.begin() + first, res.begin() + last, less);
auto new_first = first;
for (auto j = first + 1; j < last; ++j)
{
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0)
if (cmp(res[new_first], res[j]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -827,14 +835,11 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
/// Since then we are working inside the interval.
if (reverse)
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less<false>(*this, nan_direction_hint));
else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less<true>(*this, nan_direction_hint));
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
auto new_first = first;
for (auto j = first + 1; j < limit; ++j)
{
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0)
if (cmp(res[new_first], res[j]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -845,7 +850,7 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
auto new_last = limit;
for (auto j = limit; j < last; ++j)
{
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) == 0)
if (cmp(res[new_first], res[j]) == 0)
{
std::swap(res[new_last], res[j]);
++new_last;
@ -859,6 +864,39 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
equal_range = std::move(new_ranges);
}
void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
if (reverse)
getPermutationImpl(limit, res, Cmp<false>(*this, nan_direction_hint));
else
getPermutationImpl(limit, res, Cmp<true>(*this, nan_direction_hint));
}
void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
{
if (reverse)
updatePermutationImpl(limit, res, equal_range, Cmp<false>(*this, nan_direction_hint));
else
updatePermutationImpl(limit, res, equal_range, Cmp<true>(*this, nan_direction_hint));
}
void ColumnArray::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
if (reverse)
getPermutationImpl(limit, res, Cmp<false>(*this, nan_direction_hint, &collator));
else
getPermutationImpl(limit, res, Cmp<true>(*this, nan_direction_hint, &collator));
}
void ColumnArray::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
{
if (reverse)
updatePermutationImpl(limit, res, equal_range, Cmp<false>(*this, nan_direction_hint, &collator));
else
updatePermutationImpl(limit, res, equal_range, Cmp<true>(*this, nan_direction_hint, &collator));
}
ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
{
if (replicate_offsets.empty())

View File

@ -77,8 +77,11 @@ public:
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
void reserve(size_t n) override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
@ -132,6 +135,8 @@ public:
return false;
}
bool isCollationSupported() const override { return getData().isCollationSupported(); }
private:
WrappedPtr data;
WrappedPtr offsets;
@ -169,6 +174,17 @@ private:
ColumnPtr filterTuple(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const;
int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator=nullptr) const;
template <typename Comparator>
void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const;
template <typename Comparator>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const;
template <bool positive>
struct Cmp;
};

View File

@ -248,6 +248,8 @@ public:
/// The constant value. It is valid even if the size of the column is 0.
template <typename T>
T getValue() const { return getField().safeGet<NearestFieldType<T>>(); }
bool isCollationSupported() const override { return data->isCollationSupported(); }
};
}

View File

@ -1,5 +1,6 @@
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <DataStreams/ColumnGathererStream.h>
#include <DataTypes/NumberTraits.h>
#include <Common/HashTable/HashMap.h>
@ -278,14 +279,26 @@ MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const
return ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), getIndexes().cloneResized(size));
}
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const
{
const auto & low_cardinality_column = assert_cast<const ColumnLowCardinality &>(rhs);
size_t n_index = getIndexes().getUInt(n);
size_t m_index = low_cardinality_column.getIndexes().getUInt(m);
if (collator)
return getDictionary().getNestedColumn()->compareAtWithCollation(n_index, m_index, *low_cardinality_column.getDictionary().getNestedColumn(), nan_direction_hint, *collator);
return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint);
}
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint);
}
int ColumnLowCardinality::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint, &collator);
}
void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
@ -295,14 +308,17 @@ void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num
compare_results, direction, nan_direction_hint);
}
void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
void ColumnLowCardinality::getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator) const
{
if (limit == 0)
limit = size();
size_t unique_limit = getDictionary().size();
Permutation unique_perm;
getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm);
if (collator)
getDictionary().getNestedColumn()->getPermutationWithCollation(*collator, reverse, unique_limit, nan_direction_hint, unique_perm);
else
getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm);
/// TODO: optimize with sse.
@ -330,7 +346,8 @@ void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_di
}
}
void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
template <typename Cmp>
void ColumnLowCardinality::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const
{
if (equal_ranges.empty())
return;
@ -345,20 +362,17 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
EqualRanges new_ranges;
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; };
for (size_t i = 0; i < number_of_ranges; ++i)
{
const auto& [first, last] = equal_ranges[i];
if (reverse)
std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; });
else
std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; });
std::sort(res.begin() + first, res.begin() + last, less);
auto new_first = first;
for (auto j = first + 1; j < last; ++j)
{
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0)
if (comparator(res[new_first], res[j]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -379,17 +393,12 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
/// Since then we are working inside the interval.
if (reverse)
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; });
else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; });
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
auto new_first = first;
for (auto j = first + 1; j < limit; ++j)
{
if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) != 0)
if (comparator(res[new_first],res[j]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -401,7 +410,7 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
auto new_last = limit;
for (auto j = limit; j < last; ++j)
{
if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) == 0)
if (comparator(res[new_first], res[j]) == 0)
{
std::swap(res[new_last], res[j]);
++new_last;
@ -412,6 +421,38 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
}
}
void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
getPermutationImpl(reverse, limit, nan_direction_hint, res);
}
void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
{
auto comparator = [this, nan_direction_hint, reverse](size_t lhs, size_t rhs)
{
int ret = getDictionary().compareAt(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), getDictionary(), nan_direction_hint);
return reverse ? -ret : ret;
};
updatePermutationImpl(limit, res, equal_ranges, comparator);
}
void ColumnLowCardinality::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
getPermutationImpl(reverse, limit, nan_direction_hint, res, &collator);
}
void ColumnLowCardinality::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const
{
auto comparator = [this, &collator, reverse, nan_direction_hint](size_t lhs, size_t rhs)
{
int ret = getDictionary().getNestedColumn()->compareAtWithCollation(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), *getDictionary().getNestedColumn(), nan_direction_hint, collator);
return reverse ? -ret : ret;
};
updatePermutationImpl(limit, res, equal_ranges, comparator);
}
std::vector<MutableColumnPtr> ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const
{
auto columns = getIndexes().scatter(num_columns, selector);

View File

@ -125,10 +125,16 @@ public:
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
ColumnPtr replicate(const Offsets & offsets) const override
{
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets));
@ -170,6 +176,7 @@ public:
size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); }
bool isNumeric() const override { return getDictionary().isNumeric(); }
bool lowCardinality() const override { return true; }
bool isCollationSupported() const override { return getDictionary().getNestedColumn()->isCollationSupported(); }
/**
* Checks if the dictionary column is Nullable(T).
@ -309,6 +316,13 @@ private:
void compactInplace();
void compactIfSharedDictionary();
int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
void getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
template <typename Cmp>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const;
};

View File

@ -6,6 +6,7 @@
#include <Common/WeakHash.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <DataStreams/ColumnGathererStream.h>
@ -223,7 +224,7 @@ ColumnPtr ColumnNullable::index(const IColumn & indexes, size_t limit) const
return ColumnNullable::create(indexed_data, indexed_null_map);
}
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator) const
{
/// NULL values share the properties of NaN values.
/// Here the last parameter of compareAt is called null_direction_hint
@ -245,9 +246,22 @@ int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null
}
const IColumn & nested_rhs = nullable_rhs.getNestedColumn();
if (collator)
return getNestedColumn().compareAtWithCollation(n, m, nested_rhs, null_direction_hint, *collator);
return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
}
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
{
return compareAtImpl(n, m, rhs_, null_direction_hint);
}
int ColumnNullable::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs_, null_direction_hint, &collator);
}
void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
@ -256,10 +270,14 @@ void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num,
compare_results, direction, nan_direction_hint);
}
void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
void ColumnNullable::getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator) const
{
/// Cannot pass limit because of unknown amount of NULLs.
getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res);
if (collator)
getNestedColumn().getPermutationWithCollation(*collator, reverse, 0, null_direction_hint, res);
else
getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res);
if ((null_direction_hint > 0) != reverse)
{
@ -329,7 +347,7 @@ void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_directi
}
}
void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
void ColumnNullable::updatePermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const
{
if (equal_ranges.empty())
return;
@ -432,12 +450,35 @@ void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_dire
}
}
getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges);
if (collator)
getNestedColumn().updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges);
else
getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges);
equal_ranges = std::move(new_ranges);
std::move(null_ranges.begin(), null_ranges.end(), std::back_inserter(equal_ranges));
}
void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
{
getPermutationImpl(reverse, limit, null_direction_hint, res);
}
void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
{
updatePermutationImpl(reverse, limit, null_direction_hint, res, equal_ranges);
}
void ColumnNullable::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
{
getPermutationImpl(reverse, limit, null_direction_hint, res, &collator);
}
void ColumnNullable::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const
{
updatePermutationImpl(reverse, limit, null_direction_hint, res, equal_range, &collator);
}
void ColumnNullable::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);

View File

@ -6,6 +6,7 @@
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
class Collator;
namespace DB
{
@ -92,8 +93,12 @@ public:
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator &) const override;
void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override;
void updatePermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(
const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
void reserve(size_t n) override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
@ -129,6 +134,7 @@ public:
bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); }
size_t sizeOfValueIfFixed() const override { return null_map->sizeOfValueIfFixed() + nested_column->sizeOfValueIfFixed(); }
bool onlyNull() const override { return nested_column->isDummy(); }
bool isCollationSupported() const override { return nested_column->isCollationSupported(); }
/// Return the column that represents values.
@ -164,6 +170,13 @@ private:
template <bool negative>
void applyNullMapImpl(const ColumnUInt8 & map);
int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator=nullptr) const;
void getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
void updatePermutationImpl(
bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator = nullptr) const;
};
ColumnPtr makeNullable(const ColumnPtr & column);

View File

@ -285,21 +285,22 @@ void ColumnString::compareColumn(
}
template <bool positive>
struct ColumnString::less
struct ColumnString::Cmp
{
const ColumnString & parent;
explicit less(const ColumnString & parent_) : parent(parent_) {}
bool operator()(size_t lhs, size_t rhs) const
explicit Cmp(const ColumnString & parent_) : parent(parent_) {}
int operator()(size_t lhs, size_t rhs) const
{
int res = memcmpSmallAllowOverflow15(
parent.chars.data() + parent.offsetAt(lhs), parent.sizeAt(lhs) - 1,
parent.chars.data() + parent.offsetAt(rhs), parent.sizeAt(rhs) - 1);
return positive ? (res < 0) : (res > 0);
return positive ? res : -res;
}
};
void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const
template <typename Comparator>
void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const
{
size_t s = offsets.size();
res.resize(s);
@ -309,23 +310,16 @@ void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_directio
if (limit >= s)
limit = 0;
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
if (limit)
{
if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<false>(*this));
else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<true>(*this));
}
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
else
{
if (reverse)
std::sort(res.begin(), res.end(), less<false>(*this));
else
std::sort(res.begin(), res.end(), less<true>(*this));
}
std::sort(res.begin(), res.end(), less);
}
void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const
template <typename Comparator>
void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const
{
if (equal_ranges.empty())
return;
@ -340,21 +334,17 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
if (limit)
--number_of_ranges;
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
for (size_t i = 0; i < number_of_ranges; ++i)
{
const auto & [first, last] = equal_ranges[i];
if (reverse)
std::sort(res.begin() + first, res.begin() + last, less<false>(*this));
else
std::sort(res.begin() + first, res.begin() + last, less<true>(*this));
std::sort(res.begin() + first, res.begin() + last, less);
size_t new_first = first;
for (size_t j = first + 1; j < last; ++j)
{
if (memcmpSmallAllowOverflow15(
chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1,
chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0)
if (cmp(res[j], res[new_first]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -375,17 +365,12 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
/// Since then we are working inside the interval.
if (reverse)
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<false>(*this));
else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<true>(*this));
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
size_t new_first = first;
for (size_t j = first + 1; j < limit; ++j)
{
if (memcmpSmallAllowOverflow15(
chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1,
chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0)
if (cmp(res[j], res[new_first]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -395,9 +380,7 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
size_t new_last = limit;
for (size_t j = limit; j < last; ++j)
{
if (memcmpSmallAllowOverflow15(
chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1,
chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) == 0)
if (cmp(res[j], res[new_first]) == 0)
{
std::swap(res[j], res[new_last]);
++new_last;
@ -408,6 +391,56 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
}
}
void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const
{
if (reverse)
getPermutationImpl(limit, res, Cmp<false>(*this));
else
getPermutationImpl(limit, res, Cmp<true>(*this));
}
void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const
{
if (reverse)
updatePermutationImpl(limit, res, equal_ranges, Cmp<false>(*this));
else
updatePermutationImpl(limit, res, equal_ranges, Cmp<true>(*this));
}
template <bool positive>
struct ColumnString::CmpWithCollation
{
const ColumnString & parent;
const Collator & collator;
CmpWithCollation(const ColumnString & parent_, const Collator & collator_) : parent(parent_), collator(collator_) {}
int operator()(size_t lhs, size_t rhs) const
{
int res = collator.compare(
reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs),
reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs));
return positive ? res : -res;
}
};
void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const
{
if (reverse)
getPermutationImpl(limit, res, CmpWithCollation<false>(*this, collator));
else
getPermutationImpl(limit, res, CmpWithCollation<true>(*this, collator));
}
void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const
{
if (reverse)
updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation<false>(*this, collator));
else
updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation<true>(*this, collator));
}
ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const
{
size_t col_size = size();
@ -476,13 +509,13 @@ void ColumnString::getExtremes(Field & min, Field & max) const
size_t min_idx = 0;
size_t max_idx = 0;
less<true> less_op(*this);
Cmp<true> cmp_op(*this);
for (size_t i = 1; i < col_size; ++i)
{
if (less_op(i, min_idx))
if (cmp_op(i, min_idx) < 0)
min_idx = i;
else if (less_op(max_idx, i))
else if (cmp_op(max_idx, i) < 0)
max_idx = i;
}
@ -491,7 +524,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const
}
int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const
int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const
{
const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);
@ -500,134 +533,6 @@ int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs
reinterpret_cast<const char *>(&rhs.chars[rhs.offsetAt(m)]), rhs.sizeAt(m));
}
template <bool positive>
struct ColumnString::lessWithCollation
{
const ColumnString & parent;
const Collator & collator;
lessWithCollation(const ColumnString & parent_, const Collator & collator_) : parent(parent_), collator(collator_) {}
bool operator()(size_t lhs, size_t rhs) const
{
int res = collator.compare(
reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs),
reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs));
return positive ? (res < 0) : (res > 0);
}
};
void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const
{
size_t s = offsets.size();
res.resize(s);
for (size_t i = 0; i < s; ++i)
res[i] = i;
if (limit >= s)
limit = 0;
if (limit)
{
if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation<false>(*this, collator));
else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation<true>(*this, collator));
}
else
{
if (reverse)
std::sort(res.begin(), res.end(), lessWithCollation<false>(*this, collator));
else
std::sort(res.begin(), res.end(), lessWithCollation<true>(*this, collator));
}
}
void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const
{
if (equal_ranges.empty())
return;
if (limit >= size() || limit >= equal_ranges.back().second)
limit = 0;
size_t number_of_ranges = equal_ranges.size();
if (limit)
--number_of_ranges;
EqualRanges new_ranges;
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
for (size_t i = 0; i < number_of_ranges; ++i)
{
const auto& [first, last] = equal_ranges[i];
if (reverse)
std::sort(res.begin() + first, res.begin() + last, lessWithCollation<false>(*this, collator));
else
std::sort(res.begin() + first, res.begin() + last, lessWithCollation<true>(*this, collator));
auto new_first = first;
for (auto j = first + 1; j < last; ++j)
{
if (collator.compare(
reinterpret_cast<const char *>(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]),
reinterpret_cast<const char *>(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
new_first = j;
}
}
if (last - new_first > 1)
new_ranges.emplace_back(new_first, last);
}
if (limit)
{
const auto & [first, last] = equal_ranges.back();
if (limit < first || limit > last)
return;
/// Since then we are working inside the interval.
if (reverse)
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation<false>(*this, collator));
else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation<true>(*this, collator));
auto new_first = first;
for (auto j = first + 1; j < limit; ++j)
{
if (collator.compare(
reinterpret_cast<const char *>(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]),
reinterpret_cast<const char *>(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
new_first = j;
}
}
auto new_last = limit;
for (auto j = limit; j < last; ++j)
{
if (collator.compare(
reinterpret_cast<const char *>(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]),
reinterpret_cast<const char *>(&chars[offsetAt(res[j])]), sizeAt(res[j])) == 0)
{
std::swap(res[new_last], res[j]);
++new_last;
}
}
if (new_last - new_first > 1)
new_ranges.emplace_back(new_first, new_last);
}
}
void ColumnString::protect()
{
getChars().protect();

View File

@ -43,14 +43,20 @@ private:
size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; }
template <bool positive>
struct less;
struct Cmp;
template <bool positive>
struct lessWithCollation;
struct CmpWithCollation;
ColumnString() = default;
ColumnString(const ColumnString & src);
template <typename Comparator>
void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const;
template <typename Comparator>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const;
public:
const char * getFamilyName() const override { return "String"; }
TypeIndex getDataType() const override { return TypeIndex::String; }
@ -229,16 +235,16 @@ public:
int direction, int nan_direction_hint) const override;
/// Variant of compareAt for string comparison with respect of collation.
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override;
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
/// Sorting with respect of collation.
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges& equal_range) const;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
@ -270,6 +276,8 @@ public:
// Throws an exception if offsets/chars are messed up
void validate() const;
bool isCollationSupported() const override { return true; }
};

View File

@ -275,16 +275,27 @@ MutableColumns ColumnTuple::scatter(ColumnIndex num_columns, const Selector & se
return res;
}
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const
{
const size_t tuple_size = columns.size();
for (size_t i = 0; i < tuple_size; ++i)
if (int res = columns[i]->compareAt(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint))
{
int res;
if (collator && columns[i]->isCollationSupported())
res = columns[i]->compareAtWithCollation(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint, *collator);
else
res = columns[i]->compareAt(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint);
if (res)
return res;
}
return 0;
}
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint);
}
void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
@ -293,14 +304,20 @@ void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num,
compare_results, direction, nan_direction_hint);
}
int ColumnTuple::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint, &collator);
}
template <bool positive>
struct ColumnTuple::Less
{
TupleColumns columns;
int nan_direction_hint;
const Collator * collator;
Less(const TupleColumns & columns_, int nan_direction_hint_)
: columns(columns_), nan_direction_hint(nan_direction_hint_)
Less(const TupleColumns & columns_, int nan_direction_hint_, const Collator * collator_=nullptr)
: columns(columns_), nan_direction_hint(nan_direction_hint_), collator(collator_)
{
}
@ -308,7 +325,11 @@ struct ColumnTuple::Less
{
for (const auto & column : columns)
{
int res = column->compareAt(a, b, *column, nan_direction_hint);
int res;
if (collator && column->isCollationSupported())
res = column->compareAtWithCollation(a, b, *column, nan_direction_hint, *collator);
else
res = column->compareAt(a, b, *column, nan_direction_hint);
if (res < 0)
return positive;
else if (res > 0)
@ -318,7 +339,8 @@ struct ColumnTuple::Less
}
};
void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
template <typename LessOperator>
void ColumnTuple::getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const
{
size_t rows = size();
res.resize(rows);
@ -330,28 +352,25 @@ void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_h
if (limit)
{
if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<false>(columns, nan_direction_hint));
else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<true>(columns, nan_direction_hint));
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
}
else
{
if (reverse)
std::sort(res.begin(), res.end(), Less<false>(columns, nan_direction_hint));
else
std::sort(res.begin(), res.end(), Less<true>(columns, nan_direction_hint));
std::sort(res.begin(), res.end(), less);
}
}
void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
void ColumnTuple::updatePermutationImpl(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const
{
if (equal_ranges.empty())
return;
for (const auto & column : columns)
{
column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges);
if (collator && column->isCollationSupported())
column->updatePermutationWithCollation(*collator, reverse, limit, nan_direction_hint, res, equal_ranges);
else
column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges);
while (limit && !equal_ranges.empty() && limit <= equal_ranges.back().first)
equal_ranges.pop_back();
@ -361,6 +380,32 @@ void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_directio
}
}
void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
if (reverse)
getPermutationImpl(limit, res, Less<false>(columns, nan_direction_hint));
else
getPermutationImpl(limit, res, Less<true>(columns, nan_direction_hint));
}
void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
{
updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges);
}
void ColumnTuple::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
if (reverse)
getPermutationImpl(limit, res, Less<false>(columns, nan_direction_hint, &collator));
else
getPermutationImpl(limit, res, Less<true>(columns, nan_direction_hint, &collator));
}
void ColumnTuple::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const
{
updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges, &collator);
}
void ColumnTuple::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
@ -433,5 +478,15 @@ bool ColumnTuple::structureEquals(const IColumn & rhs) const
return false;
}
bool ColumnTuple::isCollationSupported() const
{
for (const auto& column : columns)
{
if (column->isCollationSupported())
return true;
}
return false;
}
}

View File

@ -75,15 +75,19 @@ public:
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override;
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
void reserve(size_t n) override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
bool isCollationSupported() const override;
size_t tupleSize() const { return columns.size(); }
@ -94,6 +98,15 @@ public:
Columns getColumnsCopy() const { return {columns.begin(), columns.end()}; }
const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; }
private:
int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
template <typename LessOperator>
void getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const;
void updatePermutationImpl(
bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator=nullptr) const;
};

View File

@ -9,7 +9,7 @@
class SipHash;
class Collator;
namespace DB
{
@ -18,6 +18,7 @@ namespace ErrorCodes
{
extern const int CANNOT_GET_SIZE_OF_FIELD;
extern const int NOT_IMPLEMENTED;
extern const int BAD_COLLATION;
}
class Arena;
@ -250,6 +251,12 @@ public:
*/
virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0;
/// Equivalent to compareAt, but collator is used to compare values.
virtual int compareAtWithCollation(size_t, size_t, const IColumn &, int, const Collator &) const
{
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing it.", ErrorCodes::BAD_COLLATION);
}
/// Compare the whole column with single value from rhs column.
/// If row_indexes is nullptr, it's ignored. Otherwise, it is a set of rows to compare.
/// compare_results[i] will be equal to compareAt(row_indexes[i], rhs_row_num, rhs, nan_direction_hint) * direction
@ -277,6 +284,18 @@ public:
*/
virtual void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const = 0;
/** Equivalent to getPermutation and updatePermutation but collator is used to compare values.
* Supported for String, LowCardinality(String), Nullable(String) and for Array and Tuple, containing them.
*/
virtual void getPermutationWithCollation(const Collator &, bool, size_t, int, Permutation &) const
{
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
}
virtual void updatePermutationWithCollation(const Collator &, bool, size_t, int, Permutation &, EqualRanges&) const
{
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
}
/** Copies each element according offsets parameter.
* (i-th element should be copied offsets[i] - offsets[i - 1] times.)
* It is necessary in ARRAY JOIN operation.
@ -402,6 +421,8 @@ public:
virtual bool lowCardinality() const { return false; }
virtual bool isCollationSupported() const { return false; }
virtual ~IColumn() = default;
IColumn() = default;
IColumn(const IColumn &) = default;

View File

@ -71,7 +71,8 @@ void checkColumn(
std::unordered_map<UInt32, T> map;
size_t num_collisions = 0;
std::stringstream collitions_str;
std::stringstream collisions_str;
collisions_str.exceptions(std::ios::failbit);
for (size_t i = 0; i < eq_class.size(); ++i)
{
@ -86,14 +87,14 @@ void checkColumn(
if (num_collisions <= max_collisions_to_print)
{
collitions_str << "Collision:\n";
collitions_str << print_for_row(it->second) << '\n';
collitions_str << print_for_row(i) << std::endl;
collisions_str << "Collision:\n";
collisions_str << print_for_row(it->second) << '\n';
collisions_str << print_for_row(i) << std::endl;
}
if (num_collisions > allowed_collisions)
{
std::cerr << collitions_str.rdbuf();
std::cerr << collisions_str.rdbuf();
break;
}
}

View File

@ -538,6 +538,7 @@ XMLDocumentPtr ConfigProcessor::processConfig(
*has_zk_includes = !contributing_zk_paths.empty();
std::stringstream comment;
comment.exceptions(std::ios::failbit);
comment << " This file was generated automatically.\n";
comment << " Do not edit it: it is likely to be discarded and generated again before it's read next time.\n";
comment << " Files used to generate this file:";

View File

@ -246,6 +246,7 @@ static std::string getExtraExceptionInfo(const std::exception & e)
std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded_stacktrace /*= false*/, bool with_extra_info /*= true*/)
{
std::stringstream stream;
stream.exceptions(std::ios::failbit);
try
{
@ -365,6 +366,7 @@ void tryLogException(std::exception_ptr e, Poco::Logger * logger, const std::str
std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace)
{
std::stringstream stream;
stream.exceptions(std::ios::failbit);
try
{

View File

@ -134,6 +134,7 @@ void MemoryTracker::alloc(Int64 size)
ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded);
std::stringstream message;
message.exceptions(std::ios::failbit);
message << "Memory tracker";
if (const auto * description = description_ptr.load(std::memory_order_relaxed))
message << " " << description;
@ -166,6 +167,7 @@ void MemoryTracker::alloc(Int64 size)
ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded);
std::stringstream message;
message.exceptions(std::ios::failbit);
message << "Memory limit";
if (const auto * description = description_ptr.load(std::memory_order_relaxed))
message << " " << description;

View File

@ -74,6 +74,7 @@ ShellCommand::~ShellCommand()
void ShellCommand::logCommand(const char * filename, char * const argv[])
{
std::stringstream args;
args.exceptions(std::ios::failbit);
for (int i = 0; argv != nullptr && argv[i] != nullptr; ++i)
{
if (i > 0)

View File

@ -1,76 +1,44 @@
#pragma once
#include <Common/Arena.h>
#include <ext/range.h>
#include <ext/size.h>
#include <ext/bit_cast.h>
#include <cstdlib>
#include <memory>
#include <common/unaligned.h>
namespace DB
{
/** Can allocate memory objects of fixed size with deletion support.
* For small `object_size`s allocated no less than getMinAllocationSize() bytes. */
* For small `object_size`s allocated no less than pointer size.
*/
class SmallObjectPool
{
private:
struct Block { Block * next; };
static constexpr auto getMinAllocationSize() { return sizeof(Block); }
const size_t object_size;
Arena pool;
Block * free_list{};
char * free_list = nullptr;
public:
SmallObjectPool(
const size_t object_size_, const size_t initial_size = 4096, const size_t growth_factor = 2,
const size_t linear_growth_threshold = 128 * 1024 * 1024)
: object_size{std::max(object_size_, getMinAllocationSize())},
pool{initial_size, growth_factor, linear_growth_threshold}
SmallObjectPool(size_t object_size_)
: object_size{std::max(object_size_, sizeof(char *))}
{
if (pool.size() < object_size)
return;
const auto num_objects = pool.size() / object_size;
auto head = free_list = ext::bit_cast<Block *>(pool.alloc(num_objects * object_size));
for (const auto i : ext::range(0, num_objects - 1))
{
(void) i;
head->next = ext::bit_cast<Block *>(ext::bit_cast<char *>(head) + object_size);
head = head->next;
}
head->next = nullptr;
}
char * alloc()
{
if (free_list)
{
const auto res = reinterpret_cast<char *>(free_list);
free_list = free_list->next;
char * res = free_list;
free_list = unalignedLoad<char *>(free_list);
return res;
}
return pool.alloc(object_size);
}
void free(const void * ptr)
void free(char * ptr)
{
union
{
const void * p_v;
Block * block;
};
p_v = ptr;
block->next = free_list;
free_list = block;
unalignedStore<char *>(ptr, free_list);
free_list = ptr;
}
/// The size of the allocated pool in bytes
@ -81,5 +49,4 @@ public:
};
}

View File

@ -24,6 +24,7 @@
std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext_t & context)
{
std::stringstream error;
error.exceptions(std::ios::failbit);
switch (sig)
{
case SIGSEGV:
@ -319,6 +320,7 @@ static void toStringEveryLineImpl(
std::unordered_map<std::string, DB::Dwarf> dwarfs;
std::stringstream out;
out.exceptions(std::ios::failbit);
for (size_t i = offset; i < size; ++i)
{
@ -358,6 +360,7 @@ static void toStringEveryLineImpl(
}
#else
std::stringstream out;
out.exceptions(std::ios::failbit);
for (size_t i = offset; i < size; ++i)
{
@ -373,6 +376,7 @@ static void toStringEveryLineImpl(
static std::string toStringImpl(const StackTrace::FramePointers & frame_pointers, size_t offset, size_t size)
{
std::stringstream out;
out.exceptions(std::ios::failbit);
toStringEveryLineImpl(frame_pointers, offset, size, [&](const std::string & str) { out << str << '\n'; });
return out.str();
}

View File

@ -154,6 +154,8 @@ std::pair<bool, std::string> StudentTTest::compareAndReport(size_t confidence_le
double mean_confidence_interval = table_value * t_statistic;
std::stringstream ss;
ss.exceptions(std::ios::failbit);
if (mean_difference > mean_confidence_interval && (mean_difference - mean_confidence_interval > 0.0001)) /// difference must be more than 0.0001, to take into account connection latency.
{
ss << "Difference at " << confidence_level[confidence_level_index] << "% confidence : ";

View File

@ -216,7 +216,7 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
if (!jobs.empty())
{
job = jobs.top().job;
job = std::move(jobs.top().job);
jobs.pop();
}
else

View File

@ -398,8 +398,7 @@ bool PerfEventsCounters::processThreadLocalChanges(const std::string & needed_ev
return true;
}
// Parse comma-separated list of event names. Empty means all available
// events.
// Parse comma-separated list of event names. Empty means all available events.
std::vector<size_t> PerfEventsCounters::eventIndicesFromString(const std::string & events_list)
{
std::vector<size_t> result;
@ -418,8 +417,7 @@ std::vector<size_t> PerfEventsCounters::eventIndicesFromString(const std::string
std::string event_name;
while (std::getline(iss, event_name, ','))
{
// Allow spaces at the beginning of the token, so that you can write
// 'a, b'.
// Allow spaces at the beginning of the token, so that you can write 'a, b'.
event_name.erase(0, event_name.find_first_not_of(' '));
auto entry = event_name_to_index.find(event_name);

View File

@ -80,6 +80,7 @@ void ThreadStatus::assertState(const std::initializer_list<int> & permitted_stat
}
std::stringstream ss;
ss.exceptions(std::ios::failbit);
ss << "Unexpected thread state " << getCurrentState();
if (description)
ss << ": " << description;

View File

@ -49,6 +49,7 @@ struct UInt128
String toHexString() const
{
std::ostringstream os;
os.exceptions(std::ios::failbit);
os << std::setw(16) << std::setfill('0') << std::hex << high << low;
return String(os.str());
}

View File

@ -308,6 +308,7 @@ struct ODBCBridgeMixin
path.setFileName("clickhouse-odbc-bridge");
std::stringstream command;
command.exceptions(std::ios::failbit);
#if !CLICKHOUSE_SPLIT_BINARY
cmd_args.push_back("odbc-bridge");

View File

@ -219,6 +219,7 @@ std::pair<ResponsePtr, Undo> TestKeeperCreateRequest::process(TestKeeper::Contai
++it->second.seq_num;
std::stringstream seq_num_str;
seq_num_str.exceptions(std::ios::failbit);
seq_num_str << std::setw(10) << std::setfill('0') << seq_num;
path_created += seq_num_str.str();

View File

@ -81,6 +81,7 @@ __attribute__((__weak__)) void checkStackSize()
if (stack_size * 2 > max_stack_size)
{
std::stringstream message;
message.exceptions(std::ios::failbit);
message << "Stack size too large"
<< ". Stack address: " << stack_address
<< ", frame address: " << frame_address

View File

@ -3,7 +3,6 @@
#include <re2/stringpiece.h>
#include <algorithm>
#include <sstream>
#include <cassert>
#include <iomanip>
@ -20,6 +19,7 @@ namespace DB
std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs)
{
std::ostringstream oss_for_escaping;
oss_for_escaping.exceptions(std::ios::failbit);
/// Escaping only characters that not used in glob syntax
for (const auto & letter : initial_str_with_globs)
{
@ -33,6 +33,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
re2::StringPiece input(escaped_with_globs);
re2::StringPiece matched;
std::ostringstream oss_for_replacing;
oss_for_replacing.exceptions(std::ios::failbit);
size_t current_index = 0;
while (RE2::FindAndConsume(&input, enum_or_range, &matched))
{
@ -45,8 +46,8 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
size_t range_end = 0;
char point;
std::istringstream iss_range(buffer);
iss_range.exceptions(std::ios::failbit);
iss_range >> range_begin >> point >> point >> range_end;
assert(!iss_range.fail());
bool leading_zeros = buffer[0] == '0';
size_t num_len = std::to_string(range_end).size();
if (leading_zeros)
@ -71,6 +72,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
oss_for_replacing << escaped_with_globs.substr(current_index);
std::string almost_res = oss_for_replacing.str();
std::ostringstream oss_final_processing;
oss_final_processing.exceptions(std::ios::failbit);
for (const auto & letter : almost_res)
{
if ((letter == '?') || (letter == '*'))

View File

@ -0,0 +1,15 @@
#pragma once
#include <Functions/registerFunctions.h>
#include <Formats/registerFormats.h>
inline void tryRegisterFunctions()
{
static struct Register { Register() { DB::registerFunctions(); } } registered;
}
inline void tryRegisterFormats()
{
static struct Register { Register() { DB::registerFormats(); } } registered;
}

View File

@ -1,18 +0,0 @@
#pragma once
#include <Functions/FunctionFactory.h>
#include <Functions/registerFunctions.h>
struct RegisteredFunctionsState
{
RegisteredFunctionsState()
{
DB::registerFunctions();
}
RegisteredFunctionsState(RegisteredFunctionsState &&) = default;
};
inline void tryRegisterFunctions()
{
static RegisteredFunctionsState registered_functions_state;
}

View File

@ -165,6 +165,7 @@ TEST(Common, SensitiveDataMasker)
</rule>
</query_masking_rules>
</clickhouse>)END");
Poco::AutoPtr<Poco::Util::XMLConfiguration> xml_config = new Poco::Util::XMLConfiguration(xml_isteam_bad);
DB::SensitiveDataMasker masker_xml_based_exception_check(*xml_config, "query_masking_rules");

View File

@ -52,6 +52,7 @@ int main(int, char **)
if (x != i)
{
std::stringstream s;
s.exceptions(std::ios::failbit);
s << "Failed!, read: " << x << ", expected: " << i;
throw DB::Exception(s.str(), 0);
}

View File

@ -22,6 +22,7 @@ void IMySQLReadPacket::readPayload(ReadBuffer & in, uint8_t & sequence_id)
if (!payload.eof())
{
std::stringstream tmp;
tmp.exceptions(std::ios::failbit);
tmp << "Packet payload is not fully read. Stopped after " << payload.count() << " bytes, while " << payload.available() << " bytes are in buffer.";
throw Exception(tmp.str(), ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT);
}

View File

@ -16,6 +16,7 @@ void IMySQLWritePacket::writePayload(WriteBuffer & buffer, uint8_t & sequence_id
if (buf.remainingPayloadSize())
{
std::stringstream ss;
ss.exceptions(std::ios::failbit);
ss << "Incomplete payload. Written " << getPayloadSize() - buf.remainingPayloadSize() << " bytes, expected " << getPayloadSize() << " bytes.";
throw Exception(ss.str(), 0);
}

View File

@ -374,9 +374,8 @@ class IColumn;
M(Bool, optimize_monotonous_functions_in_order_by, true, "Replace monotonous function with its argument in ORDER BY", 0) \
M(Bool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
\
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, use_compact_format_in_distributed_parts_names, false, "Changes format of directories names for distributed table insert parts.", 0) \
M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \
@ -385,7 +384,6 @@ class IColumn;
M(Seconds, lock_acquire_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "How long locking request should wait before failing", 0) \
M(Bool, materialize_ttl_after_modify, true, "Apply TTL for old data, after ALTER MODIFY TTL query", 0) \
M(String, function_implementation, "", "Choose function implementation for specific target or variant (experimental). If empty enable all of them.", 0) \
\
M(Bool, allow_experimental_geo_types, false, "Allow geo data types such as Point, Ring, Polygon, MultiPolygon", 0) \
M(Bool, allow_experimental_bigint_types, false, "Allow Int128, Int256, UInt256 and Decimal256 types", 0) \
M(Bool, data_type_default_nullable, false, "Data types without NULL or NOT NULL will make Nullable", 0) \
@ -394,20 +392,18 @@ class IColumn;
M(Bool, allow_experimental_database_materialize_mysql, false, "Allow to create database with Engine=MaterializeMySQL(...).", 0) \
M(Bool, system_events_show_zero_values, false, "Include all metrics, even with zero values", 0) \
M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \
M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
\
M(Bool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \
M(UInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \
M(UInt64, multiple_joins_rewriter_version, 0, "Obsolete setting, does nothing. Will be removed after 2021-03-31", 0) \
\
M(Bool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \
M(Bool, experimental_use_processors, true, "Obsolete setting, does nothing. Will be removed after 2020-11-29.", 0) \
M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \
M(Bool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \
M(Bool, enable_debug_queries, false, "Enabled debug queries, but now is obsolete", 0) \
M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing. Will be removed after 2021-02-12", 0) \
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(Bool, enable_debug_queries, false, "Enabled debug queries, but now is obsolete", 0)
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below.

View File

@ -96,7 +96,7 @@ struct SortCursorImpl
: column_desc.column_number;
sort_columns.push_back(columns[column_number].get());
need_collation[j] = desc[j].collator != nullptr && typeid_cast<const ColumnString *>(sort_columns.back()); /// TODO Nullable(String)
need_collation[j] = desc[j].collator != nullptr && sort_columns.back()->isCollationSupported(); /// TODO Nullable(String)
has_collation |= need_collation[j];
}
@ -201,10 +201,7 @@ struct SortCursorWithCollation : SortCursorHelper<SortCursorWithCollation>
int nulls_direction = desc.nulls_direction;
int res;
if (impl->need_collation[i])
{
const ColumnString & column_string = assert_cast<const ColumnString &>(*impl->sort_columns[i]);
res = column_string.compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), *impl->desc[i].collator);
}
res = impl->sort_columns[i]->compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction, *impl->desc[i].collator);
else
res = impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction);

View File

@ -61,6 +61,7 @@ struct SortColumnDescription
std::string dump() const
{
std::stringstream ss;
ss.exceptions(std::ios::failbit);
ss << column_name << ":" << column_number << ":dir " << direction << "nulls " << nulls_direction;
return ss.str();
}

View File

@ -60,6 +60,7 @@ void CheckConstraintsBlockOutputStream::write(const Block & block)
if (!value)
{
std::stringstream exception_message;
exception_message.exceptions(std::ios::failbit);
exception_message << "Constraint " << backQuote(constraint_ptr->name)
<< " for table " << table_id.getNameForLogs()
@ -87,6 +88,7 @@ void CheckConstraintsBlockOutputStream::write(const Block & block)
Names related_columns = constraint_expr->getRequiredColumns();
std::stringstream exception_message;
exception_message.exceptions(std::ios::failbit);
exception_message << "Constraint " << backQuote(constraint_ptr->name)
<< " for table " << table_id.getNameForLogs()

View File

@ -360,6 +360,7 @@ Block IBlockInputStream::getExtremes()
String IBlockInputStream::getTreeID() const
{
std::stringstream s;
s.exceptions(std::ios::failbit);
s << getName();
if (!children.empty())

View File

@ -5,6 +5,7 @@
#include <Processors/Pipe.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Storages/IStorage.h>
#include <Storages/SelectQueryInfo.h>
#include <Interpreters/castColumn.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/InternalTextLogsQueue.h>
@ -314,6 +315,8 @@ void RemoteQueryExecutor::sendScalars()
void RemoteQueryExecutor::sendExternalTables()
{
SelectQueryInfo query_info;
size_t count = multiplexed_connections->size();
{
@ -328,11 +331,12 @@ void RemoteQueryExecutor::sendExternalTables()
{
StoragePtr cur = table.second;
auto metadata_snapshot = cur->getInMemoryMetadataPtr();
QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage(context);
QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage(
context, QueryProcessingStage::Complete, query_info);
Pipe pipe = cur->read(
metadata_snapshot->getColumns().getNamesOfPhysical(),
metadata_snapshot, {}, context,
metadata_snapshot, query_info, context,
read_from_table_stage, DEFAULT_BLOCK_SIZE, 1);
auto data = std::make_unique<ExternalTableData>();

View File

@ -33,6 +33,7 @@ static const std::vector<String> supported_functions{"any", "anyLast", "min",
String DataTypeCustomSimpleAggregateFunction::getName() const
{
std::stringstream stream;
stream.exceptions(std::ios::failbit);
stream << "SimpleAggregateFunction(" << function->getName();
if (!parameters.empty())

View File

@ -30,6 +30,7 @@ template <typename T>
std::string DataTypeDecimal<T>::doGetName() const
{
std::stringstream ss;
ss.exceptions(std::ios::failbit);
ss << "Decimal(" << this->precision << ", " << this->scale << ")";
return ss.str();
}

View File

@ -95,6 +95,7 @@ String getObjectDefinitionFromCreateQuery(const ASTPtr & query)
if (!create)
{
std::ostringstream query_stream;
query_stream.exceptions(std::ios::failbit);
formatAST(*query, query_stream, true);
throw Exception("Query '" + query_stream.str() + "' is not CREATE query", ErrorCodes::LOGICAL_ERROR);
}
@ -121,6 +122,7 @@ String getObjectDefinitionFromCreateQuery(const ASTPtr & query)
create->table = TABLE_WITH_UUID_NAME_PLACEHOLDER;
std::ostringstream statement_stream;
statement_stream.exceptions(std::ios::failbit);
formatAST(*create, statement_stream, false);
statement_stream << '\n';
return statement_stream.str();

View File

@ -128,6 +128,7 @@ static String checkVariableAndGetVersion(const mysqlxx::Pool::Entry & connection
bool first = true;
std::stringstream error_message;
error_message.exceptions(std::ios::failbit);
error_message << "Illegal MySQL variables, the MaterializeMySQL engine requires ";
for (const auto & [variable_name, variable_error_message] : variables_error_message)
{
@ -239,6 +240,7 @@ static inline BlockOutputStreamPtr getTableOutput(const String & database_name,
const StoragePtr & storage = DatabaseCatalog::instance().getTable(StorageID(database_name, table_name), query_context);
std::stringstream insert_columns_str;
insert_columns_str.exceptions(std::ios::failbit);
const StorageInMemoryMetadata & storage_metadata = storage->getInMemoryMetadata();
const ColumnsDescription & storage_columns = storage_metadata.getColumns();
const NamesAndTypesList & insert_columns_names = insert_materialized ? storage_columns.getAllPhysical() : storage_columns.getOrdinary();
@ -330,6 +332,7 @@ std::optional<MaterializeMetadata> MaterializeMySQLSyncThread::prepareSynchroniz
const auto & position_message = [&]()
{
std::stringstream ss;
ss.exceptions(std::ios::failbit);
position.dump(ss);
return ss.str();
};
@ -372,6 +375,7 @@ void MaterializeMySQLSyncThread::flushBuffersData(Buffers & buffers, Materialize
const auto & position_message = [&]()
{
std::stringstream ss;
ss.exceptions(std::ios::failbit);
client.getPosition().dump(ss);
return ss.str();
};
@ -643,6 +647,7 @@ void MaterializeMySQLSyncThread::onEvent(Buffers & buffers, const BinlogEventPtr
const auto & dump_event_message = [&]()
{
std::stringstream ss;
ss.exceptions(std::ios::failbit);
receive_event->dump(ss);
return ss.str();
};

View File

@ -16,6 +16,8 @@
#include <common/StringRef.h>
#include <ext/bit_cast.h>
#include <ext/map.h>
#include <ext/range.h>
#include <ext/size.h>
#include <ext/scope_guard.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"

View File

@ -231,6 +231,7 @@ std::string DictionaryStructure::getKeyDescription() const
return "UInt64";
std::ostringstream out;
out.exceptions(std::ios::failbit);
out << '(';

View File

@ -19,6 +19,7 @@ static std::string configurationToString(const DictionaryConfigurationPtr & conf
{
const Poco::Util::XMLConfiguration * xml_config = dynamic_cast<const Poco::Util::XMLConfiguration *>(config.get());
std::ostringstream oss;
oss.exceptions(std::ios::failbit);
xml_config->save(oss);
return oss.str();
}

View File

@ -333,150 +333,6 @@ void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegm
target = std::move(file_segmentation_engine);
}
/// File Segmentation Engines for parallel reading
void registerFileSegmentationEngineTabSeparated(FormatFactory & factory);
void registerFileSegmentationEngineCSV(FormatFactory & factory);
void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory);
void registerFileSegmentationEngineRegexp(FormatFactory & factory);
void registerFileSegmentationEngineJSONAsString(FormatFactory & factory);
void registerFileSegmentationEngineLineAsString(FormatFactory & factory);
/// Formats for both input/output.
void registerInputFormatNative(FormatFactory & factory);
void registerOutputFormatNative(FormatFactory & factory);
void registerInputFormatProcessorNative(FormatFactory & factory);
void registerOutputFormatProcessorNative(FormatFactory & factory);
void registerInputFormatProcessorRowBinary(FormatFactory & factory);
void registerOutputFormatProcessorRowBinary(FormatFactory & factory);
void registerInputFormatProcessorTabSeparated(FormatFactory & factory);
void registerOutputFormatProcessorTabSeparated(FormatFactory & factory);
void registerInputFormatProcessorValues(FormatFactory & factory);
void registerOutputFormatProcessorValues(FormatFactory & factory);
void registerInputFormatProcessorCSV(FormatFactory & factory);
void registerOutputFormatProcessorCSV(FormatFactory & factory);
void registerInputFormatProcessorTSKV(FormatFactory & factory);
void registerOutputFormatProcessorTSKV(FormatFactory & factory);
void registerInputFormatProcessorJSONEachRow(FormatFactory & factory);
void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory);
void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory);
void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory);
void registerInputFormatProcessorProtobuf(FormatFactory & factory);
void registerOutputFormatProcessorProtobuf(FormatFactory & factory);
void registerInputFormatProcessorTemplate(FormatFactory & factory);
void registerOutputFormatProcessorTemplate(FormatFactory & factory);
void registerInputFormatProcessorMsgPack(FormatFactory & factory);
void registerOutputFormatProcessorMsgPack(FormatFactory & factory);
void registerInputFormatProcessorORC(FormatFactory & factory);
void registerOutputFormatProcessorORC(FormatFactory & factory);
void registerInputFormatProcessorParquet(FormatFactory & factory);
void registerOutputFormatProcessorParquet(FormatFactory & factory);
void registerInputFormatProcessorArrow(FormatFactory & factory);
void registerOutputFormatProcessorArrow(FormatFactory & factory);
void registerInputFormatProcessorAvro(FormatFactory & factory);
void registerOutputFormatProcessorAvro(FormatFactory & factory);
void registerInputFormatProcessorRawBLOB(FormatFactory & factory);
void registerOutputFormatProcessorRawBLOB(FormatFactory & factory);
/// Output only (presentational) formats.
void registerOutputFormatNull(FormatFactory & factory);
void registerOutputFormatProcessorPretty(FormatFactory & factory);
void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory);
void registerOutputFormatProcessorPrettySpace(FormatFactory & factory);
void registerOutputFormatProcessorVertical(FormatFactory & factory);
void registerOutputFormatProcessorJSON(FormatFactory & factory);
void registerOutputFormatProcessorJSONCompact(FormatFactory & factory);
void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory);
void registerOutputFormatProcessorXML(FormatFactory & factory);
void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory);
void registerOutputFormatProcessorNull(FormatFactory & factory);
void registerOutputFormatProcessorMySQLWire(FormatFactory & factory);
void registerOutputFormatProcessorMarkdown(FormatFactory & factory);
void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory);
/// Input only formats.
void registerInputFormatProcessorRegexp(FormatFactory & factory);
void registerInputFormatProcessorJSONAsString(FormatFactory & factory);
void registerInputFormatProcessorLineAsString(FormatFactory & factory);
void registerInputFormatProcessorCapnProto(FormatFactory & factory);
FormatFactory::FormatFactory()
{
registerFileSegmentationEngineTabSeparated(*this);
registerFileSegmentationEngineCSV(*this);
registerFileSegmentationEngineJSONEachRow(*this);
registerFileSegmentationEngineRegexp(*this);
registerFileSegmentationEngineJSONAsString(*this);
registerFileSegmentationEngineLineAsString(*this);
registerInputFormatNative(*this);
registerOutputFormatNative(*this);
registerInputFormatProcessorNative(*this);
registerOutputFormatProcessorNative(*this);
registerInputFormatProcessorRowBinary(*this);
registerOutputFormatProcessorRowBinary(*this);
registerInputFormatProcessorTabSeparated(*this);
registerOutputFormatProcessorTabSeparated(*this);
registerInputFormatProcessorValues(*this);
registerOutputFormatProcessorValues(*this);
registerInputFormatProcessorCSV(*this);
registerOutputFormatProcessorCSV(*this);
registerInputFormatProcessorTSKV(*this);
registerOutputFormatProcessorTSKV(*this);
registerInputFormatProcessorJSONEachRow(*this);
registerOutputFormatProcessorJSONEachRow(*this);
registerInputFormatProcessorJSONCompactEachRow(*this);
registerOutputFormatProcessorJSONCompactEachRow(*this);
registerInputFormatProcessorProtobuf(*this);
registerOutputFormatProcessorProtobuf(*this);
registerInputFormatProcessorTemplate(*this);
registerOutputFormatProcessorTemplate(*this);
registerInputFormatProcessorMsgPack(*this);
registerOutputFormatProcessorMsgPack(*this);
registerInputFormatProcessorRawBLOB(*this);
registerOutputFormatProcessorRawBLOB(*this);
#if !defined(ARCADIA_BUILD)
registerInputFormatProcessorORC(*this);
registerOutputFormatProcessorORC(*this);
registerInputFormatProcessorParquet(*this);
registerOutputFormatProcessorParquet(*this);
registerInputFormatProcessorArrow(*this);
registerOutputFormatProcessorArrow(*this);
registerInputFormatProcessorAvro(*this);
registerOutputFormatProcessorAvro(*this);
#endif
registerOutputFormatNull(*this);
registerOutputFormatProcessorPretty(*this);
registerOutputFormatProcessorPrettyCompact(*this);
registerOutputFormatProcessorPrettySpace(*this);
registerOutputFormatProcessorVertical(*this);
registerOutputFormatProcessorJSON(*this);
registerOutputFormatProcessorJSONCompact(*this);
registerOutputFormatProcessorJSONEachRowWithProgress(*this);
registerOutputFormatProcessorXML(*this);
registerOutputFormatProcessorODBCDriver2(*this);
registerOutputFormatProcessorNull(*this);
registerOutputFormatProcessorMySQLWire(*this);
registerOutputFormatProcessorMarkdown(*this);
registerOutputFormatProcessorPostgreSQLWire(*this);
registerInputFormatProcessorRegexp(*this);
registerInputFormatProcessorJSONAsString(*this);
registerInputFormatProcessorLineAsString(*this);
#if !defined(ARCADIA_BUILD)
registerInputFormatProcessorCapnProto(*this);
#endif
}
FormatFactory & FormatFactory::instance()
{

View File

@ -96,7 +96,6 @@ private:
using FormatsDictionary = std::unordered_map<String, Creators>;
public:
static FormatFactory & instance();
BlockInputStreamPtr getInput(
@ -137,8 +136,6 @@ public:
private:
FormatsDictionary dict;
FormatFactory();
const Creators & getCreators(const String & name) const;
};

View File

@ -0,0 +1,160 @@
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Formats/FormatFactory.h>
namespace DB
{
/// File Segmentation Engines for parallel reading
void registerFileSegmentationEngineTabSeparated(FormatFactory & factory);
void registerFileSegmentationEngineCSV(FormatFactory & factory);
void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory);
void registerFileSegmentationEngineRegexp(FormatFactory & factory);
void registerFileSegmentationEngineJSONAsString(FormatFactory & factory);
void registerFileSegmentationEngineLineAsString(FormatFactory & factory);
/// Formats for both input/output.
void registerInputFormatNative(FormatFactory & factory);
void registerOutputFormatNative(FormatFactory & factory);
void registerInputFormatProcessorNative(FormatFactory & factory);
void registerOutputFormatProcessorNative(FormatFactory & factory);
void registerInputFormatProcessorRowBinary(FormatFactory & factory);
void registerOutputFormatProcessorRowBinary(FormatFactory & factory);
void registerInputFormatProcessorTabSeparated(FormatFactory & factory);
void registerOutputFormatProcessorTabSeparated(FormatFactory & factory);
void registerInputFormatProcessorValues(FormatFactory & factory);
void registerOutputFormatProcessorValues(FormatFactory & factory);
void registerInputFormatProcessorCSV(FormatFactory & factory);
void registerOutputFormatProcessorCSV(FormatFactory & factory);
void registerInputFormatProcessorTSKV(FormatFactory & factory);
void registerOutputFormatProcessorTSKV(FormatFactory & factory);
void registerInputFormatProcessorJSONEachRow(FormatFactory & factory);
void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory);
void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory);
void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory);
void registerInputFormatProcessorProtobuf(FormatFactory & factory);
void registerOutputFormatProcessorProtobuf(FormatFactory & factory);
void registerInputFormatProcessorTemplate(FormatFactory & factory);
void registerOutputFormatProcessorTemplate(FormatFactory & factory);
void registerInputFormatProcessorMsgPack(FormatFactory & factory);
void registerOutputFormatProcessorMsgPack(FormatFactory & factory);
void registerInputFormatProcessorORC(FormatFactory & factory);
void registerOutputFormatProcessorORC(FormatFactory & factory);
void registerInputFormatProcessorParquet(FormatFactory & factory);
void registerOutputFormatProcessorParquet(FormatFactory & factory);
void registerInputFormatProcessorArrow(FormatFactory & factory);
void registerOutputFormatProcessorArrow(FormatFactory & factory);
void registerInputFormatProcessorAvro(FormatFactory & factory);
void registerOutputFormatProcessorAvro(FormatFactory & factory);
void registerInputFormatProcessorRawBLOB(FormatFactory & factory);
void registerOutputFormatProcessorRawBLOB(FormatFactory & factory);
/// Output only (presentational) formats.
void registerOutputFormatNull(FormatFactory & factory);
void registerOutputFormatProcessorPretty(FormatFactory & factory);
void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory);
void registerOutputFormatProcessorPrettySpace(FormatFactory & factory);
void registerOutputFormatProcessorVertical(FormatFactory & factory);
void registerOutputFormatProcessorJSON(FormatFactory & factory);
void registerOutputFormatProcessorJSONCompact(FormatFactory & factory);
void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory);
void registerOutputFormatProcessorXML(FormatFactory & factory);
void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory);
void registerOutputFormatProcessorNull(FormatFactory & factory);
void registerOutputFormatProcessorMySQLWire(FormatFactory & factory);
void registerOutputFormatProcessorMarkdown(FormatFactory & factory);
void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory);
/// Input only formats.
void registerInputFormatProcessorRegexp(FormatFactory & factory);
void registerInputFormatProcessorJSONAsString(FormatFactory & factory);
void registerInputFormatProcessorLineAsString(FormatFactory & factory);
void registerInputFormatProcessorCapnProto(FormatFactory & factory);
void registerFormats()
{
auto & factory = FormatFactory::instance();
registerFileSegmentationEngineTabSeparated(factory);
registerFileSegmentationEngineCSV(factory);
registerFileSegmentationEngineJSONEachRow(factory);
registerFileSegmentationEngineRegexp(factory);
registerFileSegmentationEngineJSONAsString(factory);
registerFileSegmentationEngineLineAsString(factory);
registerInputFormatNative(factory);
registerOutputFormatNative(factory);
registerInputFormatProcessorNative(factory);
registerOutputFormatProcessorNative(factory);
registerInputFormatProcessorRowBinary(factory);
registerOutputFormatProcessorRowBinary(factory);
registerInputFormatProcessorTabSeparated(factory);
registerOutputFormatProcessorTabSeparated(factory);
registerInputFormatProcessorValues(factory);
registerOutputFormatProcessorValues(factory);
registerInputFormatProcessorCSV(factory);
registerOutputFormatProcessorCSV(factory);
registerInputFormatProcessorTSKV(factory);
registerOutputFormatProcessorTSKV(factory);
registerInputFormatProcessorJSONEachRow(factory);
registerOutputFormatProcessorJSONEachRow(factory);
registerInputFormatProcessorJSONCompactEachRow(factory);
registerOutputFormatProcessorJSONCompactEachRow(factory);
registerInputFormatProcessorProtobuf(factory);
registerOutputFormatProcessorProtobuf(factory);
registerInputFormatProcessorTemplate(factory);
registerOutputFormatProcessorTemplate(factory);
registerInputFormatProcessorMsgPack(factory);
registerOutputFormatProcessorMsgPack(factory);
registerInputFormatProcessorRawBLOB(factory);
registerOutputFormatProcessorRawBLOB(factory);
#if !defined(ARCADIA_BUILD)
registerInputFormatProcessorORC(factory);
registerOutputFormatProcessorORC(factory);
registerInputFormatProcessorParquet(factory);
registerOutputFormatProcessorParquet(factory);
registerInputFormatProcessorArrow(factory);
registerOutputFormatProcessorArrow(factory);
registerInputFormatProcessorAvro(factory);
registerOutputFormatProcessorAvro(factory);
#endif
registerOutputFormatNull(factory);
registerOutputFormatProcessorPretty(factory);
registerOutputFormatProcessorPrettyCompact(factory);
registerOutputFormatProcessorPrettySpace(factory);
registerOutputFormatProcessorVertical(factory);
registerOutputFormatProcessorJSON(factory);
registerOutputFormatProcessorJSONCompact(factory);
registerOutputFormatProcessorJSONEachRowWithProgress(factory);
registerOutputFormatProcessorXML(factory);
registerOutputFormatProcessorODBCDriver2(factory);
registerOutputFormatProcessorNull(factory);
registerOutputFormatProcessorMySQLWire(factory);
registerOutputFormatProcessorMarkdown(factory);
registerOutputFormatProcessorPostgreSQLWire(factory);
registerInputFormatProcessorRegexp(factory);
registerInputFormatProcessorJSONAsString(factory);
registerInputFormatProcessorLineAsString(factory);
#if !defined(ARCADIA_BUILD)
registerInputFormatProcessorCapnProto(factory);
#endif
}
}

View File

@ -0,0 +1,9 @@
#pragma once
namespace DB
{
void registerFormats();
}

View File

@ -22,6 +22,7 @@ SRCS(
ProtobufReader.cpp
ProtobufSchemas.cpp
ProtobufWriter.cpp
registerFormats.cpp
verbosePrintString.cpp
)

View File

@ -12,7 +12,7 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/WriteBufferFromString.h>
#define STATS_ENABLE_STDVEC_WRAPPERS
#include <stats.hpp>
@ -139,31 +139,29 @@ Variants bayesian_ab_test(String distribution, PODArray<Float64> & xs, PODArray<
String convertToJson(const PODArray<String> & variant_names, const Variants & variants)
{
FormatSettings settings;
std::stringstream s;
WriteBufferFromOwnString buf;
writeCString("{\"data\":[", buf);
for (size_t i = 0; i < variants.size(); ++i)
{
WriteBufferFromOStream buf(s);
writeCString("{\"data\":[", buf);
for (size_t i = 0; i < variants.size(); ++i)
{
writeCString("{\"variant_name\":", buf);
writeJSONString(variant_names[i], buf, settings);
writeCString(",\"x\":", buf);
writeText(variants[i].x, buf);
writeCString(",\"y\":", buf);
writeText(variants[i].y, buf);
writeCString(",\"beats_control\":", buf);
writeText(variants[i].beats_control, buf);
writeCString(",\"to_be_best\":", buf);
writeText(variants[i].best, buf);
writeCString("}", buf);
if (i != variant_names.size() -1) writeCString(",", buf);
}
writeCString("]}", buf);
writeCString("{\"variant_name\":", buf);
writeJSONString(variant_names[i], buf, settings);
writeCString(",\"x\":", buf);
writeText(variants[i].x, buf);
writeCString(",\"y\":", buf);
writeText(variants[i].y, buf);
writeCString(",\"beats_control\":", buf);
writeText(variants[i].beats_control, buf);
writeCString(",\"to_be_best\":", buf);
writeText(variants[i].best, buf);
writeCString("}", buf);
if (i != variant_names.size() -1)
writeCString(",", buf);
}
writeCString("]}", buf);
return s.str();
return buf.str();
}
class FunctionBayesAB : public IFunction

View File

@ -10,39 +10,44 @@ Variants test_bayesab(std::string dist, PODArray<Float64> xs, PODArray<Float64>
{
Variants variants;
std::cout << std::fixed;
//std::cout << std::fixed;
if (dist == "beta")
{
std::cout << dist << "\nclicks: ";
for (auto x : xs) std::cout << x << " ";
/* std::cout << dist << "\nclicks: ";
for (auto x : xs)
std::cout << x << " ";
std::cout <<"\tconversions: ";
for (auto y : ys) std::cout << y << " ";
for (auto y : ys)
std::cout << y << " ";
std::cout << "\n";
std::cout << "\n";*/
variants = bayesian_ab_test<true>(dist, xs, ys);
}
else if (dist == "gamma")
{
std::cout << dist << "\nclicks: ";
for (auto x : xs) std::cout << x << " ";
/* std::cout << dist << "\nclicks: ";
for (auto x : xs)
std::cout << x << " ";
std::cout <<"\tcost: ";
for (auto y : ys) std::cout << y << " ";
for (auto y : ys)
std::cout << y << " ";
std::cout << "\n";*/
std::cout << "\n";
variants = bayesian_ab_test<true>(dist, xs, ys);
}
for (size_t i = 0; i < variants.size(); ++i)
/* for (size_t i = 0; i < variants.size(); ++i)
std::cout << i << " beats 0: " << variants[i].beats_control << std::endl;
for (size_t i = 0; i < variants.size(); ++i)
std::cout << i << " to be best: " << variants[i].best << std::endl;
std::cout << convertToJson({"0", "1", "2"}, variants) << std::endl;
*/
Float64 max_val = 0.0, min_val = 2.0;
for (size_t i = 0; i < variants.size(); ++i)
{

View File

@ -20,6 +20,7 @@
# include <Poco/Net/PrivateKeyPassphraseHandler.h>
# include <Poco/Net/RejectCertificateHandler.h>
# include <Poco/Net/SSLManager.h>
# include <Poco/Net/SecureStreamSocket.h>
#endif
#include <Poco/Net/HTTPServerResponse.h>
@ -68,27 +69,27 @@ namespace
throw Exception("Unsupported scheme in URI '" + uri.toString() + "'", ErrorCodes::UNSUPPORTED_URI_SCHEME);
}
HTTPSessionPtr makeHTTPSessionImpl(const std::string & host, UInt16 port, bool https, bool keep_alive, bool resolve_host=true)
HTTPSessionPtr makeHTTPSessionImpl(const std::string & host, UInt16 port, bool https, bool keep_alive, bool resolve_host = true)
{
HTTPSessionPtr session;
if (https)
{
#if USE_SSL
session = std::make_shared<Poco::Net::HTTPSClientSession>();
/// Cannot resolve host in advance, otherwise SNI won't work in Poco.
session = std::make_shared<Poco::Net::HTTPSClientSession>(host, port);
#else
throw Exception("ClickHouse was built without HTTPS support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME);
#endif
}
else
session = std::make_shared<Poco::Net::HTTPClientSession>();
{
String resolved_host = resolve_host ? DNSResolver::instance().resolveHost(host).toString() : host;
session = std::make_shared<Poco::Net::HTTPClientSession>(resolved_host, port);
}
ProfileEvents::increment(ProfileEvents::CreatedHTTPConnections);
if (resolve_host)
session->setHost(DNSResolver::instance().resolveHost(host).toString());
else
session->setHost(host);
session->setPort(port);
/// doesn't work properly without patch
#if defined(POCO_CLICKHOUSE_PATCH)
session->setKeepAlive(keep_alive);
@ -239,6 +240,7 @@ void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPR
if (!(status == Poco::Net::HTTPResponse::HTTP_OK || (isRedirect(status) && allow_redirects)))
{
std::stringstream error_message;
error_message.exceptions(std::ios::failbit);
error_message << "Received error from remote server " << request.getURI() << ". HTTP status code: " << status << " "
<< response.getReason() << ", body: " << istr.rdbuf();

View File

@ -13,6 +13,7 @@
#include <IO/ConnectionTimeouts.h>
namespace Poco
{
namespace Net
@ -24,6 +25,7 @@ namespace Net
namespace DB
{
constexpr int HTTP_TOO_MANY_REQUESTS = 429;
class SingleEndpointHTTPSessionPool : public PoolBase<Poco::Net::HTTPClientSession>
@ -39,6 +41,7 @@ private:
public:
SingleEndpointHTTPSessionPool(const std::string & host_, UInt16 port_, bool https_, size_t max_pool_size_);
};
using PooledHTTPSessionPtr = SingleEndpointHTTPSessionPool::Entry;
using HTTPSessionPtr = std::shared_ptr<Poco::Net::HTTPClientSession>;
@ -59,5 +62,7 @@ bool isRedirect(const Poco::Net::HTTPResponse::HTTPStatus status);
*/
std::istream * receiveResponse(
Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, bool allow_redirects);
void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects = false);
void assertResponseIsOk(
const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects = false);
}

View File

@ -27,20 +27,14 @@ bool MySQLPacketPayloadReadBuffer::nextImpl()
in.readStrict(reinterpret_cast<char *>(&payload_length), 3);
if (payload_length > MAX_PACKET_LENGTH)
{
std::ostringstream tmp;
tmp << "Received packet with payload larger than max_packet_size: " << payload_length;
throw Exception(tmp.str(), ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT);
}
throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT,
"Received packet with payload larger than max_packet_size: {}", payload_length);
size_t packet_sequence_id = 0;
in.read(reinterpret_cast<char &>(packet_sequence_id));
if (packet_sequence_id != sequence_id)
{
std::ostringstream tmp;
tmp << "Received packet with wrong sequence-id: " << packet_sequence_id << ". Expected: " << static_cast<unsigned int>(sequence_id) << '.';
throw Exception(tmp.str(), ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT);
}
throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT,
"Received packet with wrong sequence-id: {}. Expected: {}.", packet_sequence_id, static_cast<unsigned int>(sequence_id));
sequence_id++;
if (payload_length == 0)

View File

@ -67,7 +67,7 @@ ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_,
bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds)
{
return offset() != buffer().size() || socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR);
return available() || socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR);
}
}

View File

@ -1 +0,0 @@
#include <IO/ReadWriteBufferFromHTTP.h>

View File

@ -72,10 +72,7 @@ public:
}
else
{
std::stringstream error_message;
error_message << "Too many redirects while trying to access " << initial_uri.toString();
throw Exception(error_message.str(), ErrorCodes::TOO_MANY_REDIRECTS);
throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, "Too many redirects while trying to access {}", initial_uri.toString());
}
}

View File

@ -248,6 +248,7 @@ void PocoHTTPClient::makeRequestInternal(
response->SetContentType(poco_response.getContentType());
std::stringstream headers_ss;
headers_ss.exceptions(std::ios::failbit);
for (const auto & [header_name, header_value] : poco_response)
{
response->AddHeader(header_name, header_value);

View File

@ -77,6 +77,7 @@ std::string dumpContents(const T& container,
{
std::stringstream sstr;
sstr.exceptions(std::ios::failbit);
dumpBuffer(std::begin(container), std::end(container), &sstr, col_sep, row_sep, cols_in_row);
return sstr.str();

View File

@ -23,6 +23,7 @@ static void test(size_t data_size)
{
std::cout << "block size " << read_buffer_block_size << std::endl;
std::stringstream io;
io.exceptions(std::ios::failbit);
DB::WriteBufferFromOStream out_impl(io);
DB::HashingWriteBuffer out(out_impl);
out.write(data, data_size);

View File

@ -21,6 +21,7 @@ try
using namespace DB;
std::stringstream s;
s.exceptions(std::ios::failbit);
{
std::string src = "1";

View File

@ -17,6 +17,7 @@ int main(int, char **)
DB::String d = "'xyz\\";
std::stringstream s;
s.exceptions(std::ios::failbit);
{
DB::WriteBufferFromOStream out(s);

View File

@ -38,7 +38,6 @@ SRCS(
ReadBufferFromMemory.cpp
ReadBufferFromPocoSocket.cpp
ReadHelpers.cpp
ReadWriteBufferFromHTTP.cpp
SeekAvoidingReadBuffer.cpp
UseSSL.cpp
WriteBufferAIO.cpp

View File

@ -46,6 +46,14 @@ inline bool isLocalImpl(const Cluster::Address & address, const Poco::Net::Socke
return address.default_database.empty() && isLocalAddress(resolved_address, clickhouse_port);
}
void concatInsertPath(std::string & insert_path, const std::string & dir_name)
{
if (insert_path.empty())
insert_path = dir_name;
else
insert_path += "," + dir_name;
}
}
/// Implementation of Cluster::Address class
@ -358,9 +366,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false);
/// In case of internal_replication we will be appending names to dir_name_for_internal_replication
std::string dir_name_for_internal_replication;
std::string dir_name_for_internal_replication_with_local;
ShardInfoInsertPathForInternalReplication insert_paths;
for (const auto & replica_key : replica_keys)
{
@ -379,18 +385,20 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
if (internal_replication)
{
auto dir_name = replica_addresses.back().toFullString(settings.use_compact_format_in_distributed_parts_names);
if (!replica_addresses.back().is_local)
/// use_compact_format=0
{
if (dir_name_for_internal_replication.empty())
dir_name_for_internal_replication = dir_name;
else
dir_name_for_internal_replication += "," + dir_name;
auto dir_name = replica_addresses.back().toFullString(false /* use_compact_format */);
if (!replica_addresses.back().is_local)
concatInsertPath(insert_paths.prefer_localhost_replica, dir_name);
concatInsertPath(insert_paths.no_prefer_localhost_replica, dir_name);
}
/// use_compact_format=1
{
auto dir_name = replica_addresses.back().toFullString(true /* use_compact_format */);
if (!replica_addresses.back().is_local)
concatInsertPath(insert_paths.prefer_localhost_replica_compact, dir_name);
concatInsertPath(insert_paths.no_prefer_localhost_replica_compact, dir_name);
}
if (dir_name_for_internal_replication_with_local.empty())
dir_name_for_internal_replication_with_local = dir_name;
else
dir_name_for_internal_replication_with_local += "," + dir_name;
}
}
else
@ -425,8 +433,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
shards_info.push_back({
std::move(dir_name_for_internal_replication),
std::move(dir_name_for_internal_replication_with_local),
std::move(insert_paths),
current_shard_num,
weight,
std::move(shard_local_addresses),
@ -485,8 +492,7 @@ Cluster::Cluster(const Settings & settings, const std::vector<std::vector<String
slot_to_shard.insert(std::end(slot_to_shard), default_weight, shards_info.size());
shards_info.push_back({
{}, // dir_name_for_internal_replication
{}, // dir_name_for_internal_replication_with_local
{}, // insert_path_for_internal_replication
current_shard_num,
default_weight,
std::move(shard_local_addresses),
@ -609,22 +615,25 @@ Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector
initMisc();
}
const std::string & Cluster::ShardInfo::pathForInsert(bool prefer_localhost_replica) const
const std::string & Cluster::ShardInfo::insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const
{
if (!has_internal_replication)
throw Exception("internal_replication is not set", ErrorCodes::LOGICAL_ERROR);
if (prefer_localhost_replica)
const auto & paths = insert_path_for_internal_replication;
if (!use_compact_format)
{
if (dir_name_for_internal_replication.empty())
throw Exception("Directory name for async inserts is empty", ErrorCodes::LOGICAL_ERROR);
return dir_name_for_internal_replication;
if (prefer_localhost_replica)
return paths.prefer_localhost_replica;
else
return paths.no_prefer_localhost_replica;
}
else
{
if (dir_name_for_internal_replication_with_local.empty())
throw Exception("Directory name for async inserts is empty", ErrorCodes::LOGICAL_ERROR);
return dir_name_for_internal_replication_with_local;
if (prefer_localhost_replica)
return paths.prefer_localhost_replica_compact;
else
return paths.no_prefer_localhost_replica_compact;
}
}

View File

@ -133,6 +133,27 @@ public:
using Addresses = std::vector<Address>;
using AddressesWithFailover = std::vector<Addresses>;
/// Name of directory for asynchronous write to StorageDistributed if has_internal_replication
///
/// Contains different path for permutations of:
/// - prefer_localhost_replica
/// Notes with prefer_localhost_replica==0 will contains local nodes.
/// - use_compact_format_in_distributed_parts_names
/// See toFullString()
///
/// This is cached to avoid looping by replicas in insertPathForInternalReplication().
struct ShardInfoInsertPathForInternalReplication
{
/// prefer_localhost_replica == 1 && use_compact_format_in_distributed_parts_names=0
std::string prefer_localhost_replica;
/// prefer_localhost_replica == 0 && use_compact_format_in_distributed_parts_names=0
std::string no_prefer_localhost_replica;
/// prefer_localhost_replica == 1 && use_compact_format_in_distributed_parts_names=1
std::string prefer_localhost_replica_compact;
/// prefer_localhost_replica == 0 && use_compact_format_in_distributed_parts_names=1
std::string no_prefer_localhost_replica_compact;
};
struct ShardInfo
{
public:
@ -141,13 +162,10 @@ public:
size_t getLocalNodeCount() const { return local_addresses.size(); }
bool hasInternalReplication() const { return has_internal_replication; }
/// Name of directory for asynchronous write to StorageDistributed if has_internal_replication
const std::string & pathForInsert(bool prefer_localhost_replica) const;
const std::string & insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const;
public:
/// Name of directory for asynchronous write to StorageDistributed if has_internal_replication && prefer_localhost_replica
std::string dir_name_for_internal_replication;
/// Name of directory for asynchronous write to StorageDistributed if has_internal_replication && !prefer_localhost_replica
std::string dir_name_for_internal_replication_with_local;
ShardInfoInsertPathForInternalReplication insert_path_for_internal_replication;
/// Number of the shard, the indexation begins with 1
UInt32 shard_num = 0;
UInt32 weight = 1;

View File

@ -107,6 +107,7 @@ String formattedAST(const ASTPtr & ast)
if (!ast)
return {};
std::stringstream ss;
ss.exceptions(std::ios::failbit);
formatAST(*ast, ss, false, true);
return ss.str();
}

View File

@ -4,9 +4,10 @@
#include <Interpreters/Context.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/IInterpreter.h>
#include <Parsers/queryToString.h>
#include <Interpreters/ProcessList.h>
#include <Parsers/queryToString.h>
#include <Processors/Pipe.h>
#include <Storages/SelectQueryInfo.h>
namespace DB
@ -81,16 +82,17 @@ Context updateSettingsForCluster(const Cluster & cluster, const Context & contex
}
Pipe executeQuery(
IStreamFactory & stream_factory, const ClusterPtr & cluster, Poco::Logger * log,
const ASTPtr & query_ast, const Context & context, const Settings & settings, const SelectQueryInfo & query_info)
IStreamFactory & stream_factory, Poco::Logger * log,
const ASTPtr & query_ast, const Context & context, const SelectQueryInfo & query_info)
{
assert(log);
Pipes res;
const Settings & settings = context.getSettingsRef();
const std::string query = queryToString(query_ast);
Context new_context = updateSettingsForCluster(*cluster, context, settings, log);
Context new_context = updateSettingsForCluster(*query_info.cluster, context, settings, log);
ThrottlerPtr user_level_throttler;
if (auto * process_list_element = context.getProcessListElement())
@ -109,7 +111,7 @@ Pipe executeQuery(
else
throttler = user_level_throttler;
for (const auto & shard_info : cluster->getShardsInfo())
for (const auto & shard_info : query_info.cluster->getShardsInfo())
stream_factory.createForShard(shard_info, query, query_ast, new_context, throttler, query_info, res);
return Pipe::unitePipes(std::move(res));

Some files were not shown because too many files have changed in this diff Show More