diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index c065219f980..2dddde9aa14 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -172,6 +172,7 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py + python3 merge_pr.py --check-approved ############################################################################################# diff --git a/.github/workflows/reusable_build.yml b/.github/workflows/reusable_build.yml index d2fe6f5dbe7..80d78d93e1b 100644 --- a/.github/workflows/reusable_build.yml +++ b/.github/workflows/reusable_build.yml @@ -43,8 +43,7 @@ jobs: runs-on: [self-hosted, '${{inputs.runner_type}}'] steps: - name: Check out repository code - # WIP: temporary try commit with limited perallelization of checkout - uses: ClickHouse/checkout@0be3f7b3098bae494d3ef5d29d2e0676fb606232 + uses: ClickHouse/checkout@v1 with: clear-repository: true ref: ${{ fromJson(inputs.data).git_ref }} diff --git a/CMakeLists.txt b/CMakeLists.txt index cbd0a6fb96f..6847b1bf994 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,11 +110,6 @@ endif() # - sanitize.cmake add_library(global-libs INTERFACE) -# We don't want to instrument everything with fuzzer, but only specific targets (see below), -# also, since we build our own llvm, we specifically don't want to instrument -# libFuzzer library itself - it would result in infinite recursion -#include (cmake/fuzzer.cmake) - include (cmake/sanitize.cmake) option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON) @@ -554,7 +549,9 @@ if (ENABLE_RUST) endif() endif() -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64)) +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" + AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND NOT ENABLE_FUZZING + AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64)) set(CHECK_LARGE_OBJECT_SIZES_DEFAULT ON) else () 
set(CHECK_LARGE_OBJECT_SIZES_DEFAULT OFF) @@ -577,10 +574,7 @@ if (FUZZER) if (NOT(target_type STREQUAL "INTERFACE_LIBRARY" OR target_type STREQUAL "UTILITY")) target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link") endif() - # clickhouse fuzzer isn't working correctly - # initial PR https://github.com/ClickHouse/ClickHouse/pull/27526 - #if (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse") - if (target_type STREQUAL "EXECUTABLE" AND target MATCHES ".+_fuzzer") + if (target_type STREQUAL "EXECUTABLE" AND (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse")) message(STATUS "${target} instrumented with fuzzer") target_link_libraries(${target} PUBLIC ch_contrib::fuzzer) # Add to fuzzers bundle diff --git a/cmake/fuzzer.cmake b/cmake/fuzzer.cmake deleted file mode 100644 index dd0c4b080fe..00000000000 --- a/cmake/fuzzer.cmake +++ /dev/null @@ -1,17 +0,0 @@ -# see ./CMakeLists.txt for variable declaration -if (FUZZER) - if (FUZZER STREQUAL "libfuzzer") - # NOTE: Eldar Zaitov decided to name it "libfuzzer" instead of "fuzzer" to keep in mind another possible fuzzer backends. - # NOTE: no-link means that all the targets are built with instrumentation for fuzzer, but only some of them - # (tests) have entry point for fuzzer and it's not checked. 
- set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1") - - # NOTE: oss-fuzz can change LIB_FUZZING_ENGINE variable - if (NOT LIB_FUZZING_ENGINE) - set (LIB_FUZZING_ENGINE "-fsanitize=fuzzer") - endif () - else () - message (FATAL_ERROR "Unknown fuzzer type: ${FUZZER}") - endif () -endif() diff --git a/docs/en/engines/table-engines/special/memory.md b/docs/en/engines/table-engines/special/memory.md index 0d552a69804..19b5c798a76 100644 --- a/docs/en/engines/table-engines/special/memory.md +++ b/docs/en/engines/table-engines/special/memory.md @@ -21,3 +21,79 @@ When restarting a server, data disappears from the table and the table becomes e Normally, using this table engine is not justified. However, it can be used for tests, and for tasks where maximum speed is required on a relatively small number of rows (up to approximately 100,000,000). The Memory engine is used by the system for temporary tables with external query data (see the section “External data for processing a query”), and for implementing `GLOBAL IN` (see the section “IN operators”). + +Upper and lower bounds can be specified to limit Memory engine table size, effectively allowing it to act as a circular buffer (see [Engine Parameters](#engine-parameters)). + +## Engine Parameters {#engine-parameters} + +- `min_bytes_to_keep` — Minimum bytes to keep when memory table is size-capped. + - Default value: `0` + - Requires `max_bytes_to_keep` +- `max_bytes_to_keep` — Maximum bytes to keep within memory table where oldest rows are deleted on each insertion (i.e circular buffer). Max bytes can exceed the stated limit if the oldest batch of rows to remove falls under the `min_bytes_to_keep` limit when adding a large block. + - Default value: `0` +- `min_rows_to_keep` — Minimum rows to keep when memory table is size-capped. 
+ - Default value: `0` + - Requires `max_rows_to_keep` +- `max_rows_to_keep` — Maximum rows to keep within memory table where oldest rows are deleted on each insertion (i.e circular buffer). Max rows can exceed the stated limit if the oldest batch of rows to remove falls under the `min_rows_to_keep` limit when adding a large block. + - Default value: `0` + +## Usage {#usage} + + +**Initialize settings** +``` sql +CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000; +``` + +**Note:** Both `bytes` and `rows` capping parameters can be set at the same time, however, the lower bounds of `max` and `min` will be adhered to. + +## Examples {#examples} +``` sql +CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_bytes_to_keep = 4096, max_bytes_to_keep = 16384; + +/* 1. testing oldest block doesn't get deleted due to min-threshold - 3000 rows */ +INSERT INTO memory SELECT * FROM numbers(0, 1600); -- 8'192 bytes + +/* 2. adding block that doesn't get deleted */ +INSERT INTO memory SELECT * FROM numbers(1000, 100); -- 1'024 bytes + +/* 3. testing oldest block gets deleted - 9216 bytes - 1100 */ +INSERT INTO memory SELECT * FROM numbers(9000, 1000); -- 8'192 bytes + +/* 4. checking a very large block overrides all */ +INSERT INTO memory SELECT * FROM numbers(9000, 10000); -- 65'536 bytes + +SELECT total_bytes, total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +``` + +``` text +┌─total_bytes─┬─total_rows─┐ +│ 65536 │ 10000 │ +└─────────────┴────────────┘ +``` + +also, for rows: + +``` sql +CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 4000, max_rows_to_keep = 10000; + +/* 1. testing oldest block doesn't get deleted due to min-threshold - 3000 rows */ +INSERT INTO memory SELECT * FROM numbers(0, 1600); -- 1'600 rows + +/* 2. adding block that doesn't get deleted */ +INSERT INTO memory SELECT * FROM numbers(1000, 100); -- 100 rows + +/* 3. 
testing oldest block gets deleted - 9216 bytes - 1100 */ +INSERT INTO memory SELECT * FROM numbers(9000, 1000); -- 1'000 rows + +/* 4. checking a very large block overrides all */ +INSERT INTO memory SELECT * FROM numbers(9000, 10000); -- 10'000 rows + +SELECT total_bytes, total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +``` + +``` text +┌─total_bytes─┬─total_rows─┐ +│ 65536 │ 10000 │ +└─────────────┴────────────┘ +``` diff --git a/docs/en/getting-started/example-datasets/criteo.md b/docs/en/getting-started/example-datasets/criteo.md index a2e0fda0cb0..4becdb50731 100644 --- a/docs/en/getting-started/example-datasets/criteo.md +++ b/docs/en/getting-started/example-datasets/criteo.md @@ -55,7 +55,7 @@ CREATE TABLE criteo_log ( ) ENGINE = Log; ``` -Download the data: +Insert the data: ``` bash $ for i in {00..23}; do echo $i; zcat datasets/criteo/day_${i#0}.gz | sed -r 's/^/2000-01-'${i/00/24}'\t/' | clickhouse-client --host=example-perftest01j --query="INSERT INTO criteo_log FORMAT TabSeparated"; done diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 9f17f4af1e8..089704705d0 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -95,9 +95,11 @@ which is equal to ## Substituting Configuration {#substitution} -The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. 
To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)). +The config can define substitutions. There are two types of substitutions: -If you want to replace an entire element with a substitution use `include` as the element name. +- If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)). + +- If you want to replace an entire element with a substitution, use `include` as the element name. Substitutions can also be performed from ZooKeeper by specifying the attribute `from_zk = "/path/to/node"`. In this case, the element value is replaced with the contents of the ZooKeeper node at `/path/to/node`. This also works if you store an entire XML subtree as a ZooKeeper node; it will be fully inserted into the source element. XML substitution example: @@ -114,7 +116,7 @@ XML substitution example: ``` -Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node, and it will be fully inserted into the source element. 
+If you want to merge the substituting content with the existing configuration instead of appending, you can use the attribute `merge="true"`, for example: `<include from_zk="/some_path" merge="true">`. In this case, the existing configuration will be merged with the content from the substitution and the existing configuration settings will be replaced with values from the substitution. ## Encrypting and Hiding Configuration {#encryption} diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 07c9a2b88ab..f20dcb9025e 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -933,9 +933,9 @@ Hard limit is configured via system tools ## database_atomic_delay_before_drop_table_sec {#database_atomic_delay_before_drop_table_sec} -Sets the delay before remove table data in seconds. If the query has `SYNC` modifier, this setting is ignored. +The delay (in seconds) before table data is dropped. If the `DROP TABLE` query has a `SYNC` modifier, this setting is ignored. -Default value: `480` (8 minute). +Default value: `480` (8 minutes). ## database_catalog_unused_dir_hide_timeout_sec {#database_catalog_unused_dir_hide_timeout_sec} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 3f4dec9dc93..6c427442e49 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4337,6 +4337,18 @@ Possible values: Default value: `0`. + +## function_locate_has_mysql_compatible_argument_order {#function-locate-has-mysql-compatible-argument-order} + +Controls the order of arguments in function [locate](../../sql-reference/functions/string-search-functions.md#locate). + +Possible values: + +- 0 — Function `locate` accepts arguments `(haystack, needle[, start_pos])`. 
+- 1 — Function `locate` accepts arguments `(needle, haystack[, start_pos])` (MySQL-compatible behavior). + +Default value: `1`. + ## date_time_overflow_behavior {#date_time_overflow_behavior} Defines the behavior when [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md) or integers are converted into Date, Date32, DateTime or DateTime64 but the value cannot be represented in the result type. diff --git a/docs/en/operations/system-tables/scheduler.md b/docs/en/operations/system-tables/scheduler.md index 953db4c28f2..c4de7f76fdc 100644 --- a/docs/en/operations/system-tables/scheduler.md +++ b/docs/en/operations/system-tables/scheduler.md @@ -26,7 +26,9 @@ priority: 0 is_active: 0 active_children: 0 dequeued_requests: 67 +canceled_requests: 0 dequeued_cost: 4692272 +canceled_cost: 0 busy_periods: 63 vruntime: 938454.1999999989 system_vruntime: ᴺᵁᴸᴸ @@ -54,7 +56,9 @@ Columns: - `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied. - `active_children` (`UInt64`) - The number of children in active state. - `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node. +- `canceled_requests` (`UInt64`) - The total number of resource requests canceled from this node. - `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node. +- `canceled_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests canceled from this node. - `busy_periods` (`UInt64`) - The total number of deactivations of this node. - `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner. - `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. 
Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`. diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index a465106c2ff..1adff18f598 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -36,9 +36,9 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn’t explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter. -ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings-formats.md#date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function. +ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function. -When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format) setting. +When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting. ## Examples @@ -147,8 +147,8 @@ Time shifts for multiple days. 
Some pacific islands changed their timezone offse - [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) - [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) - [Functions for working with arrays](../../sql-reference/functions/array-functions.md) -- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format) -- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format) +- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format) +- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format) - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone) - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 22f879c62ae..1b03f220db2 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -30,7 +30,6 @@ position(haystack, needle[, start_pos]) Alias: - `position(needle IN haystack)` -- `locate(haystack, needle[, start_pos])`. 
**Arguments** @@ -49,7 +48,7 @@ If substring `needle` is empty, these rules apply: - if `start_pos >= 1` and `start_pos <= length(haystack) + 1`: return `start_pos` - otherwise: return `0` -The same rules also apply to functions `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8` +The same rules also apply to functions `locate`, `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8`. Type: `Integer`. @@ -114,6 +113,21 @@ SELECT └─────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┘ ``` +## locate + +Like [position](#position) but with arguments `haystack` and `needle` switched. + +The behavior of this function depends on the ClickHouse version: +- in versions < v24.3, `locate` was an alias of function `position` and accepted arguments `(haystack, needle[, start_pos])`. +- in versions >= v24.3, `locate` is an individual function (for better compatibility with MySQL) and accepts arguments `(needle, haystack[, start_pos])`. The previous behavior + can be restored using setting [function_locate_has_mysql_compatible_argument_order = false](../../operations/settings/settings.md#function-locate-has-mysql-compatible-argument-order). + +**Syntax** + +``` sql +locate(needle, haystack[, start_pos]) +``` + ## positionCaseInsensitive Like [position](#position) but searches case-insensitively. diff --git a/docs/en/sql-reference/statements/undrop.md b/docs/en/sql-reference/statements/undrop.md index 40ac1ab4f99..4b138bfe679 100644 --- a/docs/en/sql-reference/statements/undrop.md +++ b/docs/en/sql-reference/statements/undrop.md @@ -13,13 +13,6 @@ a system table called `system.dropped_tables`. If you have a materialized view without a `TO` clause associated with the dropped table, then you will also have to UNDROP the inner table of that view. -:::note -UNDROP TABLE is experimental. 
To use it add this setting: -```sql -set allow_experimental_undrop_table_query = 1; -``` -::: - :::tip Also see [DROP TABLE](/docs/en/sql-reference/statements/drop.md) ::: @@ -32,60 +25,53 @@ UNDROP TABLE [db.]name [UUID ''] [ON CLUSTER cluster] **Example** -``` sql -set allow_experimental_undrop_table_query = 1; -``` - ```sql -CREATE TABLE undropMe +CREATE TABLE tab ( `id` UInt8 ) ENGINE = MergeTree -ORDER BY id -``` +ORDER BY id; + +DROP TABLE tab; -```sql -DROP TABLE undropMe -``` -```sql SELECT * FROM system.dropped_tables -FORMAT Vertical +FORMAT Vertical; ``` + ```response Row 1: ────── index: 0 database: default -table: undropMe +table: tab uuid: aa696a1a-1d70-4e60-a841-4c80827706cc engine: MergeTree -metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.undropMe.aa696a1a-1d70-4e60-a841-4c80827706cc.sql +metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.tab.aa696a1a-1d70-4e60-a841-4c80827706cc.sql table_dropped_time: 2023-04-05 14:12:12 1 row in set. Elapsed: 0.001 sec. ``` + ```sql -UNDROP TABLE undropMe -``` -```response -Ok. -``` -```sql +UNDROP TABLE tab; + SELECT * FROM system.dropped_tables -FORMAT Vertical -``` +FORMAT Vertical; + ```response Ok. 0 rows in set. Elapsed: 0.001 sec. ``` + ```sql -DESCRIBE TABLE undropMe -FORMAT Vertical +DESCRIBE TABLE tab +FORMAT Vertical; ``` + ```response Row 1: ────── diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 25e87794147..57f24786bb7 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -27,9 +27,9 @@ DateTime([timezone]) Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`. 
-ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings-formats.md#date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). +ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/index.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). -При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format). +При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/index.md#settings-date_time_input_format). 
## Примеры {#primery} @@ -119,8 +119,8 @@ FROM dt - [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md) - [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) - [Функции для работы с массивами](../../sql-reference/functions/array-functions.md) -- [Настройка `date_time_input_format`](../../operations/settings/settings-formats.md#date_time_input_format) -- [Настройка `date_time_output_format`](../../operations/settings/settings-formats.md#date_time_output_format) +- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format) +- [Настройка `date_time_output_format`](../../operations/settings/index.md) - [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone) - [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime) diff --git a/programs/compressor/Compressor.cpp b/programs/compressor/Compressor.cpp index 7125fdc744f..050bb495024 100644 --- a/programs/compressor/Compressor.cpp +++ b/programs/compressor/Compressor.cpp @@ -143,7 +143,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) ParserCodec codec_parser; std::string codecs_line = boost::algorithm::join(codecs, ","); - auto ast = parseQuery(codec_parser, "(" + codecs_line + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto ast = parseQuery(codec_parser, "(" + codecs_line + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); codec = CompressionCodecFactory::instance().get(ast, nullptr); } else diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index c92106e2f90..50f801f2560 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -234,7 +234,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) 
size_t approx_query_length = multiple ? find_first_symbols<';'>(pos, end) - pos : end - pos; ASTPtr res = parseQueryAndMovePosition( - parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth); + parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth, cmd_settings.max_parser_backtracks); std::unique_ptr insert_query_payload = nullptr; /// If the query is INSERT ... VALUES, then we will try to parse the data. diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index 595fc65e50e..8297fab5ed9 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -44,7 +44,7 @@ String KeeperClient::executeFourLetterCommand(const String & command) std::vector KeeperClient::getCompletions(const String & prefix) const { Tokens tokens(prefix.data(), prefix.data() + prefix.size(), 0, false); - IParser::Pos pos(tokens, 0); + IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); if (pos->type != TokenType::BareWord) return registered_commands_and_four_letter_words; @@ -278,6 +278,7 @@ bool KeeperClient::processQueryText(const String & text) /* allow_multi_statements = */ true, /* max_query_size = */ 0, /* max_parser_depth = */ 0, + /* max_parser_backtracks = */ 0, /* skip_insignificant = */ false); if (!res) diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index dd0bf67cb64..98d8848502d 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -11,6 +11,7 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES LibraryBridgeHandlers.cpp SharedLibrary.cpp library-bridge.cpp + createFunctionBaseCast.cpp ) clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES}) diff --git a/programs/library-bridge/LibraryBridge.h b/programs/library-bridge/LibraryBridge.h index 04860a042a3..a8d15a87e07 
100644 --- a/programs/library-bridge/LibraryBridge.h +++ b/programs/library-bridge/LibraryBridge.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include "LibraryBridgeHandlerFactory.h" diff --git a/programs/library-bridge/createFunctionBaseCast.cpp b/programs/library-bridge/createFunctionBaseCast.cpp new file mode 100644 index 00000000000..dcdd47d79ce --- /dev/null +++ b/programs/library-bridge/createFunctionBaseCast.cpp @@ -0,0 +1,23 @@ +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +class IFunctionBase; +using FunctionBasePtr = std::shared_ptr; + +FunctionBasePtr createFunctionBaseCast( + ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge"); +} + +} diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 50f9c242712..167d361f5e3 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -1000,12 +1000,6 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) { std::vector argv(*pargv, *pargv + (*pargc + 1)); - if (!isClickhouseApp("local", argv)) - { - std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode, only clickhouse local is available." 
<< "\033[0m" << std::endl; - exit(1); - } - /// As a user you can add flags to clickhouse binary in fuzzing mode as follows /// clickhouse local -- @@ -1013,13 +1007,16 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) auto it = argv.begin() + 1; for (; *it; ++it) + { if (strcmp(*it, "--") == 0) { ++it; break; } + } while (*it) + { if (strncmp(*it, "--", 2) != 0) { *(p++) = *it; @@ -1027,6 +1024,7 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) } else ++it; + } *pargc = static_cast(p - &(*pargv)[0]); *p = nullptr; diff --git a/programs/main.cpp b/programs/main.cpp index c5f1b30f60e..0a35594bd30 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -68,7 +68,6 @@ namespace using MainFunc = int (*)(int, char**); #if !defined(FUZZING_MODE) - /// Add an item here to register new application std::pair clickhouse_applications[] = { @@ -105,13 +104,6 @@ std::pair clickhouse_applications[] = {"restart", mainEntryClickHouseRestart}, }; -/// Add an item here to register a new short name -std::pair clickhouse_short_names[] = -{ - {"chl", "local"}, - {"chc", "client"}, -}; - int printHelp(int, char **) { std::cerr << "Use one of the following commands:" << std::endl; @@ -121,6 +113,13 @@ int printHelp(int, char **) } #endif +/// Add an item here to register a new short name +std::pair clickhouse_short_names[] = +{ + {"chl", "local"}, + {"chc", "client"}, +}; + enum class InstructionFail { diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 56373601b95..18cda4d7a04 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -13,6 +13,7 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES getIdentifierQuote.cpp odbc-bridge.cpp validateODBCConnectionString.cpp + createFunctionBaseCast.cpp ) clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) diff --git a/programs/odbc-bridge/createFunctionBaseCast.cpp 
b/programs/odbc-bridge/createFunctionBaseCast.cpp new file mode 100644 index 00000000000..dcdd47d79ce --- /dev/null +++ b/programs/odbc-bridge/createFunctionBaseCast.cpp @@ -0,0 +1,23 @@ +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +class IFunctionBase; +using FunctionBasePtr = std::shared_ptr; + +FunctionBasePtr createFunctionBaseCast( + ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge"); +} + +} diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 7636f221ab5..d7030e3b0aa 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -733,8 +733,6 @@ try LOG_INFO(log, "Available CPU instruction sets: {}", cpu_info); #endif - sanityChecks(*this); - // Initialize global thread pool. Do it before we fetch configs from zookeeper // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will // ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well. @@ -904,6 +902,7 @@ try config_processor.savePreprocessedConfig(loaded_config, config().getString("path", DBMS_DEFAULT_PATH)); config().removeConfiguration(old_configuration.get()); config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); + global_context->setConfig(loaded_config.configuration); } Settings::checkNoSettingNamesAtTopLevel(config(), config_path); @@ -911,6 +910,9 @@ try /// We need to reload server settings because config could be updated via zookeeper. 
server_settings.loadSettingsFromConfig(config()); + /// NOTE: Do sanity checks after we loaded all possible substitutions (for the configuration) from ZK + sanityChecks(*this); + #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); diff --git a/src/Access/AccessEntityIO.cpp b/src/Access/AccessEntityIO.cpp index 80bb63b04bf..b0dfd74c53b 100644 --- a/src/Access/AccessEntityIO.cpp +++ b/src/Access/AccessEntityIO.cpp @@ -62,7 +62,7 @@ AccessEntityPtr deserializeAccessEntityImpl(const String & definition) const char * end = begin + definition.size(); while (pos < end) { - queries.emplace_back(parseQueryAndMovePosition(parser, pos, end, "", true, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH)); + queries.emplace_back(parseQueryAndMovePosition(parser, pos, end, "", true, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS)); while (isWhitespaceASCII(*pos) || *pos == ';') ++pos; } diff --git a/src/Access/RowPolicyCache.cpp b/src/Access/RowPolicyCache.cpp index 13140099a63..c1c4928d0da 100644 --- a/src/Access/RowPolicyCache.cpp +++ b/src/Access/RowPolicyCache.cpp @@ -86,7 +86,7 @@ void RowPolicyCache::PolicyInfo::setPolicy(const RowPolicyPtr & policy_) try { ParserExpression parser; - parsed_filters[filter_type_i] = parseQuery(parser, filter, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + parsed_filters[filter_type_i] = parseQuery(parser, filter, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); } catch (...) 
{ diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index e9b2e1397ab..b4b843fc77e 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -66,7 +66,7 @@ namespace String error_message; const char * pos = string_query.data(); - auto ast = tryParseQuery(parser, pos, pos + string_query.size(), error_message, false, "", false, 0, 0); + auto ast = tryParseQuery(parser, pos, pos + string_query.size(), error_message, false, "", false, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true); if (!ast) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse grant query. Error: {}", error_message); diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index 8ac75e4451c..891f2ac4284 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -483,6 +483,7 @@ public: } bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; } + bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override { @@ -576,6 +577,7 @@ public: } bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; } + bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override { diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionArray.h b/src/AggregateFunctions/Combinators/AggregateFunctionArray.h index 7f38453f86b..6b918926d0d 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionArray.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionArray.h @@ -142,6 +142,7 @@ public: } bool 
isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } + bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override { diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionIf.h b/src/AggregateFunctions/Combinators/AggregateFunctionIf.h index e81f2203e7b..df23398a10d 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionIf.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionIf.h @@ -165,6 +165,7 @@ public: } bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } + bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override { diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h b/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h index 5b9e8e606af..53c24bd60c1 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h @@ -111,6 +111,7 @@ public: } bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } + bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override { diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h index 8b614f68540..ba72f960852 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h @@ -152,6 +152,7 @@ public: } bool 
isAbleToParallelizeMerge() const override { return nested_function->isAbleToParallelizeMerge(); } + bool canOptimizeEqualKeysRanges() const override { return nested_function->canOptimizeEqualKeysRanges(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override { diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionState.h b/src/AggregateFunctions/Combinators/AggregateFunctionState.h index 8335d21cb1e..b0ab6d49604 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionState.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionState.h @@ -92,6 +92,7 @@ public: } bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } + bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override { diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 94bb121893d..499185320e6 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -162,6 +162,10 @@ public: /// Tells if merge() with thread pool parameter could be used. virtual bool isAbleToParallelizeMerge() const { return false; } + /// Return true if it is allowed to replace call of `addBatch` + /// to `addBatchSinglePlace` for ranges of consecutive equal keys. + virtual bool canOptimizeEqualKeysRanges() const { return true; } + /// Should be used only if isAbleToParallelizeMerge() returned true. 
virtual void merge(AggregateDataPtr __restrict /*place*/, ConstAggregateDataPtr /*rhs*/, ThreadPool & /*thread_pool*/, Arena * /*arena*/) const diff --git a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp index 290da81944d..425364efb9c 100644 --- a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp +++ b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp @@ -27,6 +27,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) auto initialize = [&]() mutable { + if (context) + return true; + shared_context = Context::createShared(); context = Context::createGlobal(shared_context.get()); context->makeGlobalContext(); diff --git a/src/AggregateFunctions/parseAggregateFunctionParameters.cpp b/src/AggregateFunctions/parseAggregateFunctionParameters.cpp index db1efe224d1..593be1e0a79 100644 --- a/src/AggregateFunctions/parseAggregateFunctionParameters.cpp +++ b/src/AggregateFunctions/parseAggregateFunctionParameters.cpp @@ -81,7 +81,8 @@ void getAggregateFunctionNameAndParametersArray( ParserExpressionList params_parser(false); ASTPtr args_ast = parseQuery(params_parser, parameters_str.data(), parameters_str.data() + parameters_str.size(), - "parameters of aggregate function in " + error_context, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + "parameters of aggregate function in " + error_context, + 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); if (args_ast->children.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect list of parameters to aggregate function {}", diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 1a6ee9215a9..1c2097e7be9 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -54,10 +54,10 @@ public: if (!constant_node) 
return; - const auto & constant_value_literal = constant_node->getValue(); - if (!isInt64OrUInt64FieldType(constant_value_literal.getType())) + if (auto constant_type = constant_node->getResultType(); !isNativeInteger(constant_type)) return; + const auto & constant_value_literal = constant_node->getValue(); if (getSettings().aggregate_functions_null_for_empty) return; diff --git a/src/Backups/BackupInfo.cpp b/src/Backups/BackupInfo.cpp index 2bff400d4fe..461f613ecd2 100644 --- a/src/Backups/BackupInfo.cpp +++ b/src/Backups/BackupInfo.cpp @@ -25,7 +25,7 @@ String BackupInfo::toString() const BackupInfo BackupInfo::fromString(const String & str) { ParserIdentifierWithOptionalParameters parser; - ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); return fromAST(*ast); } diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index bae2f9aaa25..e20e8eb66c6 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -101,10 +101,12 @@ RestorerFromBackup::RestorerFromBackup( RestorerFromBackup::~RestorerFromBackup() { - if (!futures.empty()) + /// If an exception occurs we can come here to the destructor having some tasks still unfinished. + /// We have to wait until they finish. 
+ if (getNumFutures() > 0) { - LOG_ERROR(log, "RestorerFromBackup must not be destroyed while {} tasks are still running", futures.size()); - chassert(false && "RestorerFromBackup must not be destroyed while some tasks are still running"); + LOG_INFO(log, "Waiting for {} tasks to finish", getNumFutures()); + waitFutures(); } } @@ -422,7 +424,7 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_ readStringUntilEOF(create_query_str, *read_buffer); read_buffer.reset(); ParserCreateQuery create_parser; - ASTPtr create_table_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr create_table_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); applyCustomStoragePolicy(create_table_query); renameDatabaseAndTableNameInCreateQuery(create_table_query, renaming_map, context->getGlobalContext()); String create_table_query_str = serializeAST(*create_table_query); @@ -532,7 +534,7 @@ void RestorerFromBackup::findDatabaseInBackupImpl(const String & database_name_i readStringUntilEOF(create_query_str, *read_buffer); read_buffer.reset(); ParserCreateQuery create_parser; - ASTPtr create_database_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr create_database_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); renameDatabaseAndTableNameInCreateQuery(create_database_query, renaming_map, context->getGlobalContext()); String create_database_query_str = serializeAST(*create_database_query); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 48962880b8f..d561a64895b 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -345,7 +345,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (dialect == Dialect::kusto) parser = std::make_unique(end, 
global_context->getSettings().allow_settings_after_format_in_insert); else if (dialect == Dialect::prql) - parser = std::make_unique(max_length, settings.max_parser_depth); + parser = std::make_unique(max_length, settings.max_parser_depth, settings.max_parser_backtracks); else parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); @@ -353,9 +353,9 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu { String message; if (dialect == Dialect::kusto) - res = tryParseKQLQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = tryParseKQLQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth, settings.max_parser_backtracks, true); else - res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth, settings.max_parser_backtracks, true); if (!res) { @@ -366,9 +366,9 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu else { if (dialect == Dialect::kusto) - res = parseKQLQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = parseKQLQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth, settings.max_parser_backtracks); else - res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth, settings.max_parser_backtracks); } if (is_interactive) @@ -385,12 +385,12 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu /// Consumes trailing semicolons and tries to 
consume the same-line trailing comment. -void ClientBase::adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth) +void ClientBase::adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth, uint32_t max_parser_backtracks) { // We have to skip the trailing semicolon that might be left // after VALUES parsing or just after a normal semicolon-terminated query. Tokens after_query_tokens(this_query_end, all_queries_end); - IParser::Pos after_query_iterator(after_query_tokens, max_parser_depth); + IParser::Pos after_query_iterator(after_query_tokens, max_parser_depth, max_parser_backtracks); while (after_query_iterator.isValid() && after_query_iterator->type == TokenType::Semicolon) { this_query_end = after_query_iterator->end; @@ -1984,6 +1984,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( return MultiQueryProcessingStage::QUERIES_END; unsigned max_parser_depth = static_cast(global_context->getSettingsRef().max_parser_depth); + unsigned max_parser_backtracks = static_cast(global_context->getSettingsRef().max_parser_backtracks); // If there are only comments left until the end of file, we just // stop. The parser can't handle this situation because it always @@ -1994,7 +1995,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( // and it makes more sense to treat them as such. 
{ Tokens tokens(this_query_begin, all_queries_end); - IParser::Pos token_iterator(tokens, max_parser_depth); + IParser::Pos token_iterator(tokens, max_parser_depth, max_parser_backtracks); if (!token_iterator.isValid()) return MultiQueryProcessingStage::QUERIES_END; } @@ -2015,7 +2016,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( if (ignore_error) { Tokens tokens(this_query_begin, all_queries_end); - IParser::Pos token_iterator(tokens, max_parser_depth); + IParser::Pos token_iterator(tokens, max_parser_depth, max_parser_backtracks); while (token_iterator->type != TokenType::Semicolon && token_iterator.isValid()) ++token_iterator; this_query_begin = token_iterator->end; @@ -2055,7 +2056,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( // after we have processed the query. But even this guess is // beneficial so that we see proper trailing comments in "echo" and // server log. - adjustQueryEnd(this_query_end, all_queries_end, max_parser_depth); + adjustQueryEnd(this_query_end, all_queries_end, max_parser_depth, max_parser_backtracks); return MultiQueryProcessingStage::EXECUTE_QUERY; } @@ -2251,7 +2252,8 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) this_query_end = insert_ast->end; adjustQueryEnd( this_query_end, all_queries_end, - static_cast(global_context->getSettingsRef().max_parser_depth)); + static_cast(global_context->getSettingsRef().max_parser_depth), + static_cast(global_context->getSettingsRef().max_parser_backtracks)); } // Report error. 
diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index dd08e7c059b..7a9e9666e67 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -94,7 +94,7 @@ protected: void processParsedSingleQuery(const String & full_query, const String & query_to_execute, ASTPtr parsed_query, std::optional echo_query_ = {}, bool report_error = false); - static void adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth); + static void adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth, uint32_t max_parser_backtracks); ASTPtr parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const; static void setupSignalHandler(); diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 7f1dce4b29a..7be01686258 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -569,7 +569,8 @@ void QueryFuzzer::fuzzColumnDeclaration(ASTColumnDeclaration & column) auto data_type = fuzzDataType(DataTypeFactory::instance().get(column.type)); ParserDataType parser; - column.type = parseQuery(parser, data_type->getName(), DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); + column.type = parseQuery(parser, data_type->getName(), + DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); } } @@ -821,7 +822,8 @@ static ASTPtr tryParseInsertQuery(const String & full_query) ParserInsertQuery parser(end, false); String message; - return tryParseQuery(parser, pos, end, message, false, "", false, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); + return tryParseQuery(parser, pos, end, message, false, "", false, + DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true); } ASTs QueryFuzzer::getInsertQueriesForFuzzedTables(const String & full_query) @@ -914,6 +916,38 @@ ASTPtr QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child) 
child = makeASTFunction( "toFixedString", std::make_shared(value), std::make_shared(static_cast(value.size()))); } + else if (type == Field::Types::Which::UInt64 && fuzz_rand() % 7 == 0) + { + child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toUInt128" : "toUInt256", std::make_shared(l->value.get())); + } + else if (type == Field::Types::Which::Int64 && fuzz_rand() % 7 == 0) + { + child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toInt128" : "toInt256", std::make_shared(l->value.get())); + } + else if (type == Field::Types::Which::Float64 && fuzz_rand() % 7 == 0) + { + int decimal = fuzz_rand() % 4; + if (decimal == 0) + child = makeASTFunction( + "toDecimal32", + std::make_shared(l->value.get()), + std::make_shared(static_cast(fuzz_rand() % 9))); + else if (decimal == 1) + child = makeASTFunction( + "toDecimal64", + std::make_shared(l->value.get()), + std::make_shared(static_cast(fuzz_rand() % 18))); + else if (decimal == 2) + child = makeASTFunction( + "toDecimal128", + std::make_shared(l->value.get()), + std::make_shared(static_cast(fuzz_rand() % 38))); + else + child = makeASTFunction( + "toDecimal256", + std::make_shared(l->value.get()), + std::make_shared(static_cast(fuzz_rand() % 76))); + } if (fuzz_rand() % 7 == 0) child = makeASTFunction("toNullable", child); @@ -933,7 +967,19 @@ ASTPtr QueryFuzzer::reverseLiteralFuzzing(ASTPtr child) { if (auto * function = child.get()->as()) { - std::unordered_set can_be_reverted{"toNullable", "toLowCardinality", "materialize"}; + const std::unordered_set can_be_reverted{ + "materialize", + "toDecimal32", /// Keeping the first parameter only should be ok (valid query most of the time) + "toDecimal64", + "toDecimal128", + "toDecimal256", + "toFixedString", /// Same as toDecimal + "toInt128", + "toInt256", + "toLowCardinality", + "toNullable", + "toUInt128", + "toUInt256"}; if (can_be_reverted.contains(function->name) && function->children.size() == 1) { if (fuzz_rand() % 7 == 0) diff --git a/src/Common/AsyncLoader.cpp 
b/src/Common/AsyncLoader.cpp index 140194e10b4..4a39454ccbb 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -39,7 +39,7 @@ void logAboutProgress(LoggerPtr log, size_t processed, size_t total, AtomicStopw { if (total && (processed % PRINT_MESSAGE_EACH_N_OBJECTS == 0 || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS))) { - LOG_INFO(log, "Processed: {}%", static_cast(processed * 1000.0 / total) * 0.1); + LOG_INFO(log, "Processed: {:.1f}%", static_cast(processed) * 100.0 / total); watch.restart(); } } diff --git a/src/Common/ColumnsHashingImpl.h b/src/Common/ColumnsHashingImpl.h index 7116160e94c..d68171a6566 100644 --- a/src/Common/ColumnsHashingImpl.h +++ b/src/Common/ColumnsHashingImpl.h @@ -62,7 +62,6 @@ struct LastElementCache bool check(const Key & key) const { return value.first == key; } bool hasOnlyOneValue() const { return found && misses == 1; } - UInt64 getMisses() const { return misses; } }; template @@ -232,7 +231,7 @@ public: ALWAYS_INLINE UInt64 getCacheMissesSinceLastReset() const { if constexpr (consecutive_keys_optimization) - return cache.getMisses(); + return cache.misses; return 0; } diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 641e7ddcdaa..60407c6a174 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -427,6 +427,8 @@ void ConfigProcessor::doIncludesRecursive( /// Replace the original contents, not add to it. bool replace = attributes->getNamedItem("replace"); + /// Merge with the original contents + bool merge = attributes->getNamedItem("merge"); bool included_something = false; @@ -450,7 +452,6 @@ void ConfigProcessor::doIncludesRecursive( } else { - /// Replace the whole node not just contents. 
if (node->nodeName() == "include") { const NodeListPtr children = node_to_include->childNodes(); @@ -458,8 +459,18 @@ void ConfigProcessor::doIncludesRecursive( for (Node * child = children->item(0); child; child = next_child) { next_child = child->nextSibling(); - NodePtr new_node = config->importNode(child, true); - node->parentNode()->insertBefore(new_node, node); + + /// Recursively replace existing nodes in merge mode + if (merge) + { + NodePtr new_node = config->importNode(child->parentNode(), true); + mergeRecursive(config, node->parentNode(), new_node); + } + else /// Append to existing node by default + { + NodePtr new_node = config->importNode(child, true); + node->parentNode()->insertBefore(new_node, node); + } } node->parentNode()->removeChild(node); @@ -777,9 +788,9 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfig(bool allow_zk_includes } ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes( - zkutil::ZooKeeperNodeCache & zk_node_cache, - const zkutil::EventPtr & zk_changed_event, - bool fallback_to_preprocessed) + zkutil::ZooKeeperNodeCache & zk_node_cache, + const zkutil::EventPtr & zk_changed_event, + bool fallback_to_preprocessed) { XMLDocumentPtr config_xml; bool has_zk_includes; diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 9f2572cbfc6..75ba9cff81e 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -584,10 +584,6 @@ M(703, INVALID_IDENTIFIER) \ M(704, QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS) \ M(705, TABLE_NOT_EMPTY) \ - \ - M(900, DISTRIBUTED_CACHE_ERROR) \ - M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ - \ M(706, LIBSSH_ERROR) \ M(707, GCP_ERROR) \ M(708, ILLEGAL_STATISTIC) \ @@ -599,6 +595,10 @@ M(715, CANNOT_DETECT_FORMAT) \ M(716, CANNOT_FORGET_PARTITION) \ M(717, EXPERIMENTAL_FEATURE_ERROR) \ + M(718, TOO_SLOW_PARSING) \ + \ + M(900, DISTRIBUTED_CACHE_ERROR) \ + M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, 
POCO_EXCEPTION) \ diff --git a/src/Common/NaNUtils.h b/src/Common/NaNUtils.h index 1c5a619e919..2ff3e2f5661 100644 --- a/src/Common/NaNUtils.h +++ b/src/Common/NaNUtils.h @@ -25,6 +25,18 @@ inline bool isFinite(T x) return true; } +template +bool canConvertTo(Float64 x) +{ + if constexpr (std::is_floating_point_v) + return true; + if (!isFinite(x)) + return false; + if (x > Float64(std::numeric_limits::max()) || x < Float64(std::numeric_limits::lowest())) + return false; + + return true; +} template T NaNOrZero() diff --git a/src/Common/NamedCollections/NamedCollectionUtils.cpp b/src/Common/NamedCollections/NamedCollectionUtils.cpp index fe0f42467c7..9b569390b3c 100644 --- a/src/Common/NamedCollections/NamedCollectionUtils.cpp +++ b/src/Common/NamedCollections/NamedCollectionUtils.cpp @@ -302,7 +302,7 @@ private: readStringUntilEOF(query, in); ParserCreateNamedCollectionQuery parser; - auto ast = parseQuery(parser, query, "in file " + path, 0, settings.max_parser_depth); + auto ast = parseQuery(parser, query, "in file " + path, 0, settings.max_parser_depth, settings.max_parser_backtracks); const auto & create_query = ast->as(); return create_query; } diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index ab1a16a3edf..e91b5adec87 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -534,6 +534,7 @@ The server successfully detected this situation and will download merged part fr \ M(AggregationPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for aggregation.") \ M(AggregationHashTablesInitializedAsTwoLevel, "How many hash tables were inited as two-level for aggregation.") \ + M(AggregationOptimizedEqualRangesOfKeys, "For how many blocks optimization of equal ranges of keys was applied") \ \ M(MetadataFromKeeperCacheHit, "Number of times an object storage metadata request was answered from cache without making request to Keeper") \ M(MetadataFromKeeperCacheMiss, "Number of times an 
object storage metadata request had to be answered from Keeper") \ diff --git a/src/Common/Scheduler/ISchedulerNode.h b/src/Common/Scheduler/ISchedulerNode.h index 804026d7bf4..20c1f4332da 100644 --- a/src/Common/Scheduler/ISchedulerNode.h +++ b/src/Common/Scheduler/ISchedulerNode.h @@ -387,7 +387,9 @@ public: /// Introspection std::atomic dequeued_requests{0}; + std::atomic canceled_requests{0}; std::atomic dequeued_cost{0}; + std::atomic canceled_cost{0}; std::atomic busy_periods{0}; }; diff --git a/src/Common/Scheduler/ISchedulerQueue.h b/src/Common/Scheduler/ISchedulerQueue.h index cbe63bd304a..532f4bf6c63 100644 --- a/src/Common/Scheduler/ISchedulerQueue.h +++ b/src/Common/Scheduler/ISchedulerQueue.h @@ -50,6 +50,12 @@ public: /// Should be called outside of scheduling subsystem, implementation must be thread-safe. virtual void enqueueRequest(ResourceRequest * request) = 0; + /// Cancel previously enqueued request. + /// Returns `false` and does nothing given unknown or already executed request. + /// Returns `true` if requests has been found and canceled. + /// Should be called outside of scheduling subsystem, implementation must be thread-safe. 
+ virtual bool cancelRequest(ResourceRequest * request) = 0; + /// For introspection ResourceCost getBudget() const { diff --git a/src/Common/Scheduler/Nodes/FairPolicy.h b/src/Common/Scheduler/Nodes/FairPolicy.h index c0e187e6fa9..ce2bf729a04 100644 --- a/src/Common/Scheduler/Nodes/FairPolicy.h +++ b/src/Common/Scheduler/Nodes/FairPolicy.h @@ -134,56 +134,65 @@ public: std::pair dequeueRequest() override { - if (heap_size == 0) - return {nullptr, false}; - - // Recursively pull request from child - auto [request, child_active] = items.front().child->dequeueRequest(); - assert(request != nullptr); - std::pop_heap(items.begin(), items.begin() + heap_size); - Item & current = items[heap_size - 1]; - - // SFQ fairness invariant: system vruntime equals last served request start-time - assert(current.vruntime >= system_vruntime); - system_vruntime = current.vruntime; - - // By definition vruntime is amount of consumed resource (cost) divided by weight - current.vruntime += double(request->cost) / current.child->info.weight; - max_vruntime = std::max(max_vruntime, current.vruntime); - - if (child_active) // Put active child back in heap after vruntime update + // Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr` + while (true) { - std::push_heap(items.begin(), items.begin() + heap_size); - } - else // Deactivate child if it is empty, but remember it's vruntime for latter activations - { - heap_size--; + if (heap_size == 0) + return {nullptr, false}; - // Store index of this inactive child in `parent.idx` - // This enables O(1) search of inactive children instead of O(n) - current.child->info.parent.idx = heap_size; - } + // Recursively pull request from child + auto [request, child_active] = items.front().child->dequeueRequest(); + std::pop_heap(items.begin(), items.begin() + heap_size); + Item & current = items[heap_size - 1]; - // Reset any difference between children on busy period end - if (heap_size == 0) - 
{ - // Reset vtime to zero to avoid floating-point error accumulation, - // but do not reset too often, because it's O(N) - UInt64 ns = clock_gettime_ns(); - if (last_reset_ns + 1000000000 < ns) + if (request) { - last_reset_ns = ns; - for (Item & item : items) - item.vruntime = 0; - max_vruntime = 0; - } - system_vruntime = max_vruntime; - busy_periods++; - } + // SFQ fairness invariant: system vruntime equals last served request start-time + assert(current.vruntime >= system_vruntime); + system_vruntime = current.vruntime; - dequeued_requests++; - dequeued_cost += request->cost; - return {request, heap_size > 0}; + // By definition vruntime is amount of consumed resource (cost) divided by weight + current.vruntime += double(request->cost) / current.child->info.weight; + max_vruntime = std::max(max_vruntime, current.vruntime); + } + + if (child_active) // Put active child back in heap after vruntime update + { + std::push_heap(items.begin(), items.begin() + heap_size); + } + else // Deactivate child if it is empty, but remember it's vruntime for latter activations + { + heap_size--; + + // Store index of this inactive child in `parent.idx` + // This enables O(1) search of inactive children instead of O(n) + current.child->info.parent.idx = heap_size; + } + + // Reset any difference between children on busy period end + if (heap_size == 0) + { + // Reset vtime to zero to avoid floating-point error accumulation, + // but do not reset too often, because it's O(N) + UInt64 ns = clock_gettime_ns(); + if (last_reset_ns + 1000000000 < ns) + { + last_reset_ns = ns; + for (Item & item : items) + item.vruntime = 0; + max_vruntime = 0; + } + system_vruntime = max_vruntime; + busy_periods++; + } + + if (request) + { + dequeued_requests++; + dequeued_cost += request->cost; + return {request, heap_size > 0}; + } + } } bool isActive() override diff --git a/src/Common/Scheduler/Nodes/FifoQueue.h b/src/Common/Scheduler/Nodes/FifoQueue.h index 38ae902bc2f..45ed32343ff 100644 --- 
a/src/Common/Scheduler/Nodes/FifoQueue.h +++ b/src/Common/Scheduler/Nodes/FifoQueue.h @@ -39,8 +39,7 @@ public: void enqueueRequest(ResourceRequest * request) override { - std::unique_lock lock(mutex); - request->enqueue_ns = clock_gettime_ns(); + std::lock_guard lock(mutex); queue_cost += request->cost; bool was_empty = requests.empty(); requests.push_back(request); @@ -50,7 +49,7 @@ public: std::pair dequeueRequest() override { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); if (requests.empty()) return {nullptr, false}; ResourceRequest * result = requests.front(); @@ -63,9 +62,29 @@ public: return {result, !requests.empty()}; } + bool cancelRequest(ResourceRequest * request) override + { + std::lock_guard lock(mutex); + // TODO(serxa): reimplement queue as intrusive list of ResourceRequest to make this O(1) instead of O(N) + for (auto i = requests.begin(), e = requests.end(); i != e; ++i) + { + if (*i == request) + { + requests.erase(i); + if (requests.empty()) + busy_periods++; + queue_cost -= request->cost; + canceled_requests++; + canceled_cost += request->cost; + return true; + } + } + return false; + } + bool isActive() override { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); return !requests.empty(); } @@ -98,14 +117,14 @@ public: std::pair getQueueLengthAndCost() { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); return {requests.size(), queue_cost}; } private: std::mutex mutex; Int64 queue_cost = 0; - std::deque requests; + std::deque requests; // TODO(serxa): reimplement it using intrusive list to avoid allocations/deallocations and O(N) during cancel }; } diff --git a/src/Common/Scheduler/Nodes/PriorityPolicy.h b/src/Common/Scheduler/Nodes/PriorityPolicy.h index 99a09a88f26..22a5155cfeb 100644 --- a/src/Common/Scheduler/Nodes/PriorityPolicy.h +++ b/src/Common/Scheduler/Nodes/PriorityPolicy.h @@ -102,25 +102,31 @@ public: std::pair dequeueRequest() override { - if (items.empty()) - return {nullptr, false}; - 
- // Recursively pull request from child - auto [request, child_active] = items.front().child->dequeueRequest(); - assert(request != nullptr); - - // Deactivate child if it is empty - if (!child_active) + // Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr` + while (true) { - std::pop_heap(items.begin(), items.end()); - items.pop_back(); if (items.empty()) - busy_periods++; - } + return {nullptr, false}; - dequeued_requests++; - dequeued_cost += request->cost; - return {request, !items.empty()}; + // Recursively pull request from child + auto [request, child_active] = items.front().child->dequeueRequest(); + + // Deactivate child if it is empty + if (!child_active) + { + std::pop_heap(items.begin(), items.end()); + items.pop_back(); + if (items.empty()) + busy_periods++; + } + + if (request) + { + dequeued_requests++; + dequeued_cost += request->cost; + return {request, !items.empty()}; + } + } } bool isActive() override diff --git a/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp b/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp index dbf94df8c18..1901a4fd120 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp @@ -38,7 +38,6 @@ TEST(SchedulerDynamicResourceManager, Smoke) { ResourceGuard gA(cA->get("res1"), ResourceGuard::PostponeLocking); gA.lock(); - gA.setFailure(); gA.unlock(); ResourceGuard gB(cB->get("res1")); diff --git a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp index 9fefbc02cbd..f8196d15819 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp @@ -4,6 +4,7 @@ #include +#include #include using namespace DB; @@ -73,6 +74,22 @@ struct ResourceHolder } }; +struct MyRequest : public 
ResourceRequest +{ + std::function on_execute; + + explicit MyRequest(ResourceCost cost_, std::function on_execute_) + : ResourceRequest(cost_) + , on_execute(on_execute_) + {} + + void execute() override + { + if (on_execute) + on_execute(); + } +}; + TEST(SchedulerRoot, Smoke) { ResourceTest t; @@ -111,3 +128,49 @@ TEST(SchedulerRoot, Smoke) EXPECT_TRUE(fc2->requests.contains(&rg.request)); } } + +TEST(SchedulerRoot, Cancel) +{ + ResourceTest t; + + ResourceHolder r1(t); + auto * fc1 = r1.add("/", "1"); + r1.add("/prio"); + auto a = r1.addQueue("/prio/A", "1"); + auto b = r1.addQueue("/prio/B", "2"); + r1.registerResource(); + + std::barrier destruct_sync(2); + std::barrier sync(2); + std::thread consumer1([&] + { + MyRequest request(1,[&] + { + sync.arrive_and_wait(); // (A) + EXPECT_TRUE(fc1->requests.contains(&request)); + sync.arrive_and_wait(); // (B) + request.finish(); + destruct_sync.arrive_and_wait(); // (C) + }); + a.queue->enqueueRequest(&request); + destruct_sync.arrive_and_wait(); // (C) + }); + + std::thread consumer2([&] + { + MyRequest request(1,[&] + { + FAIL() << "This request must be canceled, but instead executes"; + }); + sync.arrive_and_wait(); // (A) wait for request of consumer1 to be inside execute, so that constraint is in violated state and our request will not be executed immediately + b.queue->enqueueRequest(&request); + bool canceled = b.queue->cancelRequest(&request); + EXPECT_TRUE(canceled); + sync.arrive_and_wait(); // (B) release request of consumer1 to be finished + }); + + consumer1.join(); + consumer2.join(); + + EXPECT_TRUE(fc1->requests.empty()); +} diff --git a/src/Common/Scheduler/ResourceGuard.h b/src/Common/Scheduler/ResourceGuard.h index dca4041b176..50f665a384b 100644 --- a/src/Common/Scheduler/ResourceGuard.h +++ b/src/Common/Scheduler/ResourceGuard.h @@ -71,8 +71,7 @@ public: // lock(mutex) is not required because `Dequeued` request cannot be used by the scheduler thread chassert(state == Dequeued); state = Finished; 
- if (constraint) - constraint->finishRequest(this); + ResourceRequest::finish(); } static Request & local() @@ -126,12 +125,6 @@ public: } } - /// Mark request as unsuccessful; by default request is considered to be successful - void setFailure() - { - request.successful = false; - } - ResourceLink link; Request & request; }; diff --git a/src/Common/Scheduler/ResourceRequest.cpp b/src/Common/Scheduler/ResourceRequest.cpp new file mode 100644 index 00000000000..26e8084cdfa --- /dev/null +++ b/src/Common/Scheduler/ResourceRequest.cpp @@ -0,0 +1,13 @@ +#include +#include + +namespace DB +{ + +void ResourceRequest::finish() +{ + if (constraint) + constraint->finishRequest(this); +} + +} diff --git a/src/Common/Scheduler/ResourceRequest.h b/src/Common/Scheduler/ResourceRequest.h index 3d2230746f9..f3153ad382c 100644 --- a/src/Common/Scheduler/ResourceRequest.h +++ b/src/Common/Scheduler/ResourceRequest.h @@ -14,9 +14,6 @@ class ISchedulerConstraint; using ResourceCost = Int64; constexpr ResourceCost ResourceCostMax = std::numeric_limits::max(); -/// Timestamps (nanoseconds since epoch) -using ResourceNs = UInt64; - /* * Request for a resource consumption. The main moving part of the scheduling subsystem. * Resource requests processing workflow: @@ -31,7 +28,7 @@ using ResourceNs = UInt64; * 3) Scheduler calls ISchedulerNode::dequeueRequest() that returns the request. * 4) Callback ResourceRequest::execute() is called to provide access to the resource. * 5) The resource consumption is happening outside of the scheduling subsystem. - * 6) request->constraint->finishRequest() is called when consumption is finished. + * 6) ResourceRequest::finish() is called when consumption is finished. * * Steps (5) and (6) can be omitted if constraint is not used by the resource. * @@ -39,7 +36,10 @@ using ResourceNs = UInt64; * Request ownership is done outside of the scheduling subsystem. * After (6) request can be destructed safely. * - * Request cancelling is not supported yet. 
+ * Request can also be canceled before (3) using ISchedulerQueue::cancelRequest(). + * Returning false means it is too late for request to be canceled. It should be processed in a regular way. + * Returning true means successful cancel and therefore steps (4) and (5) are not going to happen + * and step (6) MUST be omitted. */ class ResourceRequest { @@ -48,32 +48,20 @@ public: /// NOTE: If cost is not known in advance, ResourceBudget should be used (note that every ISchedulerQueue has it) ResourceCost cost; - /// Request outcome - /// Should be filled during resource consumption - bool successful; - /// Scheduler node to be notified on consumption finish /// Auto-filled during request enqueue/dequeue ISchedulerConstraint * constraint; - /// Timestamps for introspection - ResourceNs enqueue_ns; - ResourceNs execute_ns; - ResourceNs finish_ns; - explicit ResourceRequest(ResourceCost cost_ = 1) { reset(cost_); } + /// ResourceRequest object may be reused again after reset() void reset(ResourceCost cost_) { cost = cost_; - successful = true; constraint = nullptr; - enqueue_ns = 0; - execute_ns = 0; - finish_ns = 0; } virtual ~ResourceRequest() = default; @@ -83,6 +71,12 @@ public: /// just triggering start of a consumption, not doing the consumption itself /// (e.g. setting an std::promise or creating a job in a thread pool) virtual void execute() = 0; + + /// Stop resource consumption and notify resource scheduler. + /// Should be called when resource consumption is finished by consumer. + /// ResourceRequest should not be destructed or reset before calling to `finish()`. + /// WARNING: this function MUST not be called if request was canceled. 
+ void finish(); }; } diff --git a/src/Common/Scheduler/SchedulerRoot.h b/src/Common/Scheduler/SchedulerRoot.h index 3a23a8df834..ab3f702a422 100644 --- a/src/Common/Scheduler/SchedulerRoot.h +++ b/src/Common/Scheduler/SchedulerRoot.h @@ -145,22 +145,27 @@ public: std::pair dequeueRequest() override { - if (current == nullptr) // No active resources - return {nullptr, false}; + while (true) + { + if (current == nullptr) // No active resources + return {nullptr, false}; - // Dequeue request from current resource - auto [request, resource_active] = current->root->dequeueRequest(); - assert(request != nullptr); + // Dequeue request from current resource + auto [request, resource_active] = current->root->dequeueRequest(); - // Deactivate resource if required - if (!resource_active) - deactivate(current); - else - current = current->next; // Just move round-robin pointer + // Deactivate resource if required + if (!resource_active) + deactivate(current); + else + current = current->next; // Just move round-robin pointer - dequeued_requests++; - dequeued_cost += request->cost; - return {request, current != nullptr}; + if (request == nullptr) // Possible in case of request cancel, just retry + continue; + + dequeued_requests++; + dequeued_cost += request->cost; + return {request, current != nullptr}; + } } bool isActive() override @@ -245,7 +250,6 @@ private: void execute(ResourceRequest * request) { - request->execute_ns = clock_gettime_ns(); request->execute(); } diff --git a/src/Common/tests/gtest_global_context.cpp b/src/Common/tests/gtest_global_context.cpp index ec86c953c5b..0c1556766a9 100644 --- a/src/Common/tests/gtest_global_context.cpp +++ b/src/Common/tests/gtest_global_context.cpp @@ -10,9 +10,3 @@ ContextHolder & getMutableContext() static ContextHolder holder; return holder; } - -void destroyContext() -{ - auto & holder = getMutableContext(); - return holder.destroy(); -} diff --git a/src/Common/tests/gtest_global_context.h 
b/src/Common/tests/gtest_global_context.h index f846a0dbe4f..7ae8bb32f70 100644 --- a/src/Common/tests/gtest_global_context.h +++ b/src/Common/tests/gtest_global_context.h @@ -28,5 +28,3 @@ struct ContextHolder const ContextHolder & getContext(); ContextHolder & getMutableContext(); - -void destroyContext(); diff --git a/src/Compression/examples/CMakeLists.txt b/src/Compression/examples/CMakeLists.txt index 7bf68e8845e..a7cc6bebf42 100644 --- a/src/Compression/examples/CMakeLists.txt +++ b/src/Compression/examples/CMakeLists.txt @@ -1,5 +1,2 @@ clickhouse_add_executable (compressed_buffer compressed_buffer.cpp) -target_link_libraries (compressed_buffer PRIVATE dbms) - -clickhouse_add_executable (cached_compressed_read_buffer cached_compressed_read_buffer.cpp) -target_link_libraries (cached_compressed_read_buffer PRIVATE dbms) +target_link_libraries (compressed_buffer PRIVATE clickhouse_common_io clickhouse_compression) diff --git a/src/Compression/examples/cached_compressed_read_buffer.cpp b/src/Compression/examples/cached_compressed_read_buffer.cpp deleted file mode 100644 index a8e14ac7271..00000000000 --- a/src/Compression/examples/cached_compressed_read_buffer.cpp +++ /dev/null @@ -1,79 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - - -int main(int argc, char ** argv) -{ - using namespace DB; - - if (argc < 2) - { - std::cerr << "Usage: program path\n"; - return 1; - } - - try - { - UncompressedCache cache("SLRU", 1024, 0.5); - std::string path = argv[1]; - - std::cerr << std::fixed << std::setprecision(3); - - size_t hits = 0; - size_t misses = 0; - - { - Stopwatch watch; - CachedCompressedReadBuffer in( - path, - [&]() - { - return createReadBufferFromFileBase(path, {}); - }, - &cache - ); - WriteBufferFromFile out("/dev/null"); - copyData(in, out); - - std::cerr << "Elapsed: " << watch.elapsedSeconds() << std::endl; - } - - cache.getStats(hits, misses); - std::cerr << "Hits: " << hits << ", misses: " 
<< misses << std::endl; - - { - Stopwatch watch; - CachedCompressedReadBuffer in( - path, - [&]() - { - return createReadBufferFromFileBase(path, {}); - }, - &cache - ); - WriteBufferFromFile out("/dev/null"); - copyData(in, out); - - std::cerr << "Elapsed: " << watch.elapsedSeconds() << std::endl; - } - - cache.getStats(hits, misses); - std::cerr << "Hits: " << hits << ", misses: " << misses << std::endl; - } - catch (const Exception & e) - { - std::cerr << e.what() << ", " << e.displayText() << std::endl; - return 1; - } - - return 0; -} diff --git a/src/Compression/examples/compressed_buffer.cpp b/src/Compression/examples/compressed_buffer.cpp index 74646ff0f28..530f0938662 100644 --- a/src/Compression/examples/compressed_buffer.cpp +++ b/src/Compression/examples/compressed_buffer.cpp @@ -1,7 +1,4 @@ -#include - #include -#include #include #include diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp index 24f16a55c25..16573e035e0 100644 --- a/src/Compression/tests/gtest_compressionCodec.cpp +++ b/src/Compression/tests/gtest_compressionCodec.cpp @@ -442,7 +442,7 @@ CompressionCodecPtr makeCodec(const std::string & codec_string, const DataTypePt { const std::string codec_statement = "(" + codec_string + ")"; Tokens tokens(codec_statement.begin().base(), codec_statement.end().base()); - IParser::Pos token_iterator(tokens, 0); + IParser::Pos token_iterator(tokens, 0, 0); Expected expected; ASTPtr codec_ast; diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 80345db2524..796506a07db 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -121,7 +121,8 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo auth_settings.use_insecure_imds_request.value_or(false), auth_settings.expiration_window_seconds.value_or(S3::DEFAULT_EXPIRATION_WINDOW_SECONDS), 
auth_settings.no_sign_request.value_or(false), - }); + }, + credentials.GetSessionToken()); auto new_client = std::make_shared(std::move(new_uri), std::move(auth_settings), std::move(client)); diff --git a/src/Core/BaseSettings.cpp b/src/Core/BaseSettings.cpp index a7e1ab99af7..c535b9ce65e 100644 --- a/src/Core/BaseSettings.cpp +++ b/src/Core/BaseSettings.cpp @@ -41,13 +41,13 @@ BaseSettingsHelpers::Flags BaseSettingsHelpers::readFlags(ReadBuffer & in) void BaseSettingsHelpers::throwSettingNotFound(std::string_view name) { - throw Exception(ErrorCodes::UNKNOWN_SETTING, "Unknown setting {}", String{name}); + throw Exception(ErrorCodes::UNKNOWN_SETTING, "Unknown setting '{}'", String{name}); } void BaseSettingsHelpers::warningSettingNotFound(std::string_view name) { - LOG_WARNING(getLogger("Settings"), "Unknown setting {}, skipping", name); + LOG_WARNING(getLogger("Settings"), "Unknown setting '{}', skipping", name); } } diff --git a/src/Core/Defines.h b/src/Core/Defines.h index cc6f49aa361..a8dd26519c2 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -63,6 +63,8 @@ static constexpr auto DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC = 120; /// Default limit on recursion depth of recursive descend parser. static constexpr auto DBMS_DEFAULT_MAX_PARSER_DEPTH = 1000; +/// Default limit on the amount of backtracking of recursive descend parser. +static constexpr auto DBMS_DEFAULT_MAX_PARSER_BACKTRACKS = 1000000; /// Default limit on query size. 
static constexpr auto DBMS_DEFAULT_MAX_QUERY_SIZE = 262144; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 48f6b4d621c..f6b965e9dfc 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -175,6 +175,7 @@ class IColumn; M(Bool, enable_positional_arguments, true, "Enable positional arguments in ORDER BY, GROUP BY and LIMIT BY", 0) \ M(Bool, enable_extended_results_for_datetime_functions, false, "Enable date functions like toLastDayOfMonth return Date32 results (instead of Date results) for Date32/DateTime64 arguments.", 0) \ M(Bool, allow_nonconst_timezone_arguments, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()", 0) \ + M(Bool, function_locate_has_mysql_compatible_argument_order, true, "Function locate() has arguments (needle, haystack[, start_pos]) like in MySQL instead of (haystack, needle[, start_pos]) like function position()", 0) \ \ M(Bool, group_by_use_nulls, false, "Treat columns mentioned in ROLLUP, CUBE or GROUPING SETS as Nullable", 0) \ \ @@ -607,6 +608,7 @@ class IColumn; M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). 
If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ + M(UInt64, max_parser_backtracks, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, "Maximum parser backtracking (how many times it tries different alternatives in the recursive descend parsing process).", 0) \ M(Bool, allow_settings_after_format_in_insert, false, "Allow SETTINGS after FORMAT, but note, that this is not always safe (note: this is a compatibility setting).", 0) \ M(Seconds, periodic_live_view_refresh, 60, "Interval after which periodically refreshed live view is forced to refresh.", 0) \ M(Bool, transform_null_in, false, "If enabled, NULL values will be matched with 'IN' operator as if they are considered equal.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 4914f97a6fb..9ab92280610 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -94,7 +94,9 @@ static std::map sett {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication is dependent materialized view cannot work together with async inserts."}, {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, + {"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."}, {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"}, + {"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"}, 
}}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, diff --git a/src/Core/examples/CMakeLists.txt b/src/Core/examples/CMakeLists.txt index 2326eada96d..f30ee25491f 100644 --- a/src/Core/examples/CMakeLists.txt +++ b/src/Core/examples/CMakeLists.txt @@ -6,6 +6,3 @@ target_link_libraries (field PRIVATE dbms) clickhouse_add_executable (string_ref_hash string_ref_hash.cpp) target_link_libraries (string_ref_hash PRIVATE clickhouse_common_io) - -clickhouse_add_executable (mysql_protocol mysql_protocol.cpp) -target_link_libraries (mysql_protocol PRIVATE dbms) diff --git a/src/Core/examples/mysql_protocol.cpp b/src/Core/examples/mysql_protocol.cpp deleted file mode 100644 index a6247418e87..00000000000 --- a/src/Core/examples/mysql_protocol.cpp +++ /dev/null @@ -1,390 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -int main(int argc, char ** argv) -{ - using namespace DB; - using namespace MySQLProtocol; - using namespace MySQLProtocol::Generic; - using namespace MySQLProtocol::Authentication; - using namespace MySQLProtocol::ConnectionPhase; - using namespace MySQLProtocol::ProtocolText; - - - uint8_t server_sequence_id = 1; - uint8_t client_sequence_id = 1; - String user = "default"; - String password = "123"; - String database; - - UInt8 charset_utf8 = 33; - UInt32 max_packet_size = MAX_PACKET_LENGTH; - String mysql_native_password = "mysql_native_password"; - - UInt32 server_capability_flags = CLIENT_PROTOCOL_41 | CLIENT_SECURE_CONNECTION | CLIENT_PLUGIN_AUTH - | CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA | CLIENT_CONNECT_WITH_DB | CLIENT_DEPRECATE_EOF; - - UInt32 client_capability_flags = CLIENT_PROTOCOL_41 | CLIENT_PLUGIN_AUTH | CLIENT_SECURE_CONNECTION; - - /// 
Handshake packet - { - /// 1. Greeting: - /// 1.1 Server writes greeting to client - std::string s0; - WriteBufferFromString out0(s0); - - Handshake server_handshake( - server_capability_flags, -1, "ClickHouse", "mysql_native_password", "aaaaaaaaaaaaaaaaaaaaa", CharacterSet::utf8_general_ci); - server_handshake.writePayload(out0, server_sequence_id); - - /// 1.2 Client reads the greeting - ReadBufferFromString in0(s0); - Handshake client_handshake; - client_handshake.readPayload(in0, client_sequence_id); - - /// Check packet - ASSERT(server_handshake.capability_flags == client_handshake.capability_flags) - ASSERT(server_handshake.status_flags == client_handshake.status_flags) - ASSERT(server_handshake.server_version == client_handshake.server_version) - ASSERT(server_handshake.protocol_version == client_handshake.protocol_version) - ASSERT(server_handshake.auth_plugin_data.substr(0, 20) == client_handshake.auth_plugin_data) - ASSERT(server_handshake.auth_plugin_name == client_handshake.auth_plugin_name) - - /// 2. 
Greeting Response: - std::string s1; - WriteBufferFromString out1(s1); - - /// 2.1 Client writes to server - Native41 native41(password, client_handshake.auth_plugin_data); - String auth_plugin_data = native41.getAuthPluginData(); - HandshakeResponse client_handshake_response( - client_capability_flags, max_packet_size, charset_utf8, user, database, auth_plugin_data, mysql_native_password); - client_handshake_response.writePayload(out1, client_sequence_id); - - /// 2.2 Server reads the response - ReadBufferFromString in1(s1); - HandshakeResponse server_handshake_response; - server_handshake_response.readPayload(in1, server_sequence_id); - - /// Check - ASSERT(server_handshake_response.capability_flags == client_handshake_response.capability_flags) - ASSERT(server_handshake_response.character_set == client_handshake_response.character_set) - ASSERT(server_handshake_response.username == client_handshake_response.username) - ASSERT(server_handshake_response.database == client_handshake_response.database) - ASSERT(server_handshake_response.auth_response == client_handshake_response.auth_response) - ASSERT(server_handshake_response.auth_plugin_name == client_handshake_response.auth_plugin_name) - } - - /// OK Packet - { - // 1. Server writes packet - std::string s0; - WriteBufferFromString out0(s0); - OKPacket server(0x00, server_capability_flags, 0, 0, 0, "", ""); - server.writePayload(out0, server_sequence_id); - - // 2. Client reads packet - ReadBufferFromString in0(s0); - ResponsePacket client(server_capability_flags); - client.readPayload(in0, client_sequence_id); - - // Check - ASSERT(client.getType() == PACKET_OK) - ASSERT(client.ok.header == server.header) - ASSERT(client.ok.status_flags == server.status_flags) - ASSERT(client.ok.capabilities == server.capabilities) - } - - /// ERR Packet - { - // 1. 
Server writes packet - std::string s0; - WriteBufferFromString out0(s0); - ERRPacket server(123, "12345", "This is the error message"); - server.writePayload(out0, server_sequence_id); - - // 2. Client reads packet - ReadBufferFromString in0(s0); - ResponsePacket client(server_capability_flags); - client.readPayload(in0, client_sequence_id); - - // Check - ASSERT(client.getType() == PACKET_ERR) - ASSERT(client.err.header == server.header) - ASSERT(client.err.error_code == server.error_code) - ASSERT(client.err.sql_state == server.sql_state) - ASSERT(client.err.error_message == server.error_message) - } - - /// EOF Packet - { - // 1. Server writes packet - std::string s0; - WriteBufferFromString out0(s0); - EOFPacket server(1, 1); - server.writePayload(out0, server_sequence_id); - - // 2. Client reads packet - ReadBufferFromString in0(s0); - ResponsePacket client(server_capability_flags); - client.readPayload(in0, client_sequence_id); - - // Check - ASSERT(client.getType() == PACKET_EOF) - ASSERT(client.eof.header == server.header) - ASSERT(client.eof.warnings == server.warnings) - ASSERT(client.eof.status_flags == server.status_flags) - } - - /// ColumnDefinition Packet - { - // 1. Server writes packet - std::string s0; - WriteBufferFromString out0(s0); - ColumnDefinition server("schema", "tbl", "org_tbl", "name", "org_name", 33, 0x00, MYSQL_TYPE_STRING, 0x00, 0x00); - server.writePayload(out0, server_sequence_id); - - // 2. 
Client reads packet - ReadBufferFromString in0(s0); - ColumnDefinition client; - client.readPayload(in0, client_sequence_id); - - // Check - ASSERT(client.column_type == server.column_type) - ASSERT(client.column_length == server.column_length) - ASSERT(client.next_length == server.next_length) - ASSERT(client.character_set == server.character_set) - ASSERT(client.decimals == server.decimals) - ASSERT(client.name == server.name) - ASSERT(client.org_name == server.org_name) - ASSERT(client.table == server.table) - ASSERT(client.org_table == server.org_table) - ASSERT(client.schema == server.schema) - } - - /// GTID sets tests. - { - struct Testcase - { - String name; - String sets; - String want; - }; - - Testcase cases[] = { - {"gtid-sets-without-whitespace", - "2c5adab4-d64a-11e5-82df-ac162d72dac0:1-247743812,9f58c169-d121-11e7-835b-ac162db9c048:1-56060985:56060987-56061175:56061177-" - "56061224:56061226-75201528:75201530-75201755:75201757-75201983:75201985-75407550:75407552-75407604:75407606-75407661:" - "75407663-87889848:87889850-87889935:87889937-87890042:87890044-88391955:88391957-88392125:88392127-88392245:88392247-" - "88755771:88755773-88755826:88755828-88755921:88755923-100279047:100279049-100279126:100279128-100279247:100279249-121672430:" - "121672432-121672503:121672505-121672524:121672526-122946019:122946021-122946291:122946293-122946469:122946471-134313284:" - "134313286-134313415:134313417-134313648:134313650-136492728:136492730-136492784:136492786-136492904:136492906-145582402:" - "145582404-145582439:145582441-145582463:145582465-147455222:147455224-147455262:147455264-147455277:147455279-149319049:" - "149319051-149319261:149319263-150635915,a6d83ff6-bfcf-11e7-8c93-246e96158550:1-126618302", - "2c5adab4-d64a-11e5-82df-ac162d72dac0:1-247743812,9f58c169-d121-11e7-835b-ac162db9c048:1-56060985:56060987-56061175:56061177-" - "56061224:56061226-75201528:75201530-75201755:75201757-75201983:75201985-75407550:75407552-75407604:75407606-75407661:" - 
"75407663-87889848:87889850-87889935:87889937-87890042:87890044-88391955:88391957-88392125:88392127-88392245:88392247-" - "88755771:88755773-88755826:88755828-88755921:88755923-100279047:100279049-100279126:100279128-100279247:100279249-121672430:" - "121672432-121672503:121672505-121672524:121672526-122946019:122946021-122946291:122946293-122946469:122946471-134313284:" - "134313286-134313415:134313417-134313648:134313650-136492728:136492730-136492784:136492786-136492904:136492906-145582402:" - "145582404-145582439:145582441-145582463:145582465-147455222:147455224-147455262:147455264-147455277:147455279-149319049:" - "149319051-149319261:149319263-150635915,a6d83ff6-bfcf-11e7-8c93-246e96158550:1-126618302"}, - - {"gtid-sets-with-whitespace", - "2c5adab4-d64a-11e5-82df-ac162d72dac0:1-247743812, 9f58c169-d121-11e7-835b-ac162db9c048:1-56060985:56060987-56061175:56061177", - "2c5adab4-d64a-11e5-82df-ac162d72dac0:1-247743812,9f58c169-d121-11e7-835b-ac162db9c048:1-56060985:56060987-56061175:56061177"}, - - {"gtid-sets-single", "2c5adab4-d64a-11e5-82df-ac162d72dac0:1-247743812", "2c5adab4-d64a-11e5-82df-ac162d72dac0:1-247743812"}}; - - for (auto & tc : cases) - { - GTIDSets gtid_sets; - gtid_sets.parse(tc.sets); - - String want = tc.want; - String got = gtid_sets.toString(); - ASSERT(want == got) - } - } - - { - struct Testcase - { - String name; - String gtid_sets; - String gtid_str; - String want; - }; - - Testcase cases[] = { - {"merge", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-2:4-7", - "10662d71-9d91-11ea-bbc2-0242ac110003:3", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-7"}, - - {"merge-front", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-2:5-7", - "10662d71-9d91-11ea-bbc2-0242ac110003:3", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-3:5-7"}, - - {"extend-interval", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-2:6-7", - "10662d71-9d91-11ea-bbc2-0242ac110003:4", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-2:4:6-7"}, - - {"extend-interval", - 
"10662d71-9d91-11ea-bbc2-0242ac110003:1-2:4:7-9", - "10662d71-9d91-11ea-bbc2-0242ac110003:5", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-2:4-5:7-9"}, - - {"extend-interval", - "10662d71-9d91-11ea-bbc2-0242ac110003:6-7", - "10662d71-9d91-11ea-bbc2-0242ac110003:4", - "10662d71-9d91-11ea-bbc2-0242ac110003:4:6-7"}, - - {"extend-interval", - "10662d71-9d91-11ea-bbc2-0242ac110003:6-7", - "10662d71-9d91-11ea-bbc2-0242ac110003:9", - "10662d71-9d91-11ea-bbc2-0242ac110003:6-7:9"}, - - {"extend-interval", - "10662d71-9d91-11ea-bbc2-0242ac110003:6-7", - "20662d71-9d91-11ea-bbc2-0242ac110003:9", - "10662d71-9d91-11ea-bbc2-0242ac110003:6-7,20662d71-9d91-11ea-bbc2-0242ac110003:9"}, - - {"shrink-sequence", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-3:4-5:7", - "10662d71-9d91-11ea-bbc2-0242ac110003:6", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-7"}, - - {"shrink-sequence", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-3:4-5:10", - "10662d71-9d91-11ea-bbc2-0242ac110003:8", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-5:8:10" - } - }; - - for (auto & tc : cases) - { - GTIDSets gtid_sets; - gtid_sets.parse(tc.gtid_sets); - ASSERT(tc.gtid_sets == gtid_sets.toString()) - - GTIDSets gtid_sets1; - gtid_sets1.parse(tc.gtid_str); - - GTID gtid; - gtid.uuid = gtid_sets1.sets[0].uuid; - gtid.seq_no = gtid_sets1.sets[0].intervals[0].start; - gtid_sets.update(gtid); - - String want = tc.want; - String got = gtid_sets.toString(); - ASSERT(want == got) - } - } - - { - /// mysql_protocol --host=172.17.0.3 --user=root --password=123 --db=sbtest - try - { - boost::program_options::options_description desc("Allowed options"); - desc.add_options()("host", boost::program_options::value()->required(), "master host")( - "port", boost::program_options::value()->default_value(3306), "master port")( - "user", boost::program_options::value()->default_value("root"), "master user")( - "password", boost::program_options::value()->required(), "master password")( - "gtid", 
boost::program_options::value()->default_value(""), "executed GTID sets")( - "db", boost::program_options::value()->required(), "replicate do db")( - "binlog_checksum", boost::program_options::value()->default_value("CRC32"), "master binlog_checksum"); - - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); - if (argc == 0) - { - return 1; - } - - auto host = options.at("host").as(); - auto port = options.at("port").as(); - auto master_user = options.at("user").as(); - auto master_password = options.at("password").as(); - auto gtid_sets = options.at("gtid").as(); - auto replicate_db = options.at("db").as(); - auto binlog_checksum = options.at("binlog_checksum").as(); - - std::cerr << "Master Host: " << host << ", Port: " << port << ", User: " << master_user << ", Password: " << master_password - << ", Replicate DB: " << replicate_db << ", GTID: " << gtid_sets << std::endl; - - UInt32 slave_id = 9004; - MySQLClient slave(host, port, master_user, master_password); - - /// Connect to the master. - slave.connect(); - slave.startBinlogDumpGTID(slave_id, replicate_db, {}, gtid_sets, binlog_checksum); - - WriteBufferFromOStream cerr(std::cerr); - - /// Read one binlog event on by one. 
- while (true) - { - auto event = slave.readOneBinlogEvent(); - switch (event->type()) - { - case MYSQL_QUERY_EVENT: { - auto binlog_event = std::static_pointer_cast(event); - binlog_event->dump(cerr); - - Position pos = slave.getPosition(); - pos.dump(cerr); - break; - } - case MYSQL_WRITE_ROWS_EVENT: { - auto binlog_event = std::static_pointer_cast(event); - binlog_event->dump(cerr); - - Position pos = slave.getPosition(); - pos.dump(cerr); - break; - } - case MYSQL_UPDATE_ROWS_EVENT: { - auto binlog_event = std::static_pointer_cast(event); - binlog_event->dump(cerr); - - Position pos = slave.getPosition(); - pos.dump(cerr); - break; - } - case MYSQL_DELETE_ROWS_EVENT: { - auto binlog_event = std::static_pointer_cast(event); - binlog_event->dump(cerr); - - Position pos = slave.getPosition(); - pos.dump(cerr); - break; - } - default: - if (event->header.type != MySQLReplication::EventType::HEARTBEAT_EVENT) - { - event->dump(cerr); - } - break; - } - } - } - catch (const Exception & ex) - { - std::cerr << "Error: " << ex.message() << std::endl; - return 1; - } - } -} diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index d154b386ace..844384f3c95 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -56,13 +56,14 @@ DataTypePtr DataTypeFactory::getImpl(const String & full_name) const { String out_err; const char * start = full_name.data(); - ast = tryParseQuery(parser, start, start + full_name.size(), out_err, false, "data type", false, DBMS_DEFAULT_MAX_QUERY_SIZE, data_type_max_parse_depth); + ast = tryParseQuery(parser, start, start + full_name.size(), out_err, false, "data type", false, + DBMS_DEFAULT_MAX_QUERY_SIZE, data_type_max_parse_depth, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true); if (!ast) return nullptr; } else { - ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", false, data_type_max_parse_depth); + ast = parseQuery(parser, full_name.data(), 
full_name.data() + full_name.size(), "data type", false, data_type_max_parse_depth, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); } return getImpl(ast); diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 55f584ef1e0..4403e3d9bd4 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -533,6 +533,8 @@ class DataTypeDateTime; class DataTypeDateTime64; template constexpr bool IsDataTypeDecimal> = true; + +/// TODO: remove the explicit specialization below. template <> inline constexpr bool IsDataTypeDecimal = true; template constexpr bool IsDataTypeNumber> = true; diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index 1bdc29daebd..67bf7af7799 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -5,13 +5,11 @@ #include #include #include -#include #include #include -#include #include +#include #include -#include #include #include @@ -29,6 +27,7 @@ namespace ErrorCodes extern const int INCORRECT_DATA; extern const int CANNOT_READ_ALL_DATA; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int CANNOT_PARSE_TEXT; extern const int EXPERIMENTAL_FEATURE_ERROR; } @@ -344,7 +343,20 @@ void SerializationObject::deserializeBinaryBulkFromString( state.nested_serialization->deserializeBinaryBulkWithMultipleStreams( column_string, limit, settings, state.nested_state, cache); - ConvertImplGenericFromString::executeImpl(*column_string, column_object, *this, column_string->size()); + size_t input_rows_count = column_string->size(); + column_object.reserve(input_rows_count); + + FormatSettings format_settings; + for (size_t i = 0; i < input_rows_count; ++i) + { + const auto & val = column_string->getDataAt(i); + ReadBufferFromMemory read_buffer(val.data, val.size); + deserializeWholeText(column_object, read_buffer, format_settings); + + if (!read_buffer.eof()) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, + "Cannot 
parse string to column Object. Expected eof"); + } } template diff --git a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp index e40734e0a57..0ae325871fb 100644 --- a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp +++ b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp @@ -24,6 +24,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) auto initialize = [&]() mutable { + if (context) + return true; + shared_context = Context::createShared(); context = Context::createGlobal(shared_context.get()); context->makeGlobalContext(); diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index cb85119e3b0..75a01a6190f 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -444,8 +444,9 @@ namespace ParserSelectWithUnionQuery parser; String description = fmt::format("Query for ClickHouse dictionary {}", data.table_name); String fixed_query = removeWhereConditionPlaceholder(query); + const Settings & settings = data.context->getSettingsRef(); ASTPtr select = parseQuery(parser, fixed_query, description, - data.context->getSettingsRef().max_query_size, data.context->getSettingsRef().max_parser_depth); + settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); DDLDependencyVisitor::Visitor visitor{data}; visitor.visit(select); diff --git a/src/Databases/DatabaseDictionary.cpp b/src/Databases/DatabaseDictionary.cpp index 9a65c7a46ef..76fdb4fa961 100644 --- a/src/Databases/DatabaseDictionary.cpp +++ b/src/Databases/DatabaseDictionary.cpp @@ -115,7 +115,7 @@ ASTPtr DatabaseDictionary::getCreateTableQueryImpl(const String & table_name, Co const char * pos = query.data(); std::string error_message; auto ast = tryParseQuery(parser, pos, pos + query.size(), error_message, - /* hilite = */ false, "", /* allow_multi_statements = */ false, 0, 
settings.max_parser_depth); + /* hilite = */ false, "", /* allow_multi_statements = */ false, 0, settings.max_parser_depth, settings.max_parser_backtracks, true); if (!ast && throw_on_error) throw Exception::createDeprecated(error_message, ErrorCodes::SYNTAX_ERROR); @@ -134,7 +134,7 @@ ASTPtr DatabaseDictionary::getCreateDatabaseQuery() const } auto settings = getContext()->getSettingsRef(); ParserCreateQuery parser; - return parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth); + return parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth, settings.max_parser_backtracks); } void DatabaseDictionary::shutdown() diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 5af1e1ae0d2..05af0acf978 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -187,7 +187,7 @@ ASTPtr DatabaseFilesystem::getCreateDatabaseQuery() const const String query = fmt::format("CREATE DATABASE {} ENGINE = Filesystem('{}')", backQuoteIfNeed(getDatabaseName()), path); ParserCreateQuery parser; - ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth); + ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth, settings.max_parser_backtracks); if (const auto database_comment = getDatabaseComment(); !database_comment.empty()) { diff --git a/src/Databases/DatabaseHDFS.cpp b/src/Databases/DatabaseHDFS.cpp index 3a1e6b16ccf..2688ff2443c 100644 --- a/src/Databases/DatabaseHDFS.cpp +++ b/src/Databases/DatabaseHDFS.cpp @@ -183,7 +183,7 @@ ASTPtr DatabaseHDFS::getCreateDatabaseQuery() const ParserCreateQuery parser; const String query = fmt::format("CREATE DATABASE {} ENGINE = HDFS('{}')", backQuoteIfNeed(getDatabaseName()), source); - ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, 
settings.max_parser_depth); + ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth, settings.max_parser_backtracks); if (const auto database_comment = getDatabaseComment(); !database_comment.empty()) { diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index fcb073644c5..dcfc1916450 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -526,7 +526,7 @@ ASTPtr DatabaseOnDisk::getCreateDatabaseQuery() const /// If database.sql doesn't exist, then engine is Ordinary String query = "CREATE DATABASE " + backQuoteIfNeed(getDatabaseName()) + " ENGINE = Ordinary"; ParserCreateQuery parser; - ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth); + ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth, settings.max_parser_backtracks); } if (const auto database_comment = getDatabaseComment(); !database_comment.empty()) @@ -707,7 +707,7 @@ ASTPtr DatabaseOnDisk::parseQueryFromMetadata( const char * pos = query.data(); std::string error_message; auto ast = tryParseQuery(parser, pos, pos + query.size(), error_message, /* hilite = */ false, - "in file " + metadata_file_path, /* allow_multi_statements = */ false, 0, settings.max_parser_depth); + "in file " + metadata_file_path, /* allow_multi_statements = */ false, 0, settings.max_parser_depth, settings.max_parser_backtracks, true); if (!ast && throw_on_error) throw Exception::createDeprecated(error_message, ErrorCodes::SYNTAX_ERROR); @@ -765,12 +765,14 @@ ASTPtr DatabaseOnDisk::getCreateQueryFromStorage(const String & table_name, cons auto ast_storage = std::make_shared(); ast_storage->set(ast_storage->engine, ast_engine); - unsigned max_parser_depth = static_cast(getContext()->getSettingsRef().max_parser_depth); - auto create_table_query = DB::getCreateQueryFromStorage(storage, - ast_storage, - false, - 
max_parser_depth, - throw_on_error); + const Settings & settings = getContext()->getSettingsRef(); + auto create_table_query = DB::getCreateQueryFromStorage( + storage, + ast_storage, + false, + static_cast(settings.max_parser_depth), + static_cast(settings.max_parser_backtracks), + throw_on_error); create_table_query->set(create_table_query->as()->comment, std::make_shared("SYSTEM TABLE is built on the fly.")); diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index bc552b9c927..95bdcfc7dce 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -440,10 +440,22 @@ void DatabaseOrdinary::stopLoading() DatabaseTablesIteratorPtr DatabaseOrdinary::getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const { - auto result = DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name); - std::scoped_lock lock(mutex); - typeid_cast(*result).setLoadTasks(startup_table); - return result; + // Wait for every table (matching the filter) to be loaded and started up before we make the snapshot. + // It is important, because otherwise a table might be: + // - not attached and thus will be missed in the snapshot; + // - not started, which is not good for DDL operations. 
+ LoadTaskPtrs tasks_to_wait; + { + std::lock_guard lock(mutex); + if (!filter_by_table_name) + tasks_to_wait.reserve(startup_table.size()); + for (const auto & [table_name, task] : startup_table) + if (!filter_by_table_name || filter_by_table_name(table_name)) + tasks_to_wait.emplace_back(task); + } + waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), tasks_to_wait); + + return DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name); } void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) @@ -469,7 +481,7 @@ void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & ta statement.data() + statement.size(), "in file " + table_metadata_path, 0, - local_context->getSettingsRef().max_parser_depth); + local_context->getSettingsRef().max_parser_depth, local_context->getSettingsRef().max_parser_backtracks); applyMetadataChangesToCreateQuery(ast, metadata); diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 9cf19a251f7..3b6a712510d 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -812,7 +812,8 @@ static UUID getTableUUIDIfReplicated(const String & metadata, ContextPtr context ParserCreateQuery parser; auto size = context->getSettingsRef().max_query_size; auto depth = context->getSettingsRef().max_parser_depth; - ASTPtr query = parseQuery(parser, metadata, size, depth); + auto backtracks = context->getSettingsRef().max_parser_backtracks; + ASTPtr query = parseQuery(parser, metadata, size, depth, backtracks); const ASTCreateQuery & create = query->as(); if (!create.storage || !create.storage->engine) return UUIDHelpers::Nil; @@ -1234,7 +1235,7 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node { ParserCreateQuery parser; String description = "in ZooKeeper " + zookeeper_path + "/metadata/" + node_name; - auto ast = 
parseQuery(parser, query, description, 0, getContext()->getSettingsRef().max_parser_depth); + auto ast = parseQuery(parser, query, description, 0, getContext()->getSettingsRef().max_parser_depth, getContext()->getSettingsRef().max_parser_backtracks); auto & create = ast->as(); if (create.uuid == UUIDHelpers::Nil || create.getTable() != TABLE_WITH_UUID_NAME_PLACEHOLDER || create.database) @@ -1559,7 +1560,7 @@ DatabaseReplicated::getTablesForBackup(const FilterByNameFunction & filter, cons for (const auto & [table_name, metadata] : snapshot) { ParserCreateQuery parser; - auto create_table_query = parseQuery(parser, metadata, 0, getContext()->getSettingsRef().max_parser_depth); + auto create_table_query = parseQuery(parser, metadata, 0, getContext()->getSettingsRef().max_parser_depth, getContext()->getSettingsRef().max_parser_backtracks); auto & create = create_table_query->as(); create.attach = false; diff --git a/src/Databases/DatabaseS3.cpp b/src/Databases/DatabaseS3.cpp index d2ca5a05ea4..159a5242dbe 100644 --- a/src/Databases/DatabaseS3.cpp +++ b/src/Databases/DatabaseS3.cpp @@ -191,7 +191,7 @@ ASTPtr DatabaseS3::getCreateDatabaseQuery() const creation_args += fmt::format(", '{}', '{}'", config.access_key_id.value(), config.secret_access_key.value()); const String query = fmt::format("CREATE DATABASE {} ENGINE = S3({})", backQuoteIfNeed(getDatabaseName()), creation_args); - ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth); + ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth, settings.max_parser_backtracks); if (const auto database_comment = getDatabaseComment(); !database_comment.empty()) { diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 963cf0064df..f8d6ad69ba8 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -108,7 +108,8 @@ void 
applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo } -ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_storage, bool only_ordinary, uint32_t max_parser_depth, bool throw_on_error) +ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_storage, bool only_ordinary, + uint32_t max_parser_depth, uint32_t max_parser_backtracks, bool throw_on_error) { auto table_id = storage->getStorageID(); auto metadata_ptr = storage->getInMemoryMetadataPtr(); @@ -148,7 +149,7 @@ ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_ Expected expected; expected.max_parsed_pos = string_end; Tokens tokens(type_name.c_str(), string_end); - IParser::Pos pos(tokens, max_parser_depth); + IParser::Pos pos(tokens, max_parser_depth, max_parser_backtracks); ParserDataType parser; if (!parser.parse(pos, ast_type, expected)) { diff --git a/src/Databases/DatabasesCommon.h b/src/Databases/DatabasesCommon.h index 4e9d967c11a..81a3c55a435 100644 --- a/src/Databases/DatabasesCommon.h +++ b/src/Databases/DatabasesCommon.h @@ -13,7 +13,8 @@ namespace DB { void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemoryMetadata & metadata); -ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_storage, bool only_ordinary, uint32_t max_parser_depth, bool throw_on_error); +ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_storage, bool only_ordinary, + uint32_t max_parser_depth, uint32_t max_parser_backtracks, bool throw_on_error); /// Cleans a CREATE QUERY from temporary flags like "IF NOT EXISTS", "OR REPLACE", "AS SELECT" (for non-views), etc. 
void cleanupObjectDefinitionFromTemporaryFlags(ASTCreateQuery & query); diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index ec380fa759d..75662bfebe3 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -77,17 +77,12 @@ private: Tables tables; Tables::iterator it; - // Tasks to wait before returning a table - using Tasks = std::unordered_map; - Tasks tasks; - protected: DatabaseTablesSnapshotIterator(DatabaseTablesSnapshotIterator && other) noexcept : IDatabaseTablesIterator(std::move(other.database_name)) { size_t idx = std::distance(other.tables.begin(), other.it); std::swap(tables, other.tables); - std::swap(tasks, other.tasks); other.it = other.tables.end(); it = tables.begin(); std::advance(it, idx); @@ -110,17 +105,7 @@ public: const String & name() const override { return it->first; } - const StoragePtr & table() const override - { - if (auto task = tasks.find(it->first); task != tasks.end()) - waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), task->second); - return it->second; - } - - void setLoadTasks(const Tasks & tasks_) - { - tasks = tasks_; - } + const StoragePtr & table() const override { return it->second; } }; using DatabaseTablesIteratorPtr = std::unique_ptr; diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 96a5c3a18ce..d9b0f7f9ac7 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -174,12 +174,14 @@ ASTPtr DatabaseMySQL::getCreateTableQueryImpl(const String & table_name, Context ast_storage->settings = nullptr; } - unsigned max_parser_depth = static_cast(getContext()->getSettingsRef().max_parser_depth); - auto create_table_query = DB::getCreateQueryFromStorage(storage, - table_storage_define, - true, - max_parser_depth, - throw_on_error); + const Settings & settings = getContext()->getSettingsRef(); + auto create_table_query = DB::getCreateQueryFromStorage( + storage, + table_storage_define, + true, + 
static_cast(settings.max_parser_depth), + static_cast(settings.max_parser_backtracks), + throw_on_error); return create_table_query; } diff --git a/src/Databases/MySQL/tryConvertStringLiterals.cpp b/src/Databases/MySQL/tryConvertStringLiterals.cpp index ab392b301e8..ac65d510f67 100644 --- a/src/Databases/MySQL/tryConvertStringLiterals.cpp +++ b/src/Databases/MySQL/tryConvertStringLiterals.cpp @@ -61,7 +61,7 @@ static bool tryReadCharset( bool tryConvertStringLiterals(String & query) { Tokens tokens(query.data(), query.data() + query.size()); - IParser::Pos pos(tokens, 0); + IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); Expected expected; String rewritten_query; rewritten_query.reserve(query.size()); diff --git a/src/Databases/MySQL/tryParseTableIDFromDDL.cpp b/src/Databases/MySQL/tryParseTableIDFromDDL.cpp index a01eb311450..4fe0f44c767 100644 --- a/src/Databases/MySQL/tryParseTableIDFromDDL.cpp +++ b/src/Databases/MySQL/tryParseTableIDFromDDL.cpp @@ -10,7 +10,7 @@ StorageID tryParseTableIDFromDDL(const String & query, const String & default_da { bool is_ddl = false; Tokens tokens(query.data(), query.data() + query.size()); - IParser::Pos pos(tokens, 0); + IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); Expected expected; if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected)) { diff --git a/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp index c5a366698e6..9ecc81c693f 100644 --- a/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp +++ b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp @@ -37,7 +37,7 @@ static void quoteLiteral( bool tryQuoteUnrecognizedTokens(String & query) { Tokens tokens(query.data(), query.data() + query.size()); - IParser::Pos pos(tokens, 0); + IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH, 
DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); Expected expected; String rewritten_query; const char * copy_from = query.data(); diff --git a/src/Databases/SQLite/DatabaseSQLite.cpp b/src/Databases/SQLite/DatabaseSQLite.cpp index b3d5288cdf7..b7a82fd9d0f 100644 --- a/src/Databases/SQLite/DatabaseSQLite.cpp +++ b/src/Databases/SQLite/DatabaseSQLite.cpp @@ -194,10 +194,10 @@ ASTPtr DatabaseSQLite::getCreateTableQueryImpl(const String & table_name, Contex /// Add table_name to engine arguments storage_engine_arguments->children.insert(storage_engine_arguments->children.begin() + 1, std::make_shared(table_id.table_name)); - unsigned max_parser_depth = static_cast(getContext()->getSettingsRef().max_parser_depth); + const Settings & settings = getContext()->getSettingsRef(); + auto create_table_query = DB::getCreateQueryFromStorage(storage, table_storage_define, true, - max_parser_depth, - throw_on_error); + static_cast(settings.max_parser_depth), static_cast(settings.max_parser_backtracks), throw_on_error); return create_table_query; } diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index 6e9b09f8919..935658719db 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/src/Dictionaries/DictionaryPipelineExecutor.cpp b/src/Dictionaries/DictionaryPipelineExecutor.cpp new file mode 100644 index 00000000000..30d1ab95f53 --- /dev/null +++ b/src/Dictionaries/DictionaryPipelineExecutor.cpp @@ -0,0 +1,42 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +DictionaryPipelineExecutor::DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async) + : async_executor(async ? std::make_unique(pipeline_) : nullptr) + , executor(async ? 
nullptr : std::make_unique(pipeline_)) +{ +} + +bool DictionaryPipelineExecutor::pull(Block & block) +{ + if (async_executor) + { + while (true) + { + bool has_data = async_executor->pull(block); + if (has_data && !block) + continue; + return has_data; + } + } + else if (executor) + return executor->pull(block); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "DictionaryPipelineExecutor is not initialized"); +} + +DictionaryPipelineExecutor::~DictionaryPipelineExecutor() = default; + +} diff --git a/src/Dictionaries/DictionaryPipelineExecutor.h b/src/Dictionaries/DictionaryPipelineExecutor.h new file mode 100644 index 00000000000..601213e5039 --- /dev/null +++ b/src/Dictionaries/DictionaryPipelineExecutor.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ + +class Block; +class QueryPipeline; +class PullingAsyncPipelineExecutor; +class PullingPipelineExecutor; + +/// Wrapper for `Pulling(Async)PipelineExecutor` to dynamically dispatch calls to the right executor +class DictionaryPipelineExecutor +{ +public: + DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async); + bool pull(Block & block); + + ~DictionaryPipelineExecutor(); + +private: + std::unique_ptr async_executor; + std::unique_ptr executor; +}; + +} diff --git a/src/Dictionaries/DictionarySourceHelpers.cpp b/src/Dictionaries/DictionarySourceHelpers.cpp index d9a4d9ccbcf..f0e1bc4109a 100644 --- a/src/Dictionaries/DictionarySourceHelpers.cpp +++ b/src/Dictionaries/DictionarySourceHelpers.cpp @@ -9,15 +9,11 @@ #include #include -#include -#include - namespace DB { namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; } @@ -135,29 +131,4 @@ String TransformWithAdditionalColumns::getName() const return "TransformWithAdditionalColumns"; } -DictionaryPipelineExecutor::DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async) - : async_executor(async ? std::make_unique(pipeline_) : nullptr) - , executor(async ? 
nullptr : std::make_unique(pipeline_)) -{} - -bool DictionaryPipelineExecutor::pull(Block & block) -{ - if (async_executor) - { - while (true) - { - bool has_data = async_executor->pull(block); - if (has_data && !block) - continue; - return has_data; - } - } - else if (executor) - return executor->pull(block); - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "DictionaryPipelineExecutor is not initialized"); -} - -DictionaryPipelineExecutor::~DictionaryPipelineExecutor() = default; - } diff --git a/src/Dictionaries/DictionarySourceHelpers.h b/src/Dictionaries/DictionarySourceHelpers.h index a545b5cdac7..39c6e7b3c42 100644 --- a/src/Dictionaries/DictionarySourceHelpers.h +++ b/src/Dictionaries/DictionarySourceHelpers.h @@ -16,10 +16,6 @@ namespace DB struct DictionaryStructure; class SettingsChanges; -class PullingPipelineExecutor; -class PullingAsyncPipelineExecutor; -class QueryPipeline; - /// For simple key Block blockForIds( @@ -55,17 +51,4 @@ private: size_t current_range_index = 0; }; -/// Wrapper for `Pulling(Async)PipelineExecutor` to dynamically dispatch calls to the right executor -class DictionaryPipelineExecutor -{ -public: - DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async); - bool pull(Block & block); - - ~DictionaryPipelineExecutor(); -private: - std::unique_ptr async_executor; - std::unique_ptr executor; -}; - } diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h index 55060b1592f..56d11be9837 100644 --- a/src/Dictionaries/DictionaryStructure.h +++ b/src/Dictionaries/DictionaryStructure.h @@ -41,6 +41,33 @@ enum class AttributeUnderlyingType : TypeIndexUnderlying #undef map_item + +#define CALL_FOR_ALL_DICTIONARY_ATTRIBUTE_TYPES(M) \ + M(UInt8) \ + M(UInt16) \ + M(UInt32) \ + M(UInt64) \ + M(UInt128) \ + M(UInt256) \ + M(Int8) \ + M(Int16) \ + M(Int32) \ + M(Int64) \ + M(Int128) \ + M(Int256) \ + M(Decimal32) \ + M(Decimal64) \ + M(Decimal128) \ + M(Decimal256) \ + M(DateTime64) \ + 
M(Float32) \ + M(Float64) \ + M(UUID) \ + M(IPv4) \ + M(IPv6) \ + M(String) \ + M(Array) + /// Min and max lifetimes for a dictionary or its entry using DictionaryLifetime = ExternalLoadableLifetime; diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index efb7d0a176c..fc58ff525bd 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index d09f402143e..2420c07277c 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index b3b8cc56868..46a0af487f5 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index e1c9572e607..1bc6d16c932 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index 485b48d9d81..1456a0db750 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -1,6 +1,5 @@ #include "PolygonDictionary.h" -#include #include #include @@ -15,7 +14,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp new file mode 100644 index 00000000000..30a0123ade6 --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -0,0 +1,225 
@@ +#include + +namespace DB +{ + +template +ColumnPtr RangeHashedDictionary::getColumn( + const std::string & attribute_name, + const DataTypePtr & attribute_type, + const Columns & key_columns, + const DataTypes & key_types, + DefaultOrFilter default_or_filter) const +{ + bool is_short_circuit = std::holds_alternative(default_or_filter); + assert(is_short_circuit || std::holds_alternative(default_or_filter)); + + if (dictionary_key_type == DictionaryKeyType::Complex) + { + auto key_types_copy = key_types; + key_types_copy.pop_back(); + dict_struct.validateKeyTypes(key_types_copy); + } + + ColumnPtr result; + + const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, attribute_type); + const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second; + const auto & attribute = attributes[attribute_index]; + + /// Cast range column to storage type + Columns modified_key_columns = key_columns; + const ColumnPtr & range_storage_column = key_columns.back(); + ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""}; + modified_key_columns.back() = castColumnAccurate(column_to_cast, dict_struct.range_min->type); + + size_t keys_size = key_columns.front()->size(); + bool is_attribute_nullable = attribute.is_value_nullable.has_value(); + + ColumnUInt8::MutablePtr col_null_map_to; + ColumnUInt8::Container * vec_null_map_to = nullptr; + if (is_attribute_nullable) + { + col_null_map_to = ColumnUInt8::create(keys_size, false); + vec_null_map_to = &col_null_map_to->getData(); + } + + auto type_call = [&](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; + using ColumnProvider = DictionaryAttributeColumnProvider; + + auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size); + + if (is_short_circuit) + { + IColumn::Filter & default_mask = 
std::get(default_or_filter).get(); + size_t keys_found = 0; + + if constexpr (std::is_same_v) + { + auto * out = column.get(); + + keys_found = getItemsShortCircuitImpl( + attribute, + modified_key_columns, + [&](size_t, const Array & value, bool) + { + out->insert(value); + }, + default_mask); + } + else if constexpr (std::is_same_v) + { + auto * out = column.get(); + + if (is_attribute_nullable) + keys_found = getItemsShortCircuitImpl( + attribute, + modified_key_columns, + [&](size_t row, StringRef value, bool is_null) + { + (*vec_null_map_to)[row] = is_null; + out->insertData(value.data, value.size); + }, + default_mask); + else + keys_found = getItemsShortCircuitImpl( + attribute, + modified_key_columns, + [&](size_t, StringRef value, bool) + { + out->insertData(value.data, value.size); + }, + default_mask); + } + else + { + auto & out = column->getData(); + + if (is_attribute_nullable) + keys_found = getItemsShortCircuitImpl( + attribute, + modified_key_columns, + [&](size_t row, const auto value, bool is_null) + { + (*vec_null_map_to)[row] = is_null; + out[row] = value; + }, + default_mask); + else + keys_found = getItemsShortCircuitImpl( + attribute, + modified_key_columns, + [&](size_t row, const auto value, bool) + { + out[row] = value; + }, + default_mask); + + out.resize(keys_found); + } + + if (is_attribute_nullable) + vec_null_map_to->resize(keys_found); + } + else + { + const ColumnPtr & default_values_column = std::get(default_or_filter).get(); + + DictionaryDefaultValueExtractor default_value_extractor( + dictionary_attribute.null_value, default_values_column); + + if constexpr (std::is_same_v) + { + auto * out = column.get(); + + getItemsImpl( + attribute, + modified_key_columns, + [&](size_t, const Array & value, bool) + { + out->insert(value); + }, + default_value_extractor); + } + else if constexpr (std::is_same_v) + { + auto * out = column.get(); + + if (is_attribute_nullable) + getItemsImpl( + attribute, + modified_key_columns, + [&](size_t 
row, StringRef value, bool is_null) + { + (*vec_null_map_to)[row] = is_null; + out->insertData(value.data, value.size); + }, + default_value_extractor); + else + getItemsImpl( + attribute, + modified_key_columns, + [&](size_t, StringRef value, bool) + { + out->insertData(value.data, value.size); + }, + default_value_extractor); + } + else + { + auto & out = column->getData(); + + if (is_attribute_nullable) + getItemsImpl( + attribute, + modified_key_columns, + [&](size_t row, const auto value, bool is_null) + { + (*vec_null_map_to)[row] = is_null; + out[row] = value; + }, + default_value_extractor); + else + getItemsImpl( + attribute, + modified_key_columns, + [&](size_t row, const auto value, bool) + { + out[row] = value; + }, + default_value_extractor); + } + } + + result = std::move(column); + }; + + callOnDictionaryAttributeType(attribute.type, type_call); + + if (is_attribute_nullable) + result = ColumnNullable::create(result, std::move(col_null_map_to)); + + return result; +} + +template +ColumnPtr RangeHashedDictionary::getColumn( + const std::string & attribute_name, + const DataTypePtr & attribute_type, + const Columns & key_columns, + const DataTypes & key_types, + DefaultOrFilter default_or_filter) const; + +template +ColumnPtr RangeHashedDictionary::getColumn( + const std::string & attribute_name, + const DataTypePtr & attribute_type, + const Columns & key_columns, + const DataTypes & key_types, + DefaultOrFilter default_or_filter) const; + +} diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 1a6ee7e81d4..0469e82d7be 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include @@ -29,11 +31,6 @@ #include #include -#include -#include -#include - - namespace DB { @@ -46,7 +43,6 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; } - enum class RangeHashedDictionaryLookupStrategy 
: uint8_t { min, @@ -238,18 +234,21 @@ private: static Attribute createAttribute(const DictionaryAttribute & dictionary_attribute); - template + template + using ValueSetterFunc = std::function; + + template void getItemsImpl( const Attribute & attribute, const Columns & key_columns, - ValueSetter && set_value, + ValueSetterFunc && set_value, DefaultValueExtractor & default_value_extractor) const; - template + template size_t getItemsShortCircuitImpl( const Attribute & attribute, const Columns & key_columns, - ValueSetter && set_value, + ValueSetterFunc && set_value, IColumn::Filter & default_mask) const; ColumnPtr getColumnInternal( @@ -341,209 +340,6 @@ RangeHashedDictionary::RangeHashedDictionary( calculateBytesAllocated(); } -template -ColumnPtr RangeHashedDictionary::getColumn( - const std::string & attribute_name, - const DataTypePtr & attribute_type, - const Columns & key_columns, - const DataTypes & key_types, - DefaultOrFilter default_or_filter) const -{ - bool is_short_circuit = std::holds_alternative(default_or_filter); - assert(is_short_circuit || std::holds_alternative(default_or_filter)); - - if (dictionary_key_type == DictionaryKeyType::Complex) - { - auto key_types_copy = key_types; - key_types_copy.pop_back(); - dict_struct.validateKeyTypes(key_types_copy); - } - - ColumnPtr result; - - const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, attribute_type); - const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second; - const auto & attribute = attributes[attribute_index]; - - /// Cast range column to storage type - Columns modified_key_columns = key_columns; - const ColumnPtr & range_storage_column = key_columns.back(); - ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""}; - modified_key_columns.back() = castColumnAccurate(column_to_cast, dict_struct.range_min->type); - - size_t keys_size = key_columns.front()->size(); - bool 
is_attribute_nullable = attribute.is_value_nullable.has_value(); - - ColumnUInt8::MutablePtr col_null_map_to; - ColumnUInt8::Container * vec_null_map_to = nullptr; - if (is_attribute_nullable) - { - col_null_map_to = ColumnUInt8::create(keys_size, false); - vec_null_map_to = &col_null_map_to->getData(); - } - - auto type_call = [&](const auto & dictionary_attribute_type) - { - using Type = std::decay_t; - using AttributeType = typename Type::AttributeType; - using ValueType = DictionaryValueType; - using ColumnProvider = DictionaryAttributeColumnProvider; - - auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size); - - if (is_short_circuit) - { - IColumn::Filter & default_mask = std::get(default_or_filter).get(); - size_t keys_found = 0; - - if constexpr (std::is_same_v) - { - auto * out = column.get(); - - keys_found = getItemsShortCircuitImpl( - attribute, - modified_key_columns, - [&](size_t, const Array & value, bool) - { - out->insert(value); - }, - default_mask); - } - else if constexpr (std::is_same_v) - { - auto * out = column.get(); - - if (is_attribute_nullable) - keys_found = getItemsShortCircuitImpl( - attribute, - modified_key_columns, - [&](size_t row, StringRef value, bool is_null) - { - (*vec_null_map_to)[row] = is_null; - out->insertData(value.data, value.size); - }, - default_mask); - else - keys_found = getItemsShortCircuitImpl( - attribute, - modified_key_columns, - [&](size_t, StringRef value, bool) - { - out->insertData(value.data, value.size); - }, - default_mask); - } - else - { - auto & out = column->getData(); - - if (is_attribute_nullable) - keys_found = getItemsShortCircuitImpl( - attribute, - modified_key_columns, - [&](size_t row, const auto value, bool is_null) - { - (*vec_null_map_to)[row] = is_null; - out[row] = value; - }, - default_mask); - else - keys_found = getItemsShortCircuitImpl( - attribute, - modified_key_columns, - [&](size_t row, const auto value, bool) - { - out[row] = value; - }, - default_mask); - - 
out.resize(keys_found); - } - - if (is_attribute_nullable) - vec_null_map_to->resize(keys_found); - } - else - { - const ColumnPtr & default_values_column = std::get(default_or_filter).get(); - - DictionaryDefaultValueExtractor default_value_extractor( - dictionary_attribute.null_value, default_values_column); - - if constexpr (std::is_same_v) - { - auto * out = column.get(); - - getItemsImpl( - attribute, - modified_key_columns, - [&](size_t, const Array & value, bool) - { - out->insert(value); - }, - default_value_extractor); - } - else if constexpr (std::is_same_v) - { - auto * out = column.get(); - - if (is_attribute_nullable) - getItemsImpl( - attribute, - modified_key_columns, - [&](size_t row, StringRef value, bool is_null) - { - (*vec_null_map_to)[row] = is_null; - out->insertData(value.data, value.size); - }, - default_value_extractor); - else - getItemsImpl( - attribute, - modified_key_columns, - [&](size_t, StringRef value, bool) - { - out->insertData(value.data, value.size); - }, - default_value_extractor); - } - else - { - auto & out = column->getData(); - - if (is_attribute_nullable) - getItemsImpl( - attribute, - modified_key_columns, - [&](size_t row, const auto value, bool is_null) - { - (*vec_null_map_to)[row] = is_null; - out[row] = value; - }, - default_value_extractor); - else - getItemsImpl( - attribute, - modified_key_columns, - [&](size_t row, const auto value, bool) - { - out[row] = value; - }, - default_value_extractor); - } - } - - result = std::move(column); - }; - - callOnDictionaryAttributeType(attribute.type, type_call); - - if (is_attribute_nullable) - result = ColumnNullable::create(result, std::move(col_null_map_to)); - - return result; -} - template ColumnPtr RangeHashedDictionary::getColumnInternal( const std::string & attribute_name, @@ -842,224 +638,6 @@ typename RangeHashedDictionary::Attribute RangeHashedDictio return attribute; } -template -template -void RangeHashedDictionary::getItemsImpl( - const Attribute & attribute, - 
const Columns & key_columns, - ValueSetter && set_value, - DefaultValueExtractor & default_value_extractor) const -{ - const auto & attribute_container = std::get>(attribute.container); - - size_t keys_found = 0; - - const ColumnPtr & range_column = key_columns.back(); - auto key_columns_copy = key_columns; - key_columns_copy.pop_back(); - - DictionaryKeysArenaHolder arena_holder; - DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); - const size_t keys_size = keys_extractor.getKeysSize(); - - callOnRangeType(dict_struct.range_min->type, [&](const auto & types) - { - using Types = std::decay_t; - using RangeColumnType = typename Types::LeftType; - using RangeStorageType = typename RangeColumnType::ValueType; - using RangeInterval = Interval; - - const auto * range_column_typed = typeid_cast(range_column.get()); - if (!range_column_typed) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Dictionary {} range column type should be equal to {}", - getFullName(), - dict_struct.range_min->type->getName()); - - const auto & range_column_data = range_column_typed->getData(); - - const auto & key_attribute_container = std::get>(key_attribute.container); - - for (size_t key_index = 0; key_index < keys_size; ++key_index) - { - auto key = keys_extractor.extractCurrentKey(); - const auto it = key_attribute_container.find(key); - - if (it) - { - const auto date = range_column_data[key_index]; - const auto & interval_tree = it->getMapped(); - - size_t value_index = 0; - std::optional range; - - interval_tree.find(date, [&](auto & interval, auto & interval_value_index) - { - if (range) - { - if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range) - { - range = interval; - value_index = interval_value_index; - } - else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > * range) - { - range = interval; - value_index = interval_value_index; - } - } - 
else - { - range = interval; - value_index = interval_value_index; - } - - return true; - }); - - if (range.has_value()) - { - ++keys_found; - - AttributeType value = attribute_container[value_index]; - - if constexpr (is_nullable) - { - bool is_null = (*attribute.is_value_nullable)[value_index]; - set_value(key_index, value, is_null); - } - else - { - set_value(key_index, value, false); - } - - keys_extractor.rollbackCurrentKey(); - continue; - } - } - - if constexpr (is_nullable) - set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index)); - else - set_value(key_index, default_value_extractor[key_index], false); - - keys_extractor.rollbackCurrentKey(); - } - }); - - query_count.fetch_add(keys_size, std::memory_order_relaxed); - found_count.fetch_add(keys_found, std::memory_order_relaxed); -} - -template -template -size_t RangeHashedDictionary::getItemsShortCircuitImpl( - const Attribute & attribute, - const Columns & key_columns, - ValueSetter && set_value, - IColumn::Filter & default_mask) const -{ - const auto & attribute_container = std::get>(attribute.container); - - size_t keys_found = 0; - - const ColumnPtr & range_column = key_columns.back(); - auto key_columns_copy = key_columns; - key_columns_copy.pop_back(); - - DictionaryKeysArenaHolder arena_holder; - DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); - const size_t keys_size = keys_extractor.getKeysSize(); - default_mask.resize(keys_size); - - callOnRangeType(dict_struct.range_min->type, [&](const auto & types) - { - using Types = std::decay_t; - using RangeColumnType = typename Types::LeftType; - using RangeStorageType = typename RangeColumnType::ValueType; - using RangeInterval = Interval; - - const auto * range_column_typed = typeid_cast(range_column.get()); - if (!range_column_typed) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Dictionary {} range column type should be equal to {}", - getFullName(), - 
dict_struct.range_min->type->getName()); - - const auto & range_column_data = range_column_typed->getData(); - - const auto & key_attribute_container = std::get>(key_attribute.container); - - for (size_t key_index = 0; key_index < keys_size; ++key_index) - { - auto key = keys_extractor.extractCurrentKey(); - const auto it = key_attribute_container.find(key); - - if (it) - { - const auto date = range_column_data[key_index]; - const auto & interval_tree = it->getMapped(); - - size_t value_index = 0; - std::optional range; - - interval_tree.find(date, [&](auto & interval, auto & interval_value_index) - { - if (range) - { - if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range) - { - range = interval; - value_index = interval_value_index; - } - else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > * range) - { - range = interval; - value_index = interval_value_index; - } - } - else - { - range = interval; - value_index = interval_value_index; - } - - return true; - }); - - if (range.has_value()) - { - default_mask[key_index] = 0; - ++keys_found; - - AttributeType value = attribute_container[value_index]; - - if constexpr (is_nullable) - { - bool is_null = (*attribute.is_value_nullable)[value_index]; - set_value(key_index, value, is_null); - } - else - { - set_value(key_index, value, false); - } - - keys_extractor.rollbackCurrentKey(); - continue; - } - } - - default_mask[key_index] = 1; - - keys_extractor.rollbackCurrentKey(); - } - }); - - query_count.fetch_add(keys_size, std::memory_order_relaxed); - found_count.fetch_add(keys_found, std::memory_order_relaxed); - return keys_found; -} - template template void RangeHashedDictionary::getItemsInternalImpl( diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsImpl.txx b/src/Dictionaries/RangeHashedDictionaryGetItemsImpl.txx new file mode 100644 index 00000000000..9da2b0faf4a --- /dev/null +++ 
b/src/Dictionaries/RangeHashedDictionaryGetItemsImpl.txx @@ -0,0 +1,133 @@ +#include + +#define INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType, IsNullable, AttributeType, ValueType) \ +template void RangeHashedDictionary::getItemsImpl>( \ + const Attribute & attribute,\ + const Columns & key_columns,\ + typename RangeHashedDictionary::ValueSetterFunc && set_value,\ + DictionaryDefaultValueExtractor & default_value_extractor) const; + +#define INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(AttributeType) \ + INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Simple, true, AttributeType, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Simple, false, AttributeType, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Complex, true, AttributeType, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Complex, false, AttributeType, DictionaryValueType) + +namespace DB +{ + +template +template +void RangeHashedDictionary::getItemsImpl( + const Attribute & attribute, + const Columns & key_columns, + typename RangeHashedDictionary::ValueSetterFunc && set_value, + DefaultValueExtractor & default_value_extractor) const +{ + const auto & attribute_container = std::get>(attribute.container); + + + size_t keys_found = 0; + + const ColumnPtr & range_column = key_columns.back(); + auto key_columns_copy = key_columns; + key_columns_copy.pop_back(); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); + const size_t keys_size = keys_extractor.getKeysSize(); + + callOnRangeType( + dict_struct.range_min->type, + [&](const auto & types) + { + using Types = std::decay_t; + using RangeColumnType = typename Types::LeftType; + using RangeStorageType = typename RangeColumnType::ValueType; + using RangeInterval = Interval; + + const auto * range_column_typed = typeid_cast(range_column.get()); + if (!range_column_typed) + throw Exception( + 
ErrorCodes::TYPE_MISMATCH, + "Dictionary {} range column type should be equal to {}", + getFullName(), + dict_struct.range_min->type->getName()); + + const auto & range_column_data = range_column_typed->getData(); + + const auto & key_attribute_container = std::get>(key_attribute.container); + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + const auto it = key_attribute_container.find(key); + + if (it) + { + const auto date = range_column_data[key_index]; + const auto & interval_tree = it->getMapped(); + + size_t value_index = 0; + std::optional range; + + interval_tree.find( + date, + [&](auto & interval, auto & interval_value_index) + { + if (range) + { + if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range) + { + range = interval; + value_index = interval_value_index; + } + else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > *range) + { + range = interval; + value_index = interval_value_index; + } + } + else + { + range = interval; + value_index = interval_value_index; + } + + return true; + }); + + if (range.has_value()) + { + ++keys_found; + + ValueType value = attribute_container[value_index]; + + if constexpr (is_nullable) + { + bool is_null = (*attribute.is_value_nullable)[value_index]; + set_value(key_index, value, is_null); + } + else + { + set_value(key_index, value, false); + } + + keys_extractor.rollbackCurrentKey(); + continue; + } + } + + if constexpr (is_nullable) + set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index)); + else + set_value(key_index, default_value_extractor[key_index], false); + + keys_extractor.rollbackCurrentKey(); + } + }); + + query_count.fetch_add(keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); +} +} diff --git 
a/src/Dictionaries/RangeHashedDictionaryGetItemsImplDecimal.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsImplDecimal.cpp new file mode 100644 index 00000000000..f1ee4dd58e1 --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsImplDecimal.cpp @@ -0,0 +1,10 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Decimal32); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Decimal64); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Decimal128); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Decimal256); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(DateTime64); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsImplFloat.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsImplFloat.cpp new file mode 100644 index 00000000000..291a55a76db --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsImplFloat.cpp @@ -0,0 +1,7 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Float32); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Float64); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsImplInt.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsImplInt.cpp new file mode 100644 index 00000000000..a0748a9f486 --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsImplInt.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int8); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int16); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int32); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int64); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int128); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int256); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsImplOthers.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsImplOthers.cpp new file mode 100644 index 00000000000..96e5bb54d0b --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsImplOthers.cpp @@ -0,0 +1,10 @@ +#include + +namespace DB +{ 
+INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UUID); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(IPv4); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(IPv6); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(String); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Array); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsImplUInt.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsImplUInt.cpp new file mode 100644 index 00000000000..e60a7189a2d --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsImplUInt.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt8); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt16); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt32); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt64); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt128); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt256); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImpl.txx b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImpl.txx new file mode 100644 index 00000000000..5807af519f9 --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImpl.txx @@ -0,0 +1,132 @@ +#include + +#define INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType, IsNullable, ValueType) \ + template size_t RangeHashedDictionary::getItemsShortCircuitImpl( \ + const Attribute & attribute, \ + const Columns & key_columns, \ + typename RangeHashedDictionary::ValueSetterFunc && set_value, \ + IColumn::Filter & default_mask) const; + +#define INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(AttributeType) \ + INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Simple, true, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Simple, false, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Complex, true, DictionaryValueType) \ + 
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Complex, false, DictionaryValueType) + +namespace DB +{ + +template +template +size_t RangeHashedDictionary::getItemsShortCircuitImpl( + const Attribute & attribute, + const Columns & key_columns, + typename RangeHashedDictionary::ValueSetterFunc && set_value, + IColumn::Filter & default_mask) const +{ + const auto & attribute_container = std::get>(attribute.container); + + size_t keys_found = 0; + + const ColumnPtr & range_column = key_columns.back(); + auto key_columns_copy = key_columns; + key_columns_copy.pop_back(); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); + const size_t keys_size = keys_extractor.getKeysSize(); + default_mask.resize(keys_size); + + callOnRangeType( + dict_struct.range_min->type, + [&](const auto & types) + { + using Types = std::decay_t; + using RangeColumnType = typename Types::LeftType; + using RangeStorageType = typename RangeColumnType::ValueType; + using RangeInterval = Interval; + + const auto * range_column_typed = typeid_cast(range_column.get()); + if (!range_column_typed) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Dictionary {} range column type should be equal to {}", + getFullName(), + dict_struct.range_min->type->getName()); + + const auto & range_column_data = range_column_typed->getData(); + + const auto & key_attribute_container = std::get>(key_attribute.container); + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + const auto it = key_attribute_container.find(key); + + if (it) + { + const auto date = range_column_data[key_index]; + const auto & interval_tree = it->getMapped(); + + size_t value_index = 0; + std::optional range; + + interval_tree.find( + date, + [&](auto & interval, auto & interval_value_index) + { + if (range) + { + if (likely(configuration.lookup_strategy == 
RangeHashedDictionaryLookupStrategy::min) && interval < *range) + { + range = interval; + value_index = interval_value_index; + } + else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > *range) + { + range = interval; + value_index = interval_value_index; + } + } + else + { + range = interval; + value_index = interval_value_index; + } + + return true; + }); + + if (range.has_value()) + { + default_mask[key_index] = 0; + ++keys_found; + + ValueType value = attribute_container[value_index]; + + if constexpr (is_nullable) + { + bool is_null = (*attribute.is_value_nullable)[value_index]; + set_value(key_index, value, is_null); + } + else + { + set_value(key_index, value, false); + } + + keys_extractor.rollbackCurrentKey(); + continue; + } + } + + default_mask[key_index] = 1; + + keys_extractor.rollbackCurrentKey(); + } + }); + + query_count.fetch_add(keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); + return keys_found; +} +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplDecimal.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplDecimal.cpp new file mode 100644 index 00000000000..298369e4735 --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplDecimal.cpp @@ -0,0 +1,10 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Decimal32); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Decimal64); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Decimal128); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Decimal256); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(DateTime64); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplFloat.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplFloat.cpp new file mode 100644 index 00000000000..e8e8da6c75e --- /dev/null +++ 
b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplFloat.cpp @@ -0,0 +1,7 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Float32); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Float64); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplInt.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplInt.cpp new file mode 100644 index 00000000000..c685b9b5331 --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplInt.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int8); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int16); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int32); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int64); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int128); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int256); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplOthers.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplOthers.cpp new file mode 100644 index 00000000000..46ea141b59b --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplOthers.cpp @@ -0,0 +1,10 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UUID); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(IPv4); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(IPv6); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(String); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Array); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplUInt.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplUInt.cpp new file mode 100644 index 00000000000..18421fd7e2d --- /dev/null +++ 
b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplUInt.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt8); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt16); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt32); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt64); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt128); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt256); +} diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 1f5c2d6d2c7..a3f243b49d7 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/src/Dictionaries/registerRangeHashedDictionary.cpp b/src/Dictionaries/registerRangeHashedDictionary.cpp index 4e20abfdb79..8123b811198 100644 --- a/src/Dictionaries/registerRangeHashedDictionary.cpp +++ b/src/Dictionaries/registerRangeHashedDictionary.cpp @@ -1,5 +1,8 @@ -#include "RangeHashedDictionary.h" +#include + #include +#include +#include #include namespace DB diff --git a/src/Dictionaries/tests/gtest_dictionary_configuration.cpp b/src/Dictionaries/tests/gtest_dictionary_configuration.cpp index 989ce5c8f18..08aad663a8c 100644 --- a/src/Dictionaries/tests/gtest_dictionary_configuration.cpp +++ b/src/Dictionaries/tests/gtest_dictionary_configuration.cpp @@ -48,7 +48,7 @@ TEST(ConvertDictionaryAST, SimpleDictConfiguration) " COMMENT 'hello world!'"; ParserCreateDictionaryQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTCreateQuery * create = ast->as(); DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, getContext().context); @@ -119,7 +119,7 
@@ TEST(ConvertDictionaryAST, TrickyAttributes) " SOURCE(CLICKHOUSE(HOST 'localhost'))"; ParserCreateDictionaryQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTCreateQuery * create = ast->as(); DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, getContext().context); @@ -164,7 +164,7 @@ TEST(ConvertDictionaryAST, ComplexKeyAndLayoutWithParams) " LIFETIME(MIN 1 MAX 10)"; ParserCreateDictionaryQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTCreateQuery * create = ast->as(); DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, getContext().context); @@ -215,7 +215,7 @@ TEST(ConvertDictionaryAST, ComplexSource) " RANGE(MIN second_column MAX third_column)"; ParserCreateDictionaryQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTCreateQuery * create = ast->as(); DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, getContext().context); /// source diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index f72e6634465..8595cc45218 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -1,7 +1,5 @@ #include "ReadBufferFromRemoteFSGather.h" -#include - #include #include #include @@ -9,7 +7,6 @@ #include #include #include -#include #include using namespace DB; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index cb574551d26..0bba7a1f424 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ 
b/src/Formats/SchemaInferenceUtils.cpp @@ -1054,7 +1054,7 @@ namespace { if (depth > settings.max_parser_depth) throw Exception(ErrorCodes::TOO_DEEP_RECURSION, - "Maximum parse depth ({}) exceeded. Consider raising max_parser_depth setting.", settings.max_parser_depth); + "Maximum parse depth ({}) exceeded. Consider raising max_parser_depth setting.", settings.max_parser_depth); assertChar('{', buf); skipWhitespaceIfAny(buf); diff --git a/src/Formats/fuzzers/format_fuzzer.cpp b/src/Formats/fuzzers/format_fuzzer.cpp index 583d1173a01..46661e4828c 100644 --- a/src/Formats/fuzzers/format_fuzzer.cpp +++ b/src/Formats/fuzzers/format_fuzzer.cpp @@ -32,6 +32,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) auto initialize = [&]() mutable { + if (context) + return true; + shared_context = Context::createShared(); context = Context::createGlobal(shared_context.get()); context->makeGlobalContext(); diff --git a/src/Functions/CRC.cpp b/src/Functions/CRC.cpp index ba13fcf78f1..49d6dd6fa52 100644 --- a/src/Functions/CRC.cpp +++ b/src/Functions/CRC.cpp @@ -51,7 +51,7 @@ struct CRC32IEEEImpl : public CRCImpl static constexpr auto name = "CRC32IEEE"; }; -struct CRC32ZLIBImpl +struct CRC32ZLibImpl { using ReturnType = UInt32; static constexpr auto name = "CRC32"; @@ -133,13 +133,14 @@ private: } }; -template +template using FunctionCRC = FunctionStringOrArrayToT, T, typename T::ReturnType>; + // The same as IEEE variant, but uses 0xffffffff as initial value // This is the default // -// (And zlib is used here, since it has optimized version) -using FunctionCRC32ZLIB = FunctionCRC; +// (And ZLib is used here, since it has optimized version) +using FunctionCRC32ZLib = FunctionCRC; // Uses CRC-32-IEEE 802.3 polynomial using FunctionCRC32IEEE = FunctionCRC; // Uses CRC-64-ECMA polynomial @@ -147,17 +148,11 @@ using FunctionCRC64ECMA = FunctionCRC; } -template -void registerFunctionCRCImpl(FunctionFactory & factory) -{ - factory.registerFunction(T::name, {}, 
FunctionFactory::CaseInsensitive); -} - REGISTER_FUNCTION(CRC) { - registerFunctionCRCImpl(factory); - registerFunctionCRCImpl(factory); - registerFunctionCRCImpl(factory); + factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::CaseInsensitive); } } diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index 7fc46db50f1..5ca4b0bc579 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -1,7 +1,11 @@ #include -#include #include +#include +#include +#include +#include #include +#include namespace DB @@ -12,69 +16,72 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +FunctionBasePtr createFunctionBaseCast( + ContextPtr context, + const char * name, + const ColumnsWithTypeAndName & arguments, + const DataTypePtr & return_type, + std::optional diagnostic, + CastType cast_type); + + /** CastInternal does not preserve nullability of the data type, * i.e. CastInternal(toNullable(toInt8(1)) as Int32) will be Int32(1). * * Cast preserves nullability according to setting `cast_keep_nullable`, * i.e. Cast(toNullable(toInt8(1)) as Int32) will be Nullable(Int32(1)) if `cast_keep_nullable` == 1. */ -template class CastOverloadResolverImpl : public IFunctionOverloadResolver { public: - using MonotonicityForRange = FunctionCastBase::MonotonicityForRange; + const char * getNameImpl() const + { + if (cast_type == CastType::accurate) + return "accurateCast"; + if (cast_type == CastType::accurateOrNull) + return "accurateCastOrNull"; + if (internal) + return "_CAST"; + else + return "CAST"; + } - static constexpr auto name = cast_type == CastType::accurate - ? CastName::accurate_cast_name - : (cast_type == CastType::accurateOrNull ? 
CastName::accurate_cast_or_null_name : CastName::cast_name); - - String getName() const override { return name; } + String getName() const override + { + return getNameImpl(); + } size_t getNumberOfArguments() const override { return 2; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - explicit CastOverloadResolverImpl(ContextPtr context_, std::optional diagnostic_, bool keep_nullable_, const DataTypeValidationSettings & data_type_validation_settings_) + explicit CastOverloadResolverImpl(ContextPtr context_, CastType cast_type_, bool internal_, std::optional diagnostic_, bool keep_nullable_, const DataTypeValidationSettings & data_type_validation_settings_) : context(context_) + , cast_type(cast_type_) + , internal(internal_) , diagnostic(std::move(diagnostic_)) , keep_nullable(keep_nullable_) , data_type_validation_settings(data_type_validation_settings_) { } - static FunctionOverloadResolverPtr create(ContextPtr context) + static FunctionOverloadResolverPtr create(ContextPtr context, CastType cast_type, bool internal, std::optional diagnostic) { - const auto & settings_ref = context->getSettingsRef(); - - if constexpr (internal) - return createImpl(context, {}, false /*keep_nullable*/); - - return createImpl(context, {}, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref)); - } - - static FunctionOverloadResolverPtr createImpl(ContextPtr context, std::optional diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {}) - { - assert(!internal || !keep_nullable); - return std::make_unique(context, std::move(diagnostic), keep_nullable, data_type_validation_settings); - } - - static FunctionOverloadResolverPtr createImpl(std::optional diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {}) - { - assert(!internal || !keep_nullable); - return std::make_unique(ContextPtr(), std::move(diagnostic), 
keep_nullable, data_type_validation_settings); + if (internal) + { + return std::make_unique(context, cast_type, internal, diagnostic, false /*keep_nullable*/, DataTypeValidationSettings{}); + } + else + { + const auto & settings_ref = context->getSettingsRef(); + return std::make_unique(context, cast_type, internal, diagnostic, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref)); + } } protected: - FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override { - DataTypes data_types(arguments.size()); - - for (size_t i = 0; i < arguments.size(); ++i) - data_types[i] = arguments[i].type; - - auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get()); - return std::make_unique>(context, name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type); + return createFunctionBaseCast(context, getNameImpl(), arguments, return_type, diagnostic, cast_type); } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override @@ -92,10 +99,10 @@ protected: DataTypePtr type = DataTypeFactory::instance().get(type_col->getValue()); validateDataType(type, data_type_validation_settings); - if constexpr (cast_type == CastType::accurateOrNull) + if (cast_type == CastType::accurateOrNull) return makeNullable(type); - if constexpr (internal) + if (internal) return type; if (keep_nullable && arguments.front().type->isNullable() && type->canBeInsideNullable()) @@ -110,55 +117,27 @@ protected: private: ContextPtr context; + CastType cast_type; + bool internal; std::optional diagnostic; bool keep_nullable; DataTypeValidationSettings data_type_validation_settings; }; -struct CastOverloadName -{ - static constexpr auto cast_name = "CAST"; - static constexpr auto accurate_cast_name = "accurateCast"; - static constexpr auto accurate_cast_or_null_name = "accurateCastOrNull"; -}; - -struct CastInternalOverloadName -{ - static 
constexpr auto cast_name = "_CAST"; - static constexpr auto accurate_cast_name = "accurate_Cast"; - static constexpr auto accurate_cast_or_null_name = "accurate_CastOrNull"; -}; - -template -using CastOverloadResolver = CastOverloadResolverImpl; - -template -using CastInternalOverloadResolver = CastOverloadResolverImpl; - - FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional diagnostic) { - switch (type) - { - case CastType::nonAccurate: - return CastInternalOverloadResolver::createImpl(diagnostic); - case CastType::accurate: - return CastInternalOverloadResolver::createImpl(diagnostic); - case CastType::accurateOrNull: - return CastInternalOverloadResolver::createImpl(diagnostic); - } + return CastOverloadResolverImpl::create(ContextPtr{}, type, true, diagnostic); } - REGISTER_FUNCTION(CastOverloadResolvers) { - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); + factory.registerFunction("_CAST", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::nonAccurate, true, {}); }, {}, FunctionFactory::CaseInsensitive); /// Note: "internal" (not affected by null preserving setting) versions of accurate cast functions are unneeded. 
- factory.registerFunction>({}, FunctionFactory::CaseInsensitive); - factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction("CAST", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::nonAccurate, false, {}); }, {}, FunctionFactory::CaseInsensitive); + factory.registerFunction("accurateCast", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::accurate, false, {}); }, {}); + factory.registerFunction("accurateCastOrNull", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::accurateOrNull, false, {}); }, {}); } } diff --git a/src/Functions/FunctionFQDN.cpp b/src/Functions/FunctionFQDN.cpp index b054ff8e1d7..108a96216fd 100644 --- a/src/Functions/FunctionFQDN.cpp +++ b/src/Functions/FunctionFQDN.cpp @@ -47,7 +47,7 @@ public: REGISTER_FUNCTION(FQDN) { factory.registerFunction({}, FunctionFactory::CaseInsensitive); - factory.registerFunction("fullHostName"); + factory.registerAlias("fullHostName", "FQDN"); } } diff --git a/src/Functions/FunctionFactory.cpp b/src/Functions/FunctionFactory.cpp index 6a7274376b9..004ef745a93 100644 --- a/src/Functions/FunctionFactory.cpp +++ b/src/Functions/FunctionFactory.cpp @@ -49,6 +49,18 @@ void FunctionFactory::registerFunction( } } +void FunctionFactory::registerFunction( + const std::string & name, + FunctionSimpleCreator creator, + FunctionDocumentation doc, + CaseSensitiveness case_sensitiveness) +{ + registerFunction(name, [my_creator = std::move(creator)](ContextPtr context) + { + return std::make_unique(my_creator(context)); + }, std::move(doc), std::move(case_sensitiveness)); +} + FunctionOverloadResolverPtr FunctionFactory::getImpl( const std::string & name, diff --git a/src/Functions/FunctionFactory.h b/src/Functions/FunctionFactory.h index 588cae64e16..bb43d4719b8 100644 --- a/src/Functions/FunctionFactory.h +++ b/src/Functions/FunctionFactory.h @@ -17,6 +17,7 @@ namespace DB { using FunctionCreator 
= std::function; +using FunctionSimpleCreator = std::function; using FunctionFactoryData = std::pair; /** Creates function by name. @@ -34,15 +35,6 @@ public: registerFunction(Function::name, std::move(doc), case_sensitiveness); } - template - void registerFunction(const std::string & name, FunctionDocumentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive) - { - if constexpr (std::is_base_of_v) - registerFunction(name, &adaptFunctionToOverloadResolver, std::move(doc), case_sensitiveness); - else - registerFunction(name, &Function::create, std::move(doc), case_sensitiveness); - } - /// This function is used by YQL - innovative transactional DBMS that depends on ClickHouse by source code. std::vector getAllNames() const; @@ -66,6 +58,12 @@ public: FunctionDocumentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive); + void registerFunction( + const std::string & name, + FunctionSimpleCreator creator, + FunctionDocumentation doc = {}, + CaseSensitiveness case_sensitiveness = CaseSensitive); + FunctionDocumentation getDocumentation(const std::string & name) const; private: @@ -74,17 +72,17 @@ private: Functions functions; Functions case_insensitive_functions; - template - static FunctionOverloadResolverPtr adaptFunctionToOverloadResolver(ContextPtr context) - { - return std::make_unique(Function::create(context)); - } - const Functions & getMap() const override { return functions; } const Functions & getCaseInsensitiveMap() const override { return case_insensitive_functions; } String getFactoryName() const override { return "FunctionFactory"; } + + template + void registerFunction(const std::string & name, FunctionDocumentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive) + { + registerFunction(name, &Function::create, std::move(doc), case_sensitiveness); + } }; const String & getFunctionCanonicalNameIfAny(const String & name); diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index 
3efa40df9be..37db514fd1f 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -123,7 +123,7 @@ public: class Executor { public: - static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, const ContextPtr & context) + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, uint32_t parse_backtracks, const ContextPtr & context) { MutableColumnPtr to{result_type->createColumn()}; to->reserve(input_rows_count); @@ -161,7 +161,7 @@ public: /// Tokenize the query Tokens tokens(query.data(), query.data() + query.size()); /// Max depth 0 indicates that depth is not limited - IParser::Pos token_iterator(tokens, parse_depth); + IParser::Pos token_iterator(tokens, parse_depth, parse_backtracks); /// Parse query and create AST tree Expected expected; @@ -232,16 +232,17 @@ public: /// 3. Parser(Tokens, ASTPtr) -> complete AST /// 4. 
Execute functions: call getNextItem on generator and handle each item unsigned parse_depth = static_cast(getContext()->getSettingsRef().max_parser_depth); + unsigned parse_backtracks = static_cast(getContext()->getSettingsRef().max_parser_backtracks); #if USE_SIMDJSON if (getContext()->getSettingsRef().allow_simdjson) return FunctionSQLJSONHelpers::Executor< Name, Impl>, - SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, getContext()); + SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, parse_backtracks, getContext()); #endif return FunctionSQLJSONHelpers:: Executor>, DummyJSONParser>::run( - arguments, result_type, input_rows_count, parse_depth, getContext()); + arguments, result_type, input_rows_count, parse_depth, parse_backtracks, getContext()); } }; diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 01e057e19a1..ceff4f3fd7e 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1,10 +1,4821 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB { +namespace ErrorCodes +{ + extern const int ATTEMPT_TO_READ_AFTER_EOF; + extern const int CANNOT_PARSE_NUMBER; + extern const int CANNOT_READ_ARRAY_FROM_TEXT; + extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; + extern const int CANNOT_PARSE_QUOTED_STRING; + extern const int 
CANNOT_PARSE_ESCAPE_SEQUENCE; + extern const int CANNOT_PARSE_DATE; + extern const int CANNOT_PARSE_DATETIME; + extern const int CANNOT_PARSE_TEXT; + extern const int CANNOT_PARSE_UUID; + extern const int CANNOT_PARSE_IPV4; + extern const int CANNOT_PARSE_IPV6; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; + extern const int LOGICAL_ERROR; + extern const int TYPE_MISMATCH; + extern const int CANNOT_CONVERT_TYPE; + extern const int ILLEGAL_COLUMN; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NOT_IMPLEMENTED; + extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN; + extern const int CANNOT_PARSE_BOOL; + extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; +} + +namespace +{ + +/** Type conversion functions. + * toType - conversion in "natural way"; + */ + +UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) +{ + const auto * arg_type = named_column.type.get(); + bool ok = checkAndGetDataType(arg_type) + || checkAndGetDataType(arg_type) + || checkAndGetDataType(arg_type) + || checkAndGetDataType(arg_type); + if (!ok) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of toDecimal() scale {}", named_column.type->getName()); + + Field field; + named_column.column->get(0, field); + return static_cast(field.get()); +} + + +/** Conversion of Date to DateTime: adding 00:00:00 time component. 
+ */ +template +struct ToDateTimeImpl +{ + static constexpr auto name = "toDateTime"; + + static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (d > MAX_DATETIME_DAY_NUM) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Day number {} is out of bounds of type DateTime", d); + } + else if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) + { + if (d > MAX_DATETIME_DAY_NUM) + d = MAX_DATETIME_DAY_NUM; + } + return static_cast(time_zone.fromDayNum(DayNum(d))); + } + + static UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) + { + if (d < 0) + return 0; + else if (d > MAX_DATETIME_DAY_NUM) + d = MAX_DATETIME_DAY_NUM; + } + else if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (d < 0 || d > MAX_DATETIME_DAY_NUM) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type DateTime", d); + } + return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); + } + + static UInt32 execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) + { + return dt; + } + + static UInt32 execute(Int64 dt64, const DateLUTImpl & /*time_zone*/) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Ignore) + return static_cast(dt64); + else + { + if (dt64 < 0 || dt64 >= MAX_DATETIME_TIMESTAMP) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) + return dt64 < 0 ? 
0 : std::numeric_limits::max(); + else + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type DateTime", dt64); + } + else + return static_cast(dt64); + } + } +}; + + +/// Implementation of toDate function. + +template +struct ToDateTransform32Or64 +{ + static constexpr auto name = "toDate"; + + static NO_SANITIZE_UNDEFINED UInt16 execute(const FromType & from, const DateLUTImpl & time_zone) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from > MAX_DATETIME_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); + } + /// if value is smaller (or equal) than maximum day value for Date, than treat it as day num, + /// otherwise treat it as unix timestamp. This is a bit weird, but we leave this behavior. + if (from <= DATE_LUT_MAX_DAY_NUM) + return from; + else + return time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); + } +}; + + +template +struct ToDateTransform32Or64Signed +{ + static constexpr auto name = "toDate"; + + static NO_SANITIZE_UNDEFINED UInt16 execute(const FromType & from, const DateLUTImpl & time_zone) + { + // TODO: decide narrow or extended range based on FromType + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from < 0 || from > MAX_DATE_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); + } + else + { + if (from < 0) + return 0; + } + return (from <= DATE_LUT_MAX_DAY_NUM) + ? 
static_cast(from) + : time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATE_TIMESTAMP))); + } +}; + +template +struct ToDateTransform8Or16Signed +{ + static constexpr auto name = "toDate"; + + static NO_SANITIZE_UNDEFINED UInt16 execute(const FromType & from, const DateLUTImpl &) + { + if (from < 0) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); + else + return 0; + } + return from; + } +}; + +/// Implementation of toDate32 function. + +template +struct ToDate32Transform32Or64 +{ + static constexpr auto name = "toDate32"; + + static NO_SANITIZE_UNDEFINED Int32 execute(const FromType & from, const DateLUTImpl & time_zone) + { + if (from < DATE_LUT_MAX_EXTEND_DAY_NUM) + { + return static_cast(from); + } + else + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type Date32", from); + } + return time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATETIME64_TIMESTAMP))); + } + } +}; + +template +struct ToDate32Transform32Or64Signed +{ + static constexpr auto name = "toDate32"; + + static NO_SANITIZE_UNDEFINED Int32 execute(const FromType & from, const DateLUTImpl & time_zone) + { + static const Int32 daynum_min_offset = -static_cast(time_zone.getDayNumOffsetEpoch()); + + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from < daynum_min_offset || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type Date32", from); + } + + if (from < daynum_min_offset) + return daynum_min_offset; + + return (from < DATE_LUT_MAX_EXTEND_DAY_NUM) + ? 
static_cast(from) + : time_zone.toDayNum(std::min(time_t(Int64(from)), time_t(MAX_DATETIME64_TIMESTAMP))); + } +}; + +template +struct ToDate32Transform8Or16Signed +{ + static constexpr auto name = "toDate32"; + + static NO_SANITIZE_UNDEFINED Int32 execute(const FromType & from, const DateLUTImpl &) + { + return from; + } +}; + +template +struct ToDateTimeTransform64 +{ + static constexpr auto name = "toDateTime"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from > MAX_DATETIME_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); + } + return static_cast(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); + } +}; + +template +struct ToDateTimeTransformSigned +{ + static constexpr auto name = "toDateTime"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + if (from < 0) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); + else + return 0; + } + return from; + } +}; + +template +struct ToDateTimeTransform64Signed +{ + static constexpr auto name = "toDateTime"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from < 0 || from > MAX_DATETIME_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); + } + + if (from < 0) + return 0; + return static_cast(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); + } +}; + +/** Conversion of numeric to 
DateTime64
+  */
+
+/// NOTE(review): this hunk was flattened and had every `<...>` span stripped. The template parameter lists and
+/// explicit template arguments below are reconstructed from the names the bodies use; the default argument
+/// `default_date_time_overflow_behavior` is an assumption - confirm against the upstream file.
+
+/// Transform for unsigned integer input: `from` becomes the whole part of a DateTime64, the fractional part is zero.
+/// In Throw mode a value above MAX_DATETIME64_TIMESTAMP raises VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE,
+/// in every other mode it is clamped to MAX_DATETIME64_TIMESTAMP. There is no lower bound check.
+template <typename FromType, FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior>
+struct ToDateTime64TransformUnsigned
+{
+    static constexpr auto name = "toDateTime64";
+
+    /// Multiplier derived from the target scale; computed once in the constructor.
+    const DateTime64::NativeType scale_multiplier;
+
+    ToDateTime64TransformUnsigned(UInt32 scale) /// NOLINT
+        : scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
+    {}
+
+    NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
+    {
+        if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw)
+        {
+            if (from > MAX_DATETIME64_TIMESTAMP) [[unlikely]]
+                throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from);
+            else
+                return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(from, 0, scale_multiplier);
+        }
+        else
+            return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(std::min<time_t>(from, MAX_DATETIME64_TIMESTAMP), 0, scale_multiplier);
+    }
+};
+
+/// Transform for signed integer input. In Throw mode a value outside
+/// [MIN_DATETIME64_TIMESTAMP, MAX_DATETIME64_TIMESTAMP] raises VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE.
+/// The clamping that follows runs in every mode (in Throw mode it cannot change an already validated value).
+template <typename FromType, FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior>
+struct ToDateTime64TransformSigned
+{
+    static constexpr auto name = "toDateTime64";
+
+    const DateTime64::NativeType scale_multiplier;
+
+    ToDateTime64TransformSigned(UInt32 scale) /// NOLINT
+        : scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
+    {}
+
+    NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
+    {
+        if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw)
+        {
+            if (from < MIN_DATETIME64_TIMESTAMP || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]]
+                throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from);
+        }
+        from = static_cast<FromType>(std::max<time_t>(from, MIN_DATETIME64_TIMESTAMP));
+        from = static_cast<FromType>(std::min<time_t>(from, MAX_DATETIME64_TIMESTAMP));
+
+        return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(from, 0, scale_multiplier);
+    }
+};
+
+/// Transform for floating point input. Unlike the integer transforms it keeps the scale itself (not a multiplier)
+/// and delegates the conversion to convertToDecimal after the same range check / clamping as the signed variant.
+template <typename FromDataType, typename FromType, FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior>
+struct ToDateTime64TransformFloat
+{
+    static constexpr auto name = "toDateTime64";
+
+    const UInt32 scale;
+
+    ToDateTime64TransformFloat(UInt32 scale_) /// NOLINT
+        : scale(scale_)
+    {}
+
+    NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
+    {
+        if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw)
+        {
+            if (from < MIN_DATETIME64_TIMESTAMP || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]]
+                throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from);
+        }
+
+        from = std::max(from, static_cast<FromType>(MIN_DATETIME64_TIMESTAMP));
+        from = std::min(from, static_cast<FromType>(MAX_DATETIME64_TIMESTAMP));
+        return convertToDecimal<FromDataType, DataTypeDateTime64>(from, scale);
+    }
+};
+
+/// Transform from the date/time field types (UInt16, Int32, UInt32 overloads) to DateTime64.
+/// The result always has a zero fractional part.
+struct ToDateTime64Transform
+{
+    static constexpr auto name = "toDateTime64";
+
+    const DateTime64::NativeType scale_multiplier;
+
+    ToDateTime64Transform(UInt32 scale) /// NOLINT
+        : scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
+    {}
+
+    /// UInt16 day number: first converted with ToDateTimeImpl<>, then forwarded to another overload of execute.
+    DateTime64::NativeType execute(UInt16 d, const DateLUTImpl & time_zone) const
+    {
+        const auto dt = ToDateTimeImpl<>::execute(d, time_zone);
+        return execute(dt, time_zone);
+    }
+
+    /// Int32 extended day number: resolved through the time zone's fromDayNum.
+    DateTime64::NativeType execute(Int32 d, const DateLUTImpl & time_zone) const
+    {
+        Int64 dt = static_cast<Int64>(time_zone.fromDayNum(ExtendedDayNum(d)));
+        return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(dt, 0, scale_multiplier);
+    }
+
+    /// UInt32 value: used directly as the whole part, the time zone is not consulted.
+    DateTime64::NativeType execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) const
+    {
+        return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(dt, 0, scale_multiplier);
+    }
+};
+
+/** Transformation of numbers, dates, datetimes to strings: through formatting.
+  */
+/// NOTE(review): this hunk was flattened and had every `<...>` span stripped. Template parameter lists,
+/// specialization arguments and explicit template arguments are reconstructed from the names used in the
+/// bodies and signatures - confirm against the upstream file.
+
+/// Generic formatter: writes the value with writeText. The data type and time zone arguments are unused.
+template <typename DataType>
+struct FormatImpl
+{
+    template <typename ReturnType = void>
+    static ReturnType execute(const typename DataType::FieldType x, WriteBuffer & wb, const DataType *, const DateLUTImpl *)
+    {
+        writeText(x, wb);
+        return ReturnType(true);
+    }
+};
+
+/// Date: the stored value is wrapped in DayNum and written as date text in the given time zone.
+template <>
+struct FormatImpl<DataTypeDate>
+{
+    template <typename ReturnType = void>
+    static ReturnType execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate *, const DateLUTImpl * time_zone)
+    {
+        writeDateText(DayNum(x), wb, *time_zone);
+        return ReturnType(true);
+    }
+};
+
+/// Date32: same as Date but the value is wrapped in ExtendedDayNum.
+template <>
+struct FormatImpl<DataTypeDate32>
+{
+    template <typename ReturnType = void>
+    static ReturnType execute(const DataTypeDate32::FieldType x, WriteBuffer & wb, const DataTypeDate32 *, const DateLUTImpl * time_zone)
+    {
+        writeDateText(ExtendedDayNum(x), wb, *time_zone);
+        return ReturnType(true);
+    }
+};
+
+/// DateTime: written as date-time text in the given time zone.
+template <>
+struct FormatImpl<DataTypeDateTime>
+{
+    template <typename ReturnType = void>
+    static ReturnType execute(const DataTypeDateTime::FieldType x, WriteBuffer & wb, const DataTypeDateTime *, const DateLUTImpl * time_zone)
+    {
+        writeDateTimeText(x, wb, *time_zone);
+        return ReturnType(true);
+    }
+};
+
+/// DateTime64: written with the scale taken from the data type instance.
+template <>
+struct FormatImpl<DataTypeDateTime64>
+{
+    template <typename ReturnType = void>
+    static ReturnType execute(const DataTypeDateTime64::FieldType x, WriteBuffer & wb, const DataTypeDateTime64 * type, const DateLUTImpl * time_zone)
+    {
+        writeDateTimeText(DateTime64(x), type->getScale(), wb, *time_zone);
+        return ReturnType(true);
+    }
+};
+
+
+/// Enum: writes the name registered for the value.
+/// With ReturnType = void the single-argument getNameForValue is used and nothing is returned;
+/// with any other ReturnType the two-argument overload reports success, and the name is written only on success.
+template <typename FieldType>
+struct FormatImpl<DataTypeEnum<FieldType>>
+{
+    template <typename ReturnType = void>
+    static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeEnum<FieldType> * type, const DateLUTImpl *)
+    {
+        static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
+
+        if constexpr (throw_exception)
+        {
+            writeString(type->getNameForValue(x), wb);
+        }
+        else
+        {
+            StringRef res;
+            bool is_ok = type->getNameForValue(x, res);
+            if (is_ok)
+                writeString(res, wb);
+            return ReturnType(is_ok);
+        }
+    }
+};
+
+/// Decimal: written with the scale of the data type instance.
+template <typename FieldType>
+struct FormatImpl<DataTypeDecimal<FieldType>>
+{
+    template <typename ReturnType = void>
+    static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeDecimal<FieldType> * type, const DateLUTImpl *)
+    {
+        writeText(x, type->getScale(), wb, false);
+        return ReturnType(true);
+    }
+};
+
+/// Returns a mutable copy of the null map when `col` is a ColumnNullable, nullptr otherwise.
+ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col)
+{
+    ColumnUInt8::MutablePtr null_map = nullptr;
+    if (const auto * col_nullable = checkAndGetColumn<ColumnNullable>(col.get()))
+    {
+        null_map = ColumnUInt8::create();
+        null_map->insertRangeFrom(col_nullable->getNullMapColumn(), 0, col_nullable->size());
+    }
+    return null_map;
+}
+
+
+/// Generic conversion of any type to String or FixedString via serialization to text.
+template <typename StringColumnType>
+struct ConvertImplGenericToString
+{
+    static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/)
+    {
+        static_assert(std::is_same_v<StringColumnType, ColumnString> || std::is_same_v<StringColumnType, ColumnFixedString>,
+                "Can be used only to serialize to ColumnString or ColumnFixedString");
+
+        /// Keep the source null map: the nested column is serialized row by row regardless of NULLs.
+        ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column);
+
+        const auto & col_with_type_and_name = columnGetNested(arguments[0]);
+        const IDataType & type = *col_with_type_and_name.type;
+        const IColumn & col_from = *col_with_type_and_name.column;
+
+        size_t size = col_from.size();
+        auto col_to = removeNullable(result_type)->createColumn();
+
+        {
+            ColumnStringHelpers::WriteHelper write_helper(
+                    assert_cast<StringColumnType &>(*col_to),
+                    size);
+
+            auto & write_buffer = write_helper.getWriteBuffer();
+
+            FormatSettings format_settings;
+            auto serialization = type.getDefaultSerialization();
+            for (size_t row = 0; row < size; ++row)
+            {
+                serialization->serializeText(col_from, row, write_buffer, format_settings);
+                write_helper.rowWritten();
+            }
+
+            write_helper.finalize();
+        }
+
+        /// The result is wrapped into Nullable only when both the result type and the source are nullable.
+        if (result_type->isNullable() && null_map)
+            return ColumnNullable::create(std::move(col_to), std::move(null_map));
+        return col_to;
+    }
+};
+
+/** Conversion of time_t to UInt16, Int32, UInt32
+  */
+template <typename DataType>
+void convertFromTime(typename DataType::FieldType & x, time_t & time)
+{
+    x = time;
+}
+
+/// DateTime: saturates to [0, MAX_DATETIME_TIMESTAMP] instead of wrapping around.
+template <>
+inline void convertFromTime<DataTypeDateTime>(DataTypeDateTime::FieldType & x, time_t & time)
+{
+    if (unlikely(time < 0))
+        x = 0;
+    else if (unlikely(time > MAX_DATETIME_TIMESTAMP))
+        x = MAX_DATETIME_TIMESTAMP;
+    else
+        x = static_cast<UInt32>(time);
+}
+
+/** Conversion of strings to numbers, dates, datetimes: through parsing.
+  */
+template <typename DataType>
+void parseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool precise_float_parsing)
+{
+    if constexpr (std::is_floating_point_v<typename DataType::FieldType>)
+    {
+        if (precise_float_parsing)
+            readFloatTextPrecise(x, rb);
+        else
+            readFloatTextFast(x, rb);
+    }
+    else
+        readText(x, rb);
+}
+
+template <>
+inline void parseImpl<DataTypeDate>(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool)
+{
+    DayNum tmp(0);
+    readDateText(tmp, rb, *time_zone);
+    x = tmp;
+}
+
+template <>
+inline void parseImpl<DataTypeDate32>(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool)
+{
+    ExtendedDayNum tmp(0);
+    readDateText(tmp, rb, *time_zone);
+    x = tmp;
+}
+
+
+// NOTE: no need of extra overload of DateTime64, since readDateTimeText64 has different signature and that case is explicitly handled in the calling code.
+template <>
+inline void parseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool)
+{
+    time_t time = 0;
+    readDateTimeText(time, rb, *time_zone);
+    convertFromTime<DataTypeDateTime>(x, time);
+}
+
+template <>
+inline void parseImpl<DataTypeUUID>(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool)
+{
+    UUID tmp;
+    readUUIDText(tmp, rb);
+    x = tmp.toUnderType();
+}
+
+template <>
+inline void parseImpl<DataTypeIPv4>(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool)
+{
+    IPv4 tmp;
+    readIPv4Text(tmp, rb);
+    x = tmp.toUnderType();
+}
+
+template <>
+inline void parseImpl<DataTypeIPv6>(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool)
+{
+    IPv6 tmp;
+    readIPv6Text(tmp, rb);
+    x = tmp;
+}
+
+/// NOTE(review): this hunk was flattened and had every `<...>` span stripped. Template parameter lists,
+/// specialization arguments and explicit template arguments in this region are reconstructed from the names
+/// used in the bodies and signatures - confirm against the upstream file.
+
+/// Non-throwing counterpart of parseImpl: returns false when the value cannot be read.
+template <typename DataType>
+bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool precise_float_parsing)
+{
+    if constexpr (std::is_floating_point_v<typename DataType::FieldType>)
+    {
+        if (precise_float_parsing)
+            return tryReadFloatTextPrecise(x, rb);
+        else
+            return tryReadFloatTextFast(x, rb);
+    }
+    else /*if constexpr (is_integer_v<typename DataType::FieldType>)*/
+        return tryReadIntText(x, rb);
+}
+
+template <>
+inline bool tryParseImpl<DataTypeDate>(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool)
+{
+    DayNum tmp(0);
+    if (!tryReadDateText(tmp, rb, *time_zone))
+        return false;
+    x = tmp;
+    return true;
+}
+
+template <>
+inline bool tryParseImpl<DataTypeDate32>(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool)
+{
+    ExtendedDayNum tmp(0);
+    if (!tryReadDateText(tmp, rb, *time_zone))
+        return false;
+    x = tmp;
+    return true;
+}
+
+template <>
+inline bool tryParseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool)
+{
+    time_t time = 0;
+    if (!tryReadDateTimeText(time, rb, *time_zone))
+        return false;
+    convertFromTime<DataTypeDateTime>(x, time);
+    return true;
+}
+
+template <>
+inline bool tryParseImpl<DataTypeUUID>(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool)
+{
+    UUID tmp;
+    if (!tryReadUUIDText(tmp, rb))
+        return false;
+
+    x = tmp.toUnderType();
+    return true;
+}
+
+template <>
+inline bool tryParseImpl<DataTypeIPv4>(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool)
+{
+    IPv4 tmp;
+    if (!tryReadIPv4Text(tmp, rb))
+        return false;
+
+    x = tmp.toUnderType();
+    return true;
+}
+
+template <>
+inline bool tryParseImpl<DataTypeIPv6>(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool)
+{
+    IPv6 tmp;
+    if (!tryReadIPv6Text(tmp, rb))
+        return false;
+
+    x = tmp;
+    return true;
+}
+
+
+/** Throw exception with verbose message when string value is not parsed completely.
+  * The message quotes the whole input, reports the offset reached (or "at begin of string")
+  * and, for native numeric result types other than IPv4/IPv6, hints at the OrZero / OrNull variants.
+  */
+[[noreturn]] inline void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, const IDataType & result_type)
+{
+    WriteBufferFromOwnString message_buf;
+    message_buf << "Cannot parse string " << quote << String(read_buffer.buffer().begin(), read_buffer.buffer().size())
+                << " as " << result_type.getName()
+                << ": syntax error";
+
+    if (read_buffer.offset())
+        message_buf << " at position " << read_buffer.offset()
+                    << " (parsed just " << quote << String(read_buffer.buffer().begin(), read_buffer.offset()) << ")";
+    else
+        message_buf << " at begin of string";
+
+    // Currently there are no functions toIPv{4,6}Or{Null,Zero}
+    if (isNativeNumber(result_type) && !(result_type.getName() == "IPv4" || result_type.getName() == "IPv6"))
+        message_buf << ". Note: there are to" << result_type.getName() << "OrZero and to" << result_type.getName() << "OrNull functions, which returns zero/NULL instead of throwing exception.";
+
+    throw Exception(PreformattedMessage{message_buf.str(), "Cannot parse string {} as {}: syntax error {}"}, ErrorCodes::CANNOT_PARSE_TEXT);
+}
+
+
+enum class ConvertFromStringExceptionMode
+{
+    Throw,  /// Throw exception if value cannot be parsed.
+    Zero,   /// Fill with zero or default if value cannot be parsed.
+    Null    /// Return ColumnNullable with NULLs when value cannot be parsed.
+};
+
+enum class ConvertFromStringParsingMode
+{
+    Normal,
+    BestEffort,  /// Only applicable for DateTime. Will use sophisticated method, that is slower.
+    BestEffortUS
+};
+
+/// Converts a String / FixedString column to ToDataType by parsing every row.
+/// `exception_mode` selects what happens to rows that do not parse (throw, default value, or NULL),
+/// `parsing_mode` selects the regular or one of the best-effort DateTime parsers.
+template <typename FromDataType, typename ToDataType, typename Name,
+    ConvertFromStringExceptionMode exception_mode, ConvertFromStringParsingMode parsing_mode>
+struct ConvertThroughParsing
+{
+    static_assert(std::is_same_v<FromDataType, DataTypeString> || std::is_same_v<FromDataType, DataTypeFixedString>,
+        "ConvertThroughParsing is only applicable for String or FixedString data types");
+
+    static constexpr bool to_datetime64 = std::is_same_v<ToDataType, DataTypeDateTime64>;
+
+    /// Returns true when the whole value has been consumed (possibly after skipping allowed trailing content).
+    static bool isAllRead(ReadBuffer & in)
+    {
+        /// In case of FixedString, skip zero bytes at end.
+        if constexpr (std::is_same_v<FromDataType, DataTypeFixedString>)
+            while (!in.eof() && *in.position() == 0)
+                ++in.position();
+
+        if (in.eof())
+            return true;
+
+        /// Special case, that allows to parse string with DateTime or DateTime64 as Date or Date32.
+        if constexpr (std::is_same_v<ToDataType, DataTypeDate> || std::is_same_v<ToDataType, DataTypeDate32>)
+        {
+            if (!in.eof() && (*in.position() == ' ' || *in.position() == 'T'))
+            {
+                if (in.buffer().size() == strlen("YYYY-MM-DD hh:mm:ss"))
+                    return true;
+
+                /// Index 19 is the character right after "YYYY-MM-DD hh:mm:ss"; the size check above it keeps the access in bounds.
+                if (in.buffer().size() >= strlen("YYYY-MM-DD hh:mm:ss.x")
+                    && in.buffer().begin()[19] == '.')
+                {
+                    in.position() = in.buffer().begin() + 20;
+
+                    while (!in.eof() && isNumericASCII(*in.position()))
+                        ++in.position();
+
+                    if (in.eof())
+                        return true;
+                }
+            }
+        }
+
+        return false;
+    }
+
+    /// Parses `input_rows_count` rows of arguments[0] into a new column of ToDataType.
+    /// For decimal-like targets `additions` carries the scale. In Null mode the result is a ColumnNullable.
+    template <typename Additions = void *>
+    static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, size_t input_rows_count,
+                        Additions additions [[maybe_unused]] = Additions())
+    {
+        using ColVecTo = typename ToDataType::ColumnType;
+
+        const DateLUTImpl * local_time_zone [[maybe_unused]] = nullptr;
+        const DateLUTImpl * utc_time_zone [[maybe_unused]] = nullptr;
+
+        /// For conversion to Date or DateTime type, second argument with time zone could be specified.
+        if constexpr (std::is_same_v<ToDataType, DataTypeDateTime> || to_datetime64)
+        {
+            const auto result_type = removeNullable(res_type);
+            // Time zone is already figured out during result type resolution, no need to do it here.
+            if (const auto dt_col = checkAndGetDataType<ToDataType>(result_type.get()))
+                local_time_zone = &dt_col->getTimeZone();
+            else
+                local_time_zone = &extractTimeZoneFromFunctionArguments(arguments, 1, 0);
+
+            if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort || parsing_mode == ConvertFromStringParsingMode::BestEffortUS)
+                utc_time_zone = &DateLUT::instance("UTC");
+        }
+        else if constexpr (std::is_same_v<ToDataType, DataTypeDate> || std::is_same_v<ToDataType, DataTypeDate32>)
+        {
+            // Timezone is more or less dummy when parsing Date/Date32 from string.
+            local_time_zone = &DateLUT::instance();
+            utc_time_zone = &DateLUT::instance("UTC");
+        }
+
+        const IColumn * col_from = arguments[0].column.get();
+        const ColumnString * col_from_string = checkAndGetColumn<ColumnString>(col_from);
+        const ColumnFixedString * col_from_fixed_string = checkAndGetColumn<ColumnFixedString>(col_from);
+
+        if (std::is_same_v<FromDataType, DataTypeString> && !col_from_string)
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
+                col_from->getName(), Name::name);
+
+        if (std::is_same_v<FromDataType, DataTypeFixedString> && !col_from_fixed_string)
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
+                col_from->getName(), Name::name);
+
+        size_t size = input_rows_count;
+        typename ColVecTo::MutablePtr col_to = nullptr;
+
+        if constexpr (IsDataTypeDecimal<ToDataType>)
+        {
+            UInt32 scale = additions;
+            /// The temporaries exist only so that the data type constructor validates the scale.
+            if constexpr (to_datetime64)
+            {
+                ToDataType check_bounds_in_ctor(scale, local_time_zone ? local_time_zone->getTimeZone() : String{});
+            }
+            else
+            {
+                ToDataType check_bounds_in_ctor(ToDataType::maxPrecision(), scale);
+            }
+            col_to = ColVecTo::create(size, scale);
+        }
+        else
+            col_to = ColVecTo::create(size);
+
+        typename ColVecTo::Container & vec_to = col_to->getData();
+
+        ColumnUInt8::MutablePtr col_null_map_to;
+        ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr;
+        if constexpr (exception_mode == ConvertFromStringExceptionMode::Null)
+        {
+            col_null_map_to = ColumnUInt8::create(size);
+            vec_null_map_to = &col_null_map_to->getData();
+        }
+
+        const ColumnString::Chars * chars = nullptr;
+        const IColumn::Offsets * offsets = nullptr;
+        size_t fixed_string_size = 0;
+
+        if constexpr (std::is_same_v<FromDataType, DataTypeString>)
+        {
+            chars = &col_from_string->getChars();
+            offsets = &col_from_string->getOffsets();
+        }
+        else
+        {
+            chars = &col_from_fixed_string->getChars();
+            fixed_string_size = col_from_fixed_string->getN();
+        }
+
+        size_t current_offset = 0;
+
+        /// The float parsing mode is read from the query settings when a query context is available.
+        bool precise_float_parsing = false;
+
+        if (DB::CurrentThread::isInitialized())
+        {
+            const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext();
+
+            if (query_context)
+                precise_float_parsing = query_context->getSettingsRef().precise_float_parsing;
+        }
+
+        for (size_t i = 0; i < size; ++i)
+        {
+            /// For String the stored row includes one extra trailing byte, hence the "- 1".
+            size_t next_offset = std::is_same_v<FromDataType, DataTypeString> ? (*offsets)[i] : (current_offset + fixed_string_size);
+            size_t string_size = std::is_same_v<FromDataType, DataTypeString> ? next_offset - current_offset - 1 : fixed_string_size;
+
+            ReadBufferFromMemory read_buffer(chars->data() + current_offset, string_size);
+
+            if constexpr (exception_mode == ConvertFromStringExceptionMode::Throw)
+            {
+                if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort)
+                {
+                    if constexpr (to_datetime64)
+                    {
+                        DateTime64 res = 0;
+                        parseDateTime64BestEffort(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone);
+                        vec_to[i] = res;
+                    }
+                    else
+                    {
+                        time_t res = 0;
+                        parseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone);
+                        convertFromTime<ToDataType>(vec_to[i], res);
+                    }
+                }
+                else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS)
+                {
+                    if constexpr (to_datetime64)
+                    {
+                        DateTime64 res = 0;
+                        parseDateTime64BestEffortUS(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone);
+                        vec_to[i] = res;
+                    }
+                    else
+                    {
+                        time_t res = 0;
+                        parseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone);
+                        convertFromTime<ToDataType>(vec_to[i], res);
+                    }
+                }
+                else
+                {
+                    if constexpr (to_datetime64)
+                    {
+                        DateTime64 value = 0;
+                        readDateTime64Text(value, col_to->getScale(), read_buffer, *local_time_zone);
+                        vec_to[i] = value;
+                    }
+                    else if constexpr (IsDataTypeDecimal<ToDataType>)
+                    {
+                        SerializationDecimal<typename ToDataType::FieldType>::readText(
+                            vec_to[i], read_buffer, ToDataType::maxPrecision(), col_to->getScale());
+                    }
+                    else
+                    {
+                        /// we want to utilize constexpr condition here, which is not mixable with value comparison
+                        do
+                        {
+                            if constexpr (std::is_same_v<ToDataType, DataTypeIPv6> && std::is_same_v<FromDataType, DataTypeFixedString>)
+                            {
+                                /// A FixedString of exactly IPV6_BINARY_LENGTH bytes is read as a binary address, not as text.
+                                if (fixed_string_size == IPV6_BINARY_LENGTH)
+                                {
+                                    readBinary(vec_to[i], read_buffer);
+                                    break;
+                                }
+                            }
+                            parseImpl<ToDataType>(vec_to[i], read_buffer, local_time_zone, precise_float_parsing);
+                        } while (false);
+                    }
+                }
+
+                if (!isAllRead(read_buffer))
+                    throwExceptionForIncompletelyParsedValue(read_buffer, *res_type);
+            }
+            else
+            {
+                bool parsed;
+
+                if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort)
+                {
+                    if constexpr (to_datetime64)
+                    {
+                        DateTime64 res = 0;
+                        parsed = tryParseDateTime64BestEffort(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone);
+                        vec_to[i] = res;
+                    }
+                    else
+                    {
+                        /// Zero-initialized: the value is passed on even when parsing fails, so it must not be indeterminate.
+                        time_t res = 0;
+                        parsed = tryParseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone);
+                        convertFromTime<ToDataType>(vec_to[i], res);
+                    }
+                }
+                else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS)
+                {
+                    if constexpr (to_datetime64)
+                    {
+                        DateTime64 res = 0;
+                        parsed = tryParseDateTime64BestEffortUS(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone);
+                        vec_to[i] = res;
+                    }
+                    else
+                    {
+                        /// Zero-initialized for the same reason as in the BestEffort branch.
+                        time_t res = 0;
+                        parsed = tryParseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone);
+                        convertFromTime<ToDataType>(vec_to[i], res);
+                    }
+                }
+                else
+                {
+                    if constexpr (to_datetime64)
+                    {
+                        DateTime64 value = 0;
+                        parsed = tryReadDateTime64Text(value, col_to->getScale(), read_buffer, *local_time_zone);
+                        vec_to[i] = value;
+                    }
+                    else if constexpr (IsDataTypeDecimal<ToDataType>)
+                    {
+                        parsed = SerializationDecimal<typename ToDataType::FieldType>::tryReadText(
+                            vec_to[i], read_buffer, ToDataType::maxPrecision(), col_to->getScale());
+                    }
+                    else if (std::is_same_v<ToDataType, DataTypeIPv6> && std::is_same_v<FromDataType, DataTypeFixedString>
+                            && fixed_string_size == IPV6_BINARY_LENGTH)
+                    {
+                        readBinary(vec_to[i], read_buffer);
+                        parsed = true;
+                    }
+                    else
+                    {
+                        parsed = tryParseImpl<ToDataType>(vec_to[i], read_buffer, local_time_zone, precise_float_parsing);
+                    }
+                }
+
+                /// Trailing unparsed content makes the row a failure as well.
+                if (!isAllRead(read_buffer))
+                    parsed = false;
+
+                if (!parsed)
+                {
+                    if constexpr (std::is_same_v<ToDataType, DataTypeDate32>)
+                    {
+                        vec_to[i] = -static_cast<Int32>(DateLUT::instance().getDayNumOffsetEpoch());
+                    }
+                    else
+                    {
+                        vec_to[i] = static_cast<typename ToDataType::FieldType>(0);
+                    }
+                }
+
+                if constexpr (exception_mode == ConvertFromStringExceptionMode::Null)
+                    (*vec_null_map_to)[i] = !parsed;
+            }
+
+            current_offset = next_offset;
+        }
+
+        if constexpr (exception_mode == ConvertFromStringExceptionMode::Null)
+            return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to));
+        else
+            return col_to;
+    }
+};
+
+
+/// Function toUnixTimestamp has exactly the same
implementation as toDateTime of String type. +struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; }; + +struct AccurateConvertStrategyAdditions +{ + UInt32 scale { 0 }; +}; + +struct AccurateOrNullConvertStrategyAdditions +{ + UInt32 scale { 0 }; +}; + + +struct ConvertDefaultBehaviorTag {}; +struct ConvertReturnNullOnErrorTag {}; +struct ConvertReturnZeroOnErrorTag {}; + +/** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment. + * (Date is represented internally as number of days from some day; DateTime - as unix timestamp) + */ +template +struct ConvertImpl +{ + template + static ColumnPtr NO_SANITIZE_UNDEFINED execute( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type [[maybe_unused]], size_t input_rows_count, + Additions additions = Additions()) + { + const ColumnWithTypeAndName & named_from = arguments[0]; + + if constexpr ((std::is_same_v && !FromDataType::is_parametric) + || (std::is_same_v && std::is_same_v) + || (std::is_same_v && std::is_same_v)) + { + /// If types are the same, reuse the columns. + /// Conversions between Enum and the underlying type are also free. + return named_from.column; + } + else if constexpr ((std::is_same_v || std::is_same_v) + && std::is_same_v) + { + /// Conversion of DateTime to Date: throw off time component. + /// Conversion of Date32 to Date. + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count); + } + else if constexpr (std::is_same_v && std::is_same_v) + { + /// Conversion of DateTime to Date: throw off time component. + return DateTimeTransformImpl::template execute( + arguments, result_type, input_rows_count); + } + else if constexpr ((std::is_same_v || std::is_same_v) + && std::is_same_v) + { + /// Conversion from Date/Date32 to DateTime. 
+ return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count); + } + else if constexpr (std::is_same_v && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count, additions); + } + /** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience, + * Float32, Float64) to Date. If the + * number is less than 65536, then it is treated as DayNum, and if it's greater or equals to 65536, + * then treated as unix timestamp. If the number exceeds UInt32, saturate to MAX_UINT32 then as DayNum. + * It's a bit illogical, as we actually have two functions in one. + * But allows to support frequent case, + * when user write toDate(UInt32), expecting conversion of unix timestamp to Date. + * (otherwise such usage would be frequent mistake). + */ + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count); + } 
+ else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count); + } + /// Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to DateTime. + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count); + } + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count, additions); + } + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count, additions); + } + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count, additions); + } + /// Conversion of DateTime64 to Date or DateTime: discards fractional part. 
+ else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl>, false>::template execute( + arguments, result_type, input_rows_count, additions); + } + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl>, false>::template execute( + arguments, result_type, input_rows_count, additions); + } + /// Conversion of Date or DateTime to DateTime64: add zero sub-second part. + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl::template execute( + arguments, result_type, input_rows_count, additions); + } + else if constexpr (IsDataTypeDateOrDateTime + && std::is_same_v) + { + /// Date or DateTime to String + + using FromFieldType = typename FromDataType::FieldType; + using ColVecType = ColumnVectorOrDecimal; + + auto datetime_arg = arguments[0]; + + const DateLUTImpl * time_zone = nullptr; + const ColumnConst * time_zone_column = nullptr; + + if (arguments.size() == 1) + { + auto non_null_args = createBlockWithNestedColumns(arguments); + time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); + } + else /// When we have a column for timezone + { + datetime_arg.column = datetime_arg.column->convertToFullColumnIfConst(); + + if constexpr (std::is_same_v || std::is_same_v) + time_zone = &DateLUT::instance(); + /// For argument of Date or DateTime type, second argument with time zone could be specified. 
+ if constexpr (std::is_same_v || std::is_same_v) + { + if ((time_zone_column = checkAndGetColumnConst(arguments[1].column.get()))) + { + auto non_null_args = createBlockWithNestedColumns(arguments); + time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); + } + } + } + const auto & col_with_type_and_name = columnGetNested(datetime_arg); + + if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) + { + auto col_to = ColumnString::create(); + + const typename ColVecType::Container & vec_from = col_from->getData(); + ColumnString::Chars & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + size_t size = vec_from.size(); + + if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); + else if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); + else if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1)); + else if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss.") + col_from->getScale() + 1)); + else + data_to.resize(size * 3); /// Arbitrary + + offsets_to.resize(size); + + WriteBufferFromVector write_buffer(data_to); + const auto & type = static_cast(*col_with_type_and_name.type); + + ColumnUInt8::MutablePtr null_map = copyNullMap(datetime_arg.column); + + if (!null_map && arguments.size() > 1) + null_map = copyNullMap(arguments[1].column->convertToFullColumnIfConst()); + + if (null_map) + { + for (size_t i = 0; i < size; ++i) + { + if (!time_zone_column && arguments.size() > 1) + { + if (!arguments[1].column.get()->getDataAt(i).toString().empty()) + time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); + } + bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); + null_map->getData()[i] |= !is_ok; + 
writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + else + { + for (size_t i = 0; i < size; ++i) + { + if (!time_zone_column && arguments.size() > 1) + { + if (!arguments[1].column.get()->getDataAt(i).toString().empty()) + time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); + } + FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + + write_buffer.finalize(); + + if (null_map) + return ColumnNullable::create(std::move(col_to), std::move(null_map)); + return col_to; + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), Name::name); + } + /// Conversion from FixedString to String. + /// Cutting sequences of zero bytes from end of strings. + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); + const auto & nested = columnGetNested(arguments[0]); + if (const ColumnFixedString * col_from = checkAndGetColumn(nested.column.get())) + { + auto col_to = ColumnString::create(); + + const ColumnFixedString::Chars & data_from = col_from->getChars(); + ColumnString::Chars & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + size_t size = col_from->size(); + size_t n = col_from->getN(); + data_to.resize(size * (n + 1)); /// + 1 - zero terminator + offsets_to.resize(size); + + size_t offset_from = 0; + size_t offset_to = 0; + for (size_t i = 0; i < size; ++i) + { + if (!null_map || !null_map->getData()[i]) + { + size_t bytes_to_copy = n; + while (bytes_to_copy > 0 && data_from[offset_from + bytes_to_copy - 1] == 0) + --bytes_to_copy; + + memcpy(&data_to[offset_to], &data_from[offset_from], 
bytes_to_copy); + offset_to += bytes_to_copy; + } + data_to[offset_to] = 0; + ++offset_to; + offsets_to[i] = offset_to; + offset_from += n; + } + + data_to.resize(offset_to); + if (result_type->isNullable() && null_map) + return ColumnNullable::create(std::move(col_to), std::move(null_map)); + return col_to; + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), Name::name); + } + else if constexpr (std::is_same_v + && std::is_same_v) + { + /// Anything else to String. + + using FromFieldType = typename FromDataType::FieldType; + using ColVecType = ColumnVectorOrDecimal; + + ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); + + const auto & col_with_type_and_name = columnGetNested(arguments[0]); + const auto & type = static_cast(*col_with_type_and_name.type); + + if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) + { + auto col_to = ColumnString::create(); + + const typename ColVecType::Container & vec_from = col_from->getData(); + ColumnString::Chars & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + size_t size = vec_from.size(); + + data_to.resize(size * 3); + offsets_to.resize(size); + + WriteBufferFromVector write_buffer(data_to); + + if (null_map) + { + for (size_t i = 0; i < size; ++i) + { + bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); + /// We don't use timezones in this branch + null_map->getData()[i] |= !is_ok; + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + else + { + for (size_t i = 0; i < size; ++i) + { + FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + + write_buffer.finalize(); + + if (null_map) + return ColumnNullable::create(std::move(col_to), std::move(null_map)); + return col_to; + } + else + 
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), Name::name); + } + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + return ConvertImpl::template execute( + arguments, result_type, input_rows_count); + } + else if constexpr ((std::is_same_v || std::is_same_v) + && std::is_same_v) + { + return ConvertThroughParsing::execute( + arguments, result_type, input_rows_count, additions); + } + else if constexpr ((std::is_same_v || std::is_same_v) + && std::is_same_v) + { + return ConvertThroughParsing::execute( + arguments, result_type, input_rows_count, additions); + } + else if constexpr ((std::is_same_v || std::is_same_v) + && is_any_of + && std::is_same_v) + { + return ConvertThroughParsing::execute( + arguments, result_type, input_rows_count, additions); + } + else + { + using FromFieldType = typename FromDataType::FieldType; + using ToFieldType = typename ToDataType::FieldType; + using ColVecFrom = typename FromDataType::ColumnType; + using ColVecTo = typename ToDataType::ColumnType; + + if constexpr ((IsDataTypeDecimal || IsDataTypeDecimal) + && !(std::is_same_v || std::is_same_v) + && (!IsDataTypeDecimalOrNumber || !IsDataTypeDecimalOrNumber)) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + named_from.column->getName(), Name::name); + } + + const ColVecFrom * col_from = checkAndGetColumn(named_from.column.get()); + if (!col_from) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + named_from.column->getName(), Name::name); + + typename ColVecTo::MutablePtr col_to = nullptr; + + if constexpr (IsDataTypeDecimal) + { + UInt32 scale; + + if constexpr (std::is_same_v + || std::is_same_v) + { + scale = additions.scale; + } + else + { + scale = additions; + } + + col_to = ColVecTo::create(0, scale); + } + else + col_to = ColVecTo::create(); + + const 
auto & vec_from = col_from->getData(); + auto & vec_to = col_to->getData(); + vec_to.resize(input_rows_count); + + ColumnUInt8::MutablePtr col_null_map_to; + ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; + if constexpr (std::is_same_v) + { + col_null_map_to = ColumnUInt8::create(input_rows_count, false); + vec_null_map_to = &col_null_map_to->getData(); + } + + bool result_is_bool = isBool(result_type); + for (size_t i = 0; i < input_rows_count; ++i) + { + if constexpr (std::is_same_v) + { + if (result_is_bool) + { + vec_to[i] = vec_from[i] != FromFieldType(0); + continue; + } + } + + if constexpr (std::is_same_v && std::is_same_v) + { + static_assert( + std::is_same_v, + "UInt128 and UUID types must be same"); + + vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; + vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; + } + else if constexpr (std::is_same_v && std::is_same_v) + { + static_assert( + std::is_same_v, + "UInt128 and IPv6 types must be same"); + + vec_to[i].items[1] = std::byteswap(vec_from[i].toUnderType().items[0]); + vec_to[i].items[0] = std::byteswap(vec_from[i].toUnderType().items[1]); + } + else if constexpr (std::is_same_v != std::is_same_v) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Conversion between numeric types and UUID is not supported. " + "Probably the passed UUID is unquoted"); + } + else if constexpr ( + (std::is_same_v != std::is_same_v) + && !(is_any_of + || is_any_of)) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion from {} to {} is not supported", + TypeName, TypeName); + } + else if constexpr (std::is_same_v != std::is_same_v + && !(std::is_same_v || std::is_same_v)) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Conversion between numeric types and IPv6 is not supported. 
" + "Probably the passed IPv6 is unquoted"); + } + else if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) + { + if constexpr (std::is_same_v) + { + ToFieldType result; + bool convert_result = false; + + if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) + convert_result = tryConvertDecimals(vec_from[i], col_from->getScale(), col_to->getScale(), result); + else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) + convert_result = tryConvertFromDecimal(vec_from[i], col_from->getScale(), result); + else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) + convert_result = tryConvertToDecimal(vec_from[i], col_to->getScale(), result); + + if (convert_result) + vec_to[i] = result; + else + { + vec_to[i] = static_cast(0); + (*vec_null_map_to)[i] = true; + } + } + else + { + if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) + vec_to[i] = convertDecimals(vec_from[i], col_from->getScale(), col_to->getScale()); + else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) + vec_to[i] = convertFromDecimal(vec_from[i], col_from->getScale()); + else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) + vec_to[i] = convertToDecimal(vec_from[i], col_to->getScale()); + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unsupported data type in conversion function"); + } + } + else if constexpr (std::is_same_v && std::is_same_v) + { + const uint8_t ip4_cidr[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}; + const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); + if (!matchIPv6Subnet(src, ip4_cidr, 96)) + { + char addr[IPV6_MAX_TEXT_LENGTH + 1] {}; + char * paddr = addr; + formatIPv6(src, paddr); + + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 {} in column {} is not in IPv4 mapping block", addr, named_from.column->getName()); + } + + uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); + if constexpr (std::endian::native == std::endian::little) + { + dst[0] = src[15]; + dst[1] = 
src[14]; + dst[2] = src[13]; + dst[3] = src[12]; + } + else + { + dst[0] = src[12]; + dst[1] = src[13]; + dst[2] = src[14]; + dst[3] = src[15]; + } + } + else if constexpr (std::is_same_v && std::is_same_v) + { + const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); + uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); + std::memset(dst, '\0', IPV6_BINARY_LENGTH); + dst[10] = dst[11] = 0xff; + + if constexpr (std::endian::native == std::endian::little) + { + dst[12] = src[3]; + dst[13] = src[2]; + dst[14] = src[1]; + dst[15] = src[0]; + } + else + { + dst[12] = src[0]; + dst[13] = src[1]; + dst[14] = src[2]; + dst[15] = src[3]; + } + } + else if constexpr (std::is_same_v && std::is_same_v) + { + vec_to[i] = static_cast(static_cast(vec_from[i])); + } + else if constexpr (std::is_same_v + && (std::is_same_v || std::is_same_v)) + { + vec_to[i] = static_cast(vec_from[i] * DATE_SECONDS_PER_DAY); + } + else + { + /// If From Data is Nan or Inf and we convert to integer type, throw exception + if constexpr (std::is_floating_point_v && !std::is_floating_point_v) + { + if (!isFinite(vec_from[i])) + { + if constexpr (std::is_same_v) + { + vec_to[i] = 0; + (*vec_null_map_to)[i] = true; + continue; + } + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unexpected inf or nan to integer conversion"); + } + } + + if constexpr (std::is_same_v + || std::is_same_v) + { + bool convert_result = accurate::convertNumeric(vec_from[i], vec_to[i]); + + if (!convert_result) + { + if (std::is_same_v) + { + vec_to[i] = 0; + (*vec_null_map_to)[i] = true; + } + else + { + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Value in column {} cannot be safely converted into type {}", + named_from.column->getName(), result_type->getName()); + } + } + } + else + { + vec_to[i] = static_cast(vec_from[i]); + } + } + } + + if constexpr (std::is_same_v) + return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); + else + return col_to; + } + } +}; + + 
+/// Generic conversion of any type from String. Used for complex types: Array and Tuple or types with custom serialization. +struct ConvertImplGenericFromString +{ + static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) + { + const IColumn & column_from = *arguments[0].column; + const IDataType & data_type_to = *result_type; + auto res = data_type_to.createColumn(); + auto serialization = data_type_to.getDefaultSerialization(); + const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; + + executeImpl(column_from, *res, *serialization, input_rows_count, null_map, result_type.get()); + return res; + } + + static void executeImpl( + const IColumn & column_from, + IColumn & column_to, + const ISerialization & serialization_from, + size_t input_rows_count, + const PaddedPODArray * null_map, + const IDataType * result_type) + { + column_to.reserve(input_rows_count); + + FormatSettings format_settings; + for (size_t i = 0; i < input_rows_count; ++i) + { + if (null_map && (*null_map)[i]) + { + column_to.insertDefault(); + continue; + } + + const auto & val = column_from.getDataAt(i); + ReadBufferFromMemory read_buffer(val.data, val.size); + try + { + serialization_from.deserializeWholeText(column_to, read_buffer, format_settings); + } + catch (const Exception & e) + { + auto * nullable_column = typeid_cast(&column_to); + if (e.code() == ErrorCodes::CANNOT_PARSE_BOOL && nullable_column) + { + auto & col_nullmap = nullable_column->getNullMapData(); + if (col_nullmap.size() != nullable_column->size()) + col_nullmap.resize_fill(nullable_column->size()); + if (nullable_column->size() == (i + 1)) + nullable_column->popBack(1); + nullable_column->insertDefault(); + continue; + } + throw; + } + + if (!read_buffer.eof()) + { + if (result_type) + throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); + else + throw 
Exception(ErrorCodes::CANNOT_PARSE_TEXT, + "Cannot parse string to column {}. Expected eof", column_to.getName()); + } + } + } +}; + + +/// Declared early because used below. +struct NameToDate { static constexpr auto name = "toDate"; }; +struct NameToDate32 { static constexpr auto name = "toDate32"; }; +struct NameToDateTime { static constexpr auto name = "toDateTime"; }; +struct NameToDateTime32 { static constexpr auto name = "toDateTime32"; }; +struct NameToDateTime64 { static constexpr auto name = "toDateTime64"; }; +struct NameToString { static constexpr auto name = "toString"; }; +struct NameToDecimal32 { static constexpr auto name = "toDecimal32"; }; +struct NameToDecimal64 { static constexpr auto name = "toDecimal64"; }; +struct NameToDecimal128 { static constexpr auto name = "toDecimal128"; }; +struct NameToDecimal256 { static constexpr auto name = "toDecimal256"; }; + + +#define DEFINE_NAME_TO_INTERVAL(INTERVAL_KIND) \ + struct NameToInterval ## INTERVAL_KIND \ + { \ + static constexpr auto name = "toInterval" #INTERVAL_KIND; \ + static constexpr auto kind = IntervalKind::Kind::INTERVAL_KIND; \ + }; + +DEFINE_NAME_TO_INTERVAL(Nanosecond) +DEFINE_NAME_TO_INTERVAL(Microsecond) +DEFINE_NAME_TO_INTERVAL(Millisecond) +DEFINE_NAME_TO_INTERVAL(Second) +DEFINE_NAME_TO_INTERVAL(Minute) +DEFINE_NAME_TO_INTERVAL(Hour) +DEFINE_NAME_TO_INTERVAL(Day) +DEFINE_NAME_TO_INTERVAL(Week) +DEFINE_NAME_TO_INTERVAL(Month) +DEFINE_NAME_TO_INTERVAL(Quarter) +DEFINE_NAME_TO_INTERVAL(Year) + +#undef DEFINE_NAME_TO_INTERVAL + +struct NameParseDateTimeBestEffort; +struct NameParseDateTimeBestEffortOrZero; +struct NameParseDateTimeBestEffortOrNull; + +template +inline bool isDateTime64(const ColumnsWithTypeAndName & arguments) +{ + if constexpr (std::is_same_v) + return true; + else if constexpr (std::is_same_v || std::is_same_v + || std::is_same_v || std::is_same_v) + { + return (arguments.size() == 2 && isUInt(arguments[1].type)) || arguments.size() == 3; + } + + return false; +} + 
+template +class FunctionConvert : public IFunction +{ +public: + using Monotonic = MonotonicityImpl; + + static constexpr auto name = Name::name; + static constexpr bool to_datetime64 = std::is_same_v; + static constexpr bool to_decimal = IsDataTypeDecimal && !to_datetime64; + + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionConvert(ContextPtr context_) : context(context_) {} + + String getName() const override + { + return name; + } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return std::is_same_v; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & arguments) const override + { + return !(IsDataTypeDateOrDateTime && isNumber(*arguments[0].type)); + } + + using DefaultReturnTypeGetter = std::function; + static DataTypePtr getReturnTypeDefaultImplementationForNulls(const ColumnsWithTypeAndName & arguments, const DefaultReturnTypeGetter & getter) + { + NullPresence null_presence = getNullPresense(arguments); + + if (null_presence.has_null_constant) + { + return makeNullable(std::make_shared()); + } + if (null_presence.has_nullable) + { + auto nested_columns = Block(createBlockWithNestedColumns(arguments)); + auto return_type = getter(ColumnsWithTypeAndName(nested_columns.begin(), nested_columns.end())); + return makeNullable(return_type); + } + + return getter(arguments); + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + auto getter = [&] (const auto & args) { return getReturnTypeImplRemovedNullable(args); }; + auto res = getReturnTypeDefaultImplementationForNulls(arguments, getter); + to_nullable = res->isNullable(); + checked_return_type = true; + return res; + } + + DataTypePtr getReturnTypeImplRemovedNullable(const ColumnsWithTypeAndName & arguments) const + { + 
FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; + FunctionArgumentDescriptors optional_args; + + if constexpr (to_decimal) + { + mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); + } + + if (!to_decimal && isDateTime64(arguments)) + { + mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); + } + + // toString(DateTime or DateTime64, [timezone: String]) + if ((std::is_same_v && !arguments.empty() && (isDateTime64(arguments[0].type) || isDateTime(arguments[0].type))) + // toUnixTimestamp(value[, timezone : String]) + || std::is_same_v + // toDate(value[, timezone : String]) + || std::is_same_v // TODO: shall we allow timestamp argument for toDate? DateTime knows nothing about timezones and this argument is ignored below. + // toDate32(value[, timezone : String]) + || std::is_same_v + // toDateTime(value[, timezone: String]) + || std::is_same_v + // toDateTime64(value, scale : Integer[, timezone: String]) + || std::is_same_v) + { + optional_args.push_back({"timezone", static_cast(&isString), nullptr, "String"}); + } + + validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + + if constexpr (std::is_same_v) + { + return std::make_shared(Name::kind); + } + else if constexpr (to_decimal) + { + UInt64 scale = extractToDecimalScale(arguments[1]); + + if constexpr (std::is_same_v) + return createDecimalMaxPrecision(scale); + else if constexpr (std::is_same_v) + return createDecimalMaxPrecision(scale); + else if constexpr (std::is_same_v) + return createDecimalMaxPrecision(scale); + else if constexpr (std::is_same_v) + return createDecimalMaxPrecision(scale); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected branch in code of conversion function: it is a bug."); + } + else + { + // Optional second argument with time zone for DateTime. 
+ UInt8 timezone_arg_position = 1; + UInt32 scale [[maybe_unused]] = DataTypeDateTime64::default_scale; + + // DateTime64 requires more arguments: scale and timezone. Since timezone is optional, scale should be first. + if (isDateTime64(arguments)) + { + timezone_arg_position += 1; + scale = static_cast(arguments[1].column->get64(0)); + + if (to_datetime64 || scale != 0) /// toDateTime('xxxx-xx-xx xx:xx:xx', 0) return DateTime + return std::make_shared(scale, + extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); + + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); + } + + if constexpr (std::is_same_v) + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); + else if constexpr (std::is_same_v) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected branch in code of conversion function: it is a bug."); + else + return std::make_shared(); + } + } + + /// Function actually uses default implementation for nulls, + /// but we need to know if return type is Nullable or not, + /// so we use checked_return_type only to intercept the first call to getReturnTypeImpl(...). 
+ bool useDefaultImplementationForNulls() const override + { + bool to_nullable_string = to_nullable && std::is_same_v; + return checked_return_type && !to_nullable_string; + } + + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override + { + if constexpr (std::is_same_v) + return {}; + else if constexpr (std::is_same_v) + return {2}; + return {1}; + } + bool canBeExecutedOnDefaultArguments() const override { return false; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + try + { + return executeInternal(arguments, result_type, input_rows_count); + } + catch (Exception & e) + { + /// More convenient error message. + if (e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF) + { + e.addMessage("Cannot parse " + + result_type->getName() + " from " + + arguments[0].type->getName() + + ", because value is too short"); + } + else if (e.code() == ErrorCodes::CANNOT_PARSE_NUMBER + || e.code() == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT + || e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED + || e.code() == ErrorCodes::CANNOT_PARSE_QUOTED_STRING + || e.code() == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE + || e.code() == ErrorCodes::CANNOT_PARSE_DATE + || e.code() == ErrorCodes::CANNOT_PARSE_DATETIME + || e.code() == ErrorCodes::CANNOT_PARSE_UUID + || e.code() == ErrorCodes::CANNOT_PARSE_IPV4 + || e.code() == ErrorCodes::CANNOT_PARSE_IPV6) + { + e.addMessage("Cannot parse " + + result_type->getName() + " from " + + arguments[0].type->getName()); + } + + throw; + } + } + + bool hasInformationAboutMonotonicity() const override + { + return Monotonic::has(); + } + + Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override + { + return Monotonic::get(type, left, right); + } + +private: + ContextPtr context; + mutable bool 
checked_return_type = false; + mutable bool to_nullable = false; + + ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { + if (arguments.empty()) + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least 1 argument", getName()); + + if (result_type->onlyNull()) + return result_type->createColumnConstWithDefaultValue(input_rows_count); + + const DataTypePtr from_type = removeNullable(arguments[0].type); + ColumnPtr result_column; + + [[maybe_unused]] FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; + + if (context) + date_time_overflow_behavior = context->getSettingsRef().date_time_overflow_behavior.value; + + auto call = [&](const auto & types, const auto & tag) -> bool + { + using Types = std::decay_t; + using LeftDataType = typename Types::LeftType; + using RightDataType = typename Types::RightType; + using SpecialTag = std::decay_t; + + if constexpr (IsDataTypeDecimal) + { + if constexpr (std::is_same_v) + { + /// Account for optional timezone argument. 
+ if (arguments.size() != 2 && arguments.size() != 3) + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects 2 or 3 arguments for DataTypeDateTime64.", getName()); + } + else if (arguments.size() != 2) + { + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects 2 arguments for Decimal.", getName()); + } + + const ColumnWithTypeAndName & scale_column = arguments[1]; + UInt32 scale = extractToDecimalScale(scale_column); + + switch (date_time_overflow_behavior) + { + case FormatSettings::DateTimeOverflowBehavior::Throw: + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); + break; + case FormatSettings::DateTimeOverflowBehavior::Ignore: + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); + break; + case FormatSettings::DateTimeOverflowBehavior::Saturate: + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); + break; + } + + } + else if constexpr (IsDataTypeDateOrDateTime && std::is_same_v) + { + const auto * dt64 = assert_cast(arguments[0].type.get()); + switch (date_time_overflow_behavior) + { + case FormatSettings::DateTimeOverflowBehavior::Throw: + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); + break; + case FormatSettings::DateTimeOverflowBehavior::Ignore: + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); + break; + case FormatSettings::DateTimeOverflowBehavior::Saturate: + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); + break; + } + } +#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE) \ + case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ + result_column = ConvertImpl::execute( \ + arguments, result_type, input_rows_count); \ + break; + + else if constexpr (IsDataTypeDecimalOrNumber && IsDataTypeDecimalOrNumber) + { + using LeftT = 
typename LeftDataType::FieldType; + using RightT = typename RightDataType::FieldType; + + static constexpr bool bad_left = + is_decimal || std::is_floating_point_v || is_big_int_v || is_signed_v; + static constexpr bool bad_right = + is_decimal || std::is_floating_point_v || is_big_int_v || is_signed_v; + + /// Disallow int vs UUID conversion (but support int vs UInt128 conversion) + if constexpr ((bad_left && std::is_same_v) || + (bad_right && std::is_same_v)) + { + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Wrong UUID conversion"); + } + else + { + switch (date_time_overflow_behavior) + { + GENERATE_OVERFLOW_MODE_CASE(Throw) + GENERATE_OVERFLOW_MODE_CASE(Ignore) + GENERATE_OVERFLOW_MODE_CASE(Saturate) + } + } + } + else if constexpr ((IsDataTypeNumber || IsDataTypeDateOrDateTime) + && IsDataTypeDateOrDateTime) + { + switch (date_time_overflow_behavior) + { + GENERATE_OVERFLOW_MODE_CASE(Throw) + GENERATE_OVERFLOW_MODE_CASE(Ignore) + GENERATE_OVERFLOW_MODE_CASE(Saturate) + } + } +#undef GENERATE_OVERFLOW_MODE_CASE + else + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count); + + return true; + }; + + if (isDateTime64(arguments)) + { + /// For toDateTime('xxxx-xx-xx xx:xx:xx.00', 2[, 'timezone']) we need to it convert to DateTime64 + const ColumnWithTypeAndName & scale_column = arguments[1]; + UInt32 scale = extractToDecimalScale(scale_column); + + if (to_datetime64 || scale != 0) /// When scale = 0, the data type is DateTime otherwise the data type is DateTime64 + { + if (!callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{})) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0].type->getName(), getName()); + + return result_column; + } + } + + if constexpr (std::is_same_v) + { + if (from_type->getCustomSerialization()) + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + } + + bool done = false; + if 
constexpr (is_any_of) + { + done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); + } + else + { + bool cast_ipv4_ipv6_default_on_conversion_error = false; + if constexpr (is_any_of) + if (context && (cast_ipv4_ipv6_default_on_conversion_error = context->getSettingsRef().cast_ipv4_ipv6_default_on_conversion_error)) + done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertReturnZeroOnErrorTag{}); + + if (!cast_ipv4_ipv6_default_on_conversion_error) + { + /// We should use ConvertFromStringExceptionMode::Null mode when converting from String (or FixedString) + /// to Nullable type, to avoid 'value is too short' error on attempt to parse empty string from NULL values. + if (to_nullable && WhichDataType(from_type).isStringOrFixedString()) + done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertReturnNullOnErrorTag{}); + else + done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); + } + } + + if (!done) + { + /// Generic conversion of any type to String. + if (std::is_same_v) + { + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + } + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0].type->getName(), getName()); + } + + return result_column; + } +}; + + +/** Function toTOrZero (where T is number of date or datetime type): + * try to convert from String to type T through parsing, + * if cannot parse, return default value instead of throwing exception. + * Function toTOrNull will return Nullable type with NULL when cannot parse. + * NOTE Also need to implement tryToUnixTimestamp with timezone. 
+ */ +template +class FunctionConvertFromString : public IFunction +{ +public: + static constexpr auto name = Name::name; + static constexpr bool to_datetime64 = std::is_same_v; + static constexpr bool to_decimal = IsDataTypeDecimal && !to_datetime64; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + bool isVariadic() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + + bool useDefaultImplementationForConstants() const override { return true; } + bool canBeExecutedOnDefaultArguments() const override { return false; } + + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + DataTypePtr res; + + if (isDateTime64(arguments)) + { + validateFunctionArgumentTypes(*this, arguments, + FunctionArgumentDescriptors{{"string", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}}, + // optional + FunctionArgumentDescriptors{ + {"precision", static_cast(&isUInt8), isColumnConst, "const UInt8"}, + {"timezone", static_cast(&isStringOrFixedString), isColumnConst, "const String or FixedString"}, + }); + + UInt64 scale = to_datetime64 ? DataTypeDateTime64::default_scale : 0; + if (arguments.size() > 1) + scale = extractToDecimalScale(arguments[1]); + const auto timezone = extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false); + + res = scale == 0 ? res = std::make_shared(timezone) : std::make_shared(scale, timezone); + } + else + { + if ((arguments.size() != 1 && arguments.size() != 2) || (to_decimal && arguments.size() != 2)) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2. 
" + "Second argument only make sense for DateTime (time zone, optional) and Decimal (scale).", + getName(), arguments.size()); + + if (!isStringOrFixedString(arguments[0].type)) + { + if (this->getName().find("OrZero") != std::string::npos || + this->getName().find("OrNull") != std::string::npos) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. " + "Conversion functions with postfix 'OrZero' or 'OrNull' should take String argument", + arguments[0].type->getName(), getName()); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", + arguments[0].type->getName(), getName()); + } + + if (arguments.size() == 2) + { + if constexpr (std::is_same_v) + { + if (!isString(arguments[1].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}", + arguments[1].type->getName(), getName()); + } + else if constexpr (to_decimal) + { + if (!isInteger(arguments[1].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}", + arguments[1].type->getName(), getName()); + if (!arguments[1].column) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be constant", getName()); + } + else + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1. 
" + "Second argument makes sense only for DateTime and Decimal.", + getName(), arguments.size()); + } + } + + if constexpr (std::is_same_v) + res = std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0, false)); + else if constexpr (std::is_same_v) + throw Exception(ErrorCodes::LOGICAL_ERROR, "MaterializedMySQL is a bug."); + else if constexpr (to_decimal) + { + UInt64 scale = extractToDecimalScale(arguments[1]); + res = createDecimalMaxPrecision(scale); + if (!res) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Something wrong with toDecimalNNOrZero() or toDecimalNNOrNull()"); + } + else + res = std::make_shared(); + } + + if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) + res = std::make_shared(res); + + return res; + } + + template + ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, UInt32 scale) const + { + const IDataType * from_type = arguments[0].type.get(); + + if (checkAndGetDataType(from_type)) + { + return ConvertThroughParsing::execute( + arguments, result_type, input_rows_count, scale); + } + else if (checkAndGetDataType(from_type)) + { + return ConvertThroughParsing::execute( + arguments, result_type, input_rows_count, scale); + } + + return nullptr; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + ColumnPtr result_column; + + if constexpr (to_decimal) + { + result_column = executeInternal(arguments, result_type, input_rows_count, + assert_cast(*removeNullable(result_type)).getScale()); + } + else if (isDateTime64(arguments)) + { + UInt64 scale = to_datetime64 ? 
DataTypeDateTime64::default_scale : 0; + if (arguments.size() > 1) + scale = extractToDecimalScale(arguments[1]); + + if (scale == 0) + { + result_column = executeInternal(arguments, result_type, input_rows_count, 0); + } + else + { + result_column = executeInternal(arguments, result_type, input_rows_count, static_cast(scale)); + } + } + else + { + result_column = executeInternal(arguments, result_type, input_rows_count, 0); + } + + if (!result_column) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " + "Only String or FixedString argument is accepted for try-conversion function. For other arguments, " + "use function without 'orZero' or 'orNull'.", arguments[0].type->getName(), getName()); + + return result_column; + } +}; + + +/// Monotonicity. + +struct PositiveMonotonicity +{ + static bool has() { return true; } + static IFunction::Monotonicity get(const IDataType &, const Field &, const Field &) + { + return { .is_monotonic = true }; + } +}; + +struct UnknownMonotonicity +{ + static bool has() { return false; } + static IFunction::Monotonicity get(const IDataType &, const Field &, const Field &) + { + return { }; + } +}; + +template +struct ToNumberMonotonicity +{ + static bool has() { return true; } + + static UInt64 divideByRangeOfType(UInt64 x) + { + if constexpr (sizeof(T) < sizeof(UInt64)) + return x >> (sizeof(T) * 8); + else + return 0; + } + + static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) + { + if (!type.isValueRepresentedByNumber()) + return {}; + + /// If type is same, the conversion is always monotonic. + /// (Enum has separate case, because it is different data type) + if (checkAndGetDataType>(&type) || + checkAndGetDataType>(&type)) + return { .is_monotonic = true, .is_always_monotonic = true }; + + /// Float cases. + + /// When converting to Float, the conversion is always monotonic. 
+ if constexpr (std::is_floating_point_v) + return { .is_monotonic = true, .is_always_monotonic = true }; + + const auto * low_cardinality = typeid_cast(&type); + const IDataType * low_cardinality_dictionary_type = nullptr; + if (low_cardinality) + low_cardinality_dictionary_type = low_cardinality->getDictionaryType().get(); + + WhichDataType which_type(type); + WhichDataType which_inner_type = low_cardinality + ? WhichDataType(low_cardinality_dictionary_type) + : WhichDataType(type); + + /// If converting from Float, for monotonicity, arguments must fit in range of result type. + if (which_inner_type.isFloat()) + { + if (left.isNull() || right.isNull()) + return {}; + + Float64 left_float = left.get(); + Float64 right_float = right.get(); + + if (left_float >= static_cast(std::numeric_limits::min()) + && left_float <= static_cast(std::numeric_limits::max()) + && right_float >= static_cast(std::numeric_limits::min()) + && right_float <= static_cast(std::numeric_limits::max())) + return { .is_monotonic = true }; + + return {}; + } + + /// Integer cases. + + /// Only support types represented by native integers. + /// It can be extended to big integers, decimals and DateTime64 later. + /// By the way, NULLs are representing unbounded ranges. + if (!((left.isNull() || left.getType() == Field::Types::UInt64 || left.getType() == Field::Types::Int64) + && (right.isNull() || right.getType() == Field::Types::UInt64 || right.getType() == Field::Types::Int64))) + return {}; + + const bool from_is_unsigned = type.isValueRepresentedByUnsignedInteger(); + const bool to_is_unsigned = is_unsigned_v; + + const size_t size_of_from = type.getSizeOfValueInMemory(); + const size_t size_of_to = sizeof(T); + + const bool left_in_first_half = left.isNull() + ? from_is_unsigned + : (left.get() >= 0); + + const bool right_in_first_half = right.isNull() + ? !from_is_unsigned + : (right.get() >= 0); + + /// Size of type is the same. 
+ if (size_of_from == size_of_to) + { + if (from_is_unsigned == to_is_unsigned) + return { .is_monotonic = true, .is_always_monotonic = true }; + + if (left_in_first_half == right_in_first_half) + return { .is_monotonic = true }; + + return {}; + } + + /// Size of type is expanded. + if (size_of_from < size_of_to) + { + if (from_is_unsigned == to_is_unsigned) + return { .is_monotonic = true, .is_always_monotonic = true }; + + if (!to_is_unsigned) + return { .is_monotonic = true, .is_always_monotonic = true }; + + /// signed -> unsigned. If arguments from the same half, then function is monotonic. + if (left_in_first_half == right_in_first_half) + return { .is_monotonic = true }; + + return {}; + } + + /// Size of type is shrunk. + if (size_of_from > size_of_to) + { + /// Function cannot be monotonic on unbounded ranges. + if (left.isNull() || right.isNull()) + return {}; + + /// Function cannot be monotonic when left and right are not on the same ranges. + if (divideByRangeOfType(left.get()) != divideByRangeOfType(right.get())) + return {}; + + if (to_is_unsigned) + return { .is_monotonic = true }; + else + { + // If To is signed, it's possible that the signedness is different after conversion. So we check it explicitly. 
+ const bool is_monotonic = (T(left.get()) >= 0) == (T(right.get()) >= 0); + + return { .is_monotonic = is_monotonic }; + } + } + + UNREACHABLE(); + } +}; + +struct ToDateMonotonicity +{ + static bool has() { return true; } + + static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) + { + auto which = WhichDataType(type); + if (which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() || which.isInt8() || which.isInt16() || which.isUInt8() + || which.isUInt16()) + { + return {.is_monotonic = true, .is_always_monotonic = true}; + } + else if ( + ((left.getType() == Field::Types::UInt64 || left.isNull()) && (right.getType() == Field::Types::UInt64 || right.isNull()) + && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + || ((left.getType() == Field::Types::Int64 || left.isNull()) && (right.getType() == Field::Types::Int64 || right.isNull()) + && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + || (( + (left.getType() == Field::Types::Float64 || left.isNull()) + && (right.getType() == Field::Types::Float64 || right.isNull()) + && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF)))) + || !isNativeNumber(type)) + { + return {}; + } + else + { + return {.is_monotonic = true, .is_always_monotonic = true}; + } + } +}; + +struct ToDateTimeMonotonicity +{ + static bool has() { return true; } + + static IFunction::Monotonicity get(const IDataType & type, const Field &, const Field &) + { + if (type.isValueRepresentedByNumber()) + return {.is_monotonic = true, .is_always_monotonic = true}; + else + return {}; + } +}; + +/** The monotonicity for the `toString` function is mainly determined for test purposes. + * It is doubtful that anyone is looking to optimize queries with conditions `toString(CounterID) = 34`. 
+ */ +struct ToStringMonotonicity +{ + static bool has() { return true; } + + static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) + { + IFunction::Monotonicity positive{ .is_monotonic = true }; + IFunction::Monotonicity not_monotonic; + + const auto * type_ptr = &type; + if (const auto * low_cardinality_type = checkAndGetDataType(type_ptr)) + type_ptr = low_cardinality_type->getDictionaryType().get(); + + /// Order on enum values (which is the order on integers) is completely arbitrary in respect to the order on strings. + if (WhichDataType(type).isEnum()) + return not_monotonic; + + /// `toString` function is monotonous if the argument is Date or Date32 or DateTime or String, or non-negative numbers with the same number of symbols. + if (checkDataTypes(type_ptr)) + return positive; + + if (left.isNull() || right.isNull()) + return {}; + + if (left.getType() == Field::Types::UInt64 + && right.getType() == Field::Types::UInt64) + { + return (left.get() == 0 && right.get() == 0) + || (floor(log10(left.get())) == floor(log10(right.get()))) + ? positive : not_monotonic; + } + + if (left.getType() == Field::Types::Int64 + && right.getType() == Field::Types::Int64) + { + return (left.get() == 0 && right.get() == 0) + || (left.get() > 0 && right.get() > 0 && floor(log10(left.get())) == floor(log10(right.get()))) + ? 
positive : not_monotonic; + } + + return not_monotonic; + } +}; + + +struct NameToUInt8 { static constexpr auto name = "toUInt8"; }; +struct NameToUInt16 { static constexpr auto name = "toUInt16"; }; +struct NameToUInt32 { static constexpr auto name = "toUInt32"; }; +struct NameToUInt64 { static constexpr auto name = "toUInt64"; }; +struct NameToUInt128 { static constexpr auto name = "toUInt128"; }; +struct NameToUInt256 { static constexpr auto name = "toUInt256"; }; +struct NameToInt8 { static constexpr auto name = "toInt8"; }; +struct NameToInt16 { static constexpr auto name = "toInt16"; }; +struct NameToInt32 { static constexpr auto name = "toInt32"; }; +struct NameToInt64 { static constexpr auto name = "toInt64"; }; +struct NameToInt128 { static constexpr auto name = "toInt128"; }; +struct NameToInt256 { static constexpr auto name = "toInt256"; }; +struct NameToFloat32 { static constexpr auto name = "toFloat32"; }; +struct NameToFloat64 { static constexpr auto name = "toFloat64"; }; +struct NameToUUID { static constexpr auto name = "toUUID"; }; +struct NameToIPv4 { static constexpr auto name = "toIPv4"; }; +struct NameToIPv6 { static constexpr auto name = "toIPv6"; }; + +using FunctionToUInt8 = FunctionConvert>; +using FunctionToUInt16 = FunctionConvert>; +using FunctionToUInt32 = FunctionConvert>; +using FunctionToUInt64 = FunctionConvert>; +using FunctionToUInt128 = FunctionConvert>; +using FunctionToUInt256 = FunctionConvert>; +using FunctionToInt8 = FunctionConvert>; +using FunctionToInt16 = FunctionConvert>; +using FunctionToInt32 = FunctionConvert>; +using FunctionToInt64 = FunctionConvert>; +using FunctionToInt128 = FunctionConvert>; +using FunctionToInt256 = FunctionConvert>; +using FunctionToFloat32 = FunctionConvert>; +using FunctionToFloat64 = FunctionConvert>; + +using FunctionToDate = FunctionConvert; + +using FunctionToDate32 = FunctionConvert; + +using FunctionToDateTime = FunctionConvert; + +using FunctionToDateTime32 = FunctionConvert; + +using 
FunctionToDateTime64 = FunctionConvert; + +using FunctionToUUID = FunctionConvert>; +using FunctionToIPv4 = FunctionConvert>; +using FunctionToIPv6 = FunctionConvert>; +using FunctionToString = FunctionConvert; +using FunctionToUnixTimestamp = FunctionConvert>; +using FunctionToDecimal32 = FunctionConvert, NameToDecimal32, UnknownMonotonicity>; +using FunctionToDecimal64 = FunctionConvert, NameToDecimal64, UnknownMonotonicity>; +using FunctionToDecimal128 = FunctionConvert, NameToDecimal128, UnknownMonotonicity>; +using FunctionToDecimal256 = FunctionConvert, NameToDecimal256, UnknownMonotonicity>; + +template struct FunctionTo; + +template <> struct FunctionTo { using Type = FunctionToUInt8; }; +template <> struct FunctionTo { using Type = FunctionToUInt16; }; +template <> struct FunctionTo { using Type = FunctionToUInt32; }; +template <> struct FunctionTo { using Type = FunctionToUInt64; }; +template <> struct FunctionTo { using Type = FunctionToUInt128; }; +template <> struct FunctionTo { using Type = FunctionToUInt256; }; +template <> struct FunctionTo { using Type = FunctionToInt8; }; +template <> struct FunctionTo { using Type = FunctionToInt16; }; +template <> struct FunctionTo { using Type = FunctionToInt32; }; +template <> struct FunctionTo { using Type = FunctionToInt64; }; +template <> struct FunctionTo { using Type = FunctionToInt128; }; +template <> struct FunctionTo { using Type = FunctionToInt256; }; +template <> struct FunctionTo { using Type = FunctionToFloat32; }; +template <> struct FunctionTo { using Type = FunctionToFloat64; }; + +template +struct FunctionTo { using Type = FunctionToDate; }; + +template +struct FunctionTo { using Type = FunctionToDate32; }; + +template +struct FunctionTo { using Type = FunctionToDateTime; }; + +template +struct FunctionTo { using Type = FunctionToDateTime64; }; + +template <> struct FunctionTo { using Type = FunctionToUUID; }; +template <> struct FunctionTo { using Type = FunctionToIPv4; }; +template <> struct 
FunctionTo { using Type = FunctionToIPv6; }; +template <> struct FunctionTo { using Type = FunctionToString; }; +template <> struct FunctionTo { using Type = FunctionToFixedString; }; +template <> struct FunctionTo> { using Type = FunctionToDecimal32; }; +template <> struct FunctionTo> { using Type = FunctionToDecimal64; }; +template <> struct FunctionTo> { using Type = FunctionToDecimal128; }; +template <> struct FunctionTo> { using Type = FunctionToDecimal256; }; + +template struct FunctionTo> + : FunctionTo> +{ +}; + +struct NameToUInt8OrZero { static constexpr auto name = "toUInt8OrZero"; }; +struct NameToUInt16OrZero { static constexpr auto name = "toUInt16OrZero"; }; +struct NameToUInt32OrZero { static constexpr auto name = "toUInt32OrZero"; }; +struct NameToUInt64OrZero { static constexpr auto name = "toUInt64OrZero"; }; +struct NameToUInt128OrZero { static constexpr auto name = "toUInt128OrZero"; }; +struct NameToUInt256OrZero { static constexpr auto name = "toUInt256OrZero"; }; +struct NameToInt8OrZero { static constexpr auto name = "toInt8OrZero"; }; +struct NameToInt16OrZero { static constexpr auto name = "toInt16OrZero"; }; +struct NameToInt32OrZero { static constexpr auto name = "toInt32OrZero"; }; +struct NameToInt64OrZero { static constexpr auto name = "toInt64OrZero"; }; +struct NameToInt128OrZero { static constexpr auto name = "toInt128OrZero"; }; +struct NameToInt256OrZero { static constexpr auto name = "toInt256OrZero"; }; +struct NameToFloat32OrZero { static constexpr auto name = "toFloat32OrZero"; }; +struct NameToFloat64OrZero { static constexpr auto name = "toFloat64OrZero"; }; +struct NameToDateOrZero { static constexpr auto name = "toDateOrZero"; }; +struct NameToDate32OrZero { static constexpr auto name = "toDate32OrZero"; }; +struct NameToDateTimeOrZero { static constexpr auto name = "toDateTimeOrZero"; }; +struct NameToDateTime64OrZero { static constexpr auto name = "toDateTime64OrZero"; }; +struct NameToDecimal32OrZero { static 
constexpr auto name = "toDecimal32OrZero"; }; +struct NameToDecimal64OrZero { static constexpr auto name = "toDecimal64OrZero"; }; +struct NameToDecimal128OrZero { static constexpr auto name = "toDecimal128OrZero"; }; +struct NameToDecimal256OrZero { static constexpr auto name = "toDecimal256OrZero"; }; +struct NameToUUIDOrZero { static constexpr auto name = "toUUIDOrZero"; }; +struct NameToIPv4OrZero { static constexpr auto name = "toIPv4OrZero"; }; +struct NameToIPv6OrZero { static constexpr auto name = "toIPv6OrZero"; }; + +using FunctionToUInt8OrZero = FunctionConvertFromString; +using FunctionToUInt16OrZero = FunctionConvertFromString; +using FunctionToUInt32OrZero = FunctionConvertFromString; +using FunctionToUInt64OrZero = FunctionConvertFromString; +using FunctionToUInt128OrZero = FunctionConvertFromString; +using FunctionToUInt256OrZero = FunctionConvertFromString; +using FunctionToInt8OrZero = FunctionConvertFromString; +using FunctionToInt16OrZero = FunctionConvertFromString; +using FunctionToInt32OrZero = FunctionConvertFromString; +using FunctionToInt64OrZero = FunctionConvertFromString; +using FunctionToInt128OrZero = FunctionConvertFromString; +using FunctionToInt256OrZero = FunctionConvertFromString; +using FunctionToFloat32OrZero = FunctionConvertFromString; +using FunctionToFloat64OrZero = FunctionConvertFromString; +using FunctionToDateOrZero = FunctionConvertFromString; +using FunctionToDate32OrZero = FunctionConvertFromString; +using FunctionToDateTimeOrZero = FunctionConvertFromString; +using FunctionToDateTime64OrZero = FunctionConvertFromString; +using FunctionToDecimal32OrZero = FunctionConvertFromString, NameToDecimal32OrZero, ConvertFromStringExceptionMode::Zero>; +using FunctionToDecimal64OrZero = FunctionConvertFromString, NameToDecimal64OrZero, ConvertFromStringExceptionMode::Zero>; +using FunctionToDecimal128OrZero = FunctionConvertFromString, NameToDecimal128OrZero, ConvertFromStringExceptionMode::Zero>; +using 
FunctionToDecimal256OrZero = FunctionConvertFromString, NameToDecimal256OrZero, ConvertFromStringExceptionMode::Zero>; +using FunctionToUUIDOrZero = FunctionConvertFromString; +using FunctionToIPv4OrZero = FunctionConvertFromString; +using FunctionToIPv6OrZero = FunctionConvertFromString; + +struct NameToUInt8OrNull { static constexpr auto name = "toUInt8OrNull"; }; +struct NameToUInt16OrNull { static constexpr auto name = "toUInt16OrNull"; }; +struct NameToUInt32OrNull { static constexpr auto name = "toUInt32OrNull"; }; +struct NameToUInt64OrNull { static constexpr auto name = "toUInt64OrNull"; }; +struct NameToUInt128OrNull { static constexpr auto name = "toUInt128OrNull"; }; +struct NameToUInt256OrNull { static constexpr auto name = "toUInt256OrNull"; }; +struct NameToInt8OrNull { static constexpr auto name = "toInt8OrNull"; }; +struct NameToInt16OrNull { static constexpr auto name = "toInt16OrNull"; }; +struct NameToInt32OrNull { static constexpr auto name = "toInt32OrNull"; }; +struct NameToInt64OrNull { static constexpr auto name = "toInt64OrNull"; }; +struct NameToInt128OrNull { static constexpr auto name = "toInt128OrNull"; }; +struct NameToInt256OrNull { static constexpr auto name = "toInt256OrNull"; }; +struct NameToFloat32OrNull { static constexpr auto name = "toFloat32OrNull"; }; +struct NameToFloat64OrNull { static constexpr auto name = "toFloat64OrNull"; }; +struct NameToDateOrNull { static constexpr auto name = "toDateOrNull"; }; +struct NameToDate32OrNull { static constexpr auto name = "toDate32OrNull"; }; +struct NameToDateTimeOrNull { static constexpr auto name = "toDateTimeOrNull"; }; +struct NameToDateTime64OrNull { static constexpr auto name = "toDateTime64OrNull"; }; +struct NameToDecimal32OrNull { static constexpr auto name = "toDecimal32OrNull"; }; +struct NameToDecimal64OrNull { static constexpr auto name = "toDecimal64OrNull"; }; +struct NameToDecimal128OrNull { static constexpr auto name = "toDecimal128OrNull"; }; +struct 
NameToDecimal256OrNull { static constexpr auto name = "toDecimal256OrNull"; }; +struct NameToUUIDOrNull { static constexpr auto name = "toUUIDOrNull"; }; +struct NameToIPv4OrNull { static constexpr auto name = "toIPv4OrNull"; }; +struct NameToIPv6OrNull { static constexpr auto name = "toIPv6OrNull"; }; + +using FunctionToUInt8OrNull = FunctionConvertFromString; +using FunctionToUInt16OrNull = FunctionConvertFromString; +using FunctionToUInt32OrNull = FunctionConvertFromString; +using FunctionToUInt64OrNull = FunctionConvertFromString; +using FunctionToUInt128OrNull = FunctionConvertFromString; +using FunctionToUInt256OrNull = FunctionConvertFromString; +using FunctionToInt8OrNull = FunctionConvertFromString; +using FunctionToInt16OrNull = FunctionConvertFromString; +using FunctionToInt32OrNull = FunctionConvertFromString; +using FunctionToInt64OrNull = FunctionConvertFromString; +using FunctionToInt128OrNull = FunctionConvertFromString; +using FunctionToInt256OrNull = FunctionConvertFromString; +using FunctionToFloat32OrNull = FunctionConvertFromString; +using FunctionToFloat64OrNull = FunctionConvertFromString; +using FunctionToDateOrNull = FunctionConvertFromString; +using FunctionToDate32OrNull = FunctionConvertFromString; +using FunctionToDateTimeOrNull = FunctionConvertFromString; +using FunctionToDateTime64OrNull = FunctionConvertFromString; +using FunctionToDecimal32OrNull = FunctionConvertFromString, NameToDecimal32OrNull, ConvertFromStringExceptionMode::Null>; +using FunctionToDecimal64OrNull = FunctionConvertFromString, NameToDecimal64OrNull, ConvertFromStringExceptionMode::Null>; +using FunctionToDecimal128OrNull = FunctionConvertFromString, NameToDecimal128OrNull, ConvertFromStringExceptionMode::Null>; +using FunctionToDecimal256OrNull = FunctionConvertFromString, NameToDecimal256OrNull, ConvertFromStringExceptionMode::Null>; +using FunctionToUUIDOrNull = FunctionConvertFromString; +using FunctionToIPv4OrNull = FunctionConvertFromString; +using 
FunctionToIPv6OrNull = FunctionConvertFromString; + +struct NameParseDateTimeBestEffort { static constexpr auto name = "parseDateTimeBestEffort"; }; +struct NameParseDateTimeBestEffortOrZero { static constexpr auto name = "parseDateTimeBestEffortOrZero"; }; +struct NameParseDateTimeBestEffortOrNull { static constexpr auto name = "parseDateTimeBestEffortOrNull"; }; +struct NameParseDateTimeBestEffortUS { static constexpr auto name = "parseDateTimeBestEffortUS"; }; +struct NameParseDateTimeBestEffortUSOrZero { static constexpr auto name = "parseDateTimeBestEffortUSOrZero"; }; +struct NameParseDateTimeBestEffortUSOrNull { static constexpr auto name = "parseDateTimeBestEffortUSOrNull"; }; +struct NameParseDateTime32BestEffort { static constexpr auto name = "parseDateTime32BestEffort"; }; +struct NameParseDateTime32BestEffortOrZero { static constexpr auto name = "parseDateTime32BestEffortOrZero"; }; +struct NameParseDateTime32BestEffortOrNull { static constexpr auto name = "parseDateTime32BestEffortOrNull"; }; +struct NameParseDateTime64BestEffort { static constexpr auto name = "parseDateTime64BestEffort"; }; +struct NameParseDateTime64BestEffortOrZero { static constexpr auto name = "parseDateTime64BestEffortOrZero"; }; +struct NameParseDateTime64BestEffortOrNull { static constexpr auto name = "parseDateTime64BestEffortOrNull"; }; +struct NameParseDateTime64BestEffortUS { static constexpr auto name = "parseDateTime64BestEffortUS"; }; +struct NameParseDateTime64BestEffortUSOrZero { static constexpr auto name = "parseDateTime64BestEffortUSOrZero"; }; +struct NameParseDateTime64BestEffortUSOrNull { static constexpr auto name = "parseDateTime64BestEffortUSOrNull"; }; + + +using FunctionParseDateTimeBestEffort = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTimeBestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>; +using FunctionParseDateTimeBestEffortOrZero = FunctionConvertFromString< + DataTypeDateTime, 
NameParseDateTimeBestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>; +using FunctionParseDateTimeBestEffortOrNull = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTimeBestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>; + +using FunctionParseDateTimeBestEffortUS = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTimeBestEffortUS, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffortUS>; +using FunctionParseDateTimeBestEffortUSOrZero = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTimeBestEffortUSOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffortUS>; +using FunctionParseDateTimeBestEffortUSOrNull = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTimeBestEffortUSOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffortUS>; + +using FunctionParseDateTime32BestEffort = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTime32BestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>; +using FunctionParseDateTime32BestEffortOrZero = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTime32BestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>; +using FunctionParseDateTime32BestEffortOrNull = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTime32BestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>; + +using FunctionParseDateTime64BestEffort = FunctionConvertFromString< + DataTypeDateTime64, NameParseDateTime64BestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>; +using FunctionParseDateTime64BestEffortOrZero = FunctionConvertFromString< + DataTypeDateTime64, NameParseDateTime64BestEffortOrZero, ConvertFromStringExceptionMode::Zero, 
ConvertFromStringParsingMode::BestEffort>; +using FunctionParseDateTime64BestEffortOrNull = FunctionConvertFromString< + DataTypeDateTime64, NameParseDateTime64BestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>; + +using FunctionParseDateTime64BestEffortUS = FunctionConvertFromString< + DataTypeDateTime64, NameParseDateTime64BestEffortUS, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffortUS>; +using FunctionParseDateTime64BestEffortUSOrZero = FunctionConvertFromString< + DataTypeDateTime64, NameParseDateTime64BestEffortUSOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffortUS>; +using FunctionParseDateTime64BestEffortUSOrNull = FunctionConvertFromString< + DataTypeDateTime64, NameParseDateTime64BestEffortUSOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffortUS>; + + +class ExecutableFunctionCast : public IExecutableFunction +{ +public: + using WrapperType = std::function; + + explicit ExecutableFunctionCast( + WrapperType && wrapper_function_, const char * name_, std::optional diagnostic_) + : wrapper_function(std::move(wrapper_function_)), name(name_), diagnostic(std::move(diagnostic_)) {} + + String getName() const override { return name; } + +protected: + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + /// drop second argument, pass others + ColumnsWithTypeAndName new_arguments{arguments.front()}; + if (arguments.size() > 2) + new_arguments.insert(std::end(new_arguments), std::next(std::begin(arguments), 2), std::end(arguments)); + + try + { + return wrapper_function(new_arguments, result_type, nullptr, input_rows_count); + } + catch (Exception & e) + { + if (diagnostic) + e.addMessage("while converting source column " + backQuoteIfNeed(diagnostic->column_from) + + " to destination column " + 
backQuoteIfNeed(diagnostic->column_to)); + throw; + } + } + + bool useDefaultImplementationForNulls() const override { return false; } + /// CAST(Nothing, T) -> T + bool useDefaultImplementationForNothing() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + +private: + WrapperType wrapper_function; + const char * name; + std::optional diagnostic; +}; + + +struct FunctionCastName +{ + static constexpr auto name = "CAST"; +}; + +class FunctionCast final : public IFunctionBase +{ +public: + using MonotonicityForRange = std::function; + using WrapperType = std::function; + + FunctionCast(ContextPtr context_ + , const char * cast_name_ + , MonotonicityForRange && monotonicity_for_range_ + , const DataTypes & argument_types_ + , const DataTypePtr & return_type_ + , std::optional diagnostic_ + , CastType cast_type_) + : cast_name(cast_name_), monotonicity_for_range(std::move(monotonicity_for_range_)) + , argument_types(argument_types_), return_type(return_type_), diagnostic(std::move(diagnostic_)) + , cast_type(cast_type_) + , context(context_) + { + } + + const DataTypes & getArgumentTypes() const override { return argument_types; } + const DataTypePtr & getResultType() const override { return return_type; } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName & /*sample_columns*/) const override + { + try + { + return std::make_unique( + prepareUnpackDictionaries(getArgumentTypes()[0], getResultType()), cast_name, diagnostic); + } + catch (Exception & e) + { + if (diagnostic) + e.addMessage("while converting source column " + backQuoteIfNeed(diagnostic->column_from) + + " to destination column " + backQuoteIfNeed(diagnostic->column_to)); + throw; + } + } + + String getName() const override { return cast_name; } + + bool 
isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + bool hasInformationAboutMonotonicity() const override + { + return static_cast(monotonicity_for_range); + } + + Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override + { + return monotonicity_for_range(type, left, right); + } + +private: + const char * cast_name; + MonotonicityForRange monotonicity_for_range; + + DataTypes argument_types; + DataTypePtr return_type; + + std::optional diagnostic; + CastType cast_type; + ContextPtr context; + + static WrapperType createFunctionAdaptor(FunctionPtr function, const DataTypePtr & from_type) + { + auto function_adaptor = std::make_unique(function)->build({ColumnWithTypeAndName{nullptr, from_type, ""}}); + + return [function_adaptor] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) + { + return function_adaptor->execute(arguments, result_type, input_rows_count); + }; + } + + static WrapperType createToNullableColumnWrapper() + { + return [] (ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) + { + ColumnPtr res = result_type->createColumn(); + ColumnUInt8::Ptr col_null_map_to = ColumnUInt8::create(input_rows_count, true); + return ColumnNullable::create(res->cloneResized(input_rows_count), std::move(col_null_map_to)); + }; + } + + template + WrapperType createWrapper(const DataTypePtr & from_type, const ToDataType * const to_type, bool requested_result_is_nullable) const + { + TypeIndex from_type_index = from_type->getTypeId(); + WhichDataType which(from_type_index); + bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull) + && (which.isInt() || which.isUInt() || which.isFloat()); + + FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = 
default_date_time_overflow_behavior; + if (context) + date_time_overflow_behavior = context->getSettingsRef().date_time_overflow_behavior; + + if (requested_result_is_nullable && checkAndGetDataType(from_type.get())) + { + /// In case when converting to Nullable type, we apply different parsing rule, + /// that will not throw an exception but return NULL in case of malformed input. + FunctionPtr function = FunctionConvertFromString::create(context); + return createFunctionAdaptor(function, from_type); + } + else if (!can_apply_accurate_cast) + { + FunctionPtr function = FunctionTo::Type::create(context); + return createFunctionAdaptor(function, from_type); + } + + return [wrapper_cast_type = cast_type, from_type_index, to_type, date_time_overflow_behavior] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) + { + ColumnPtr result_column; + auto res = callOnIndexAndDataType(from_type_index, [&](const auto & types) -> bool + { + using Types = std::decay_t; + using LeftDataType = typename Types::LeftType; + using RightDataType = typename Types::RightType; + + if constexpr (IsDataTypeNumber) + { + if constexpr (IsDataTypeNumber) + { +#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE, ADDITIONS) \ + case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ + result_column = ConvertImpl::execute( \ + arguments, result_type, input_rows_count, ADDITIONS()); \ + break; + if (wrapper_cast_type == CastType::accurate) + { + switch (date_time_overflow_behavior) + { + GENERATE_OVERFLOW_MODE_CASE(Throw, AccurateConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Ignore, AccurateConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Saturate, AccurateConvertStrategyAdditions) + } + } + else + { + switch (date_time_overflow_behavior) + { + GENERATE_OVERFLOW_MODE_CASE(Throw, AccurateOrNullConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Ignore, 
AccurateOrNullConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Saturate, AccurateOrNullConvertStrategyAdditions) + } + } +#undef GENERATE_OVERFLOW_MODE_CASE + + return true; + } + + if constexpr (std::is_same_v || std::is_same_v) + { +#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE, ADDITIONS) \ + case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ + result_column = ConvertImpl::template execute( \ +arguments, result_type, input_rows_count); \ + break; + if (wrapper_cast_type == CastType::accurate) + { + switch (date_time_overflow_behavior) + { + GENERATE_OVERFLOW_MODE_CASE(Throw, DateTimeAccurateConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Ignore, DateTimeAccurateConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Saturate, DateTimeAccurateConvertStrategyAdditions) + } + } + else + { + switch (date_time_overflow_behavior) + { + GENERATE_OVERFLOW_MODE_CASE(Throw, DateTimeAccurateOrNullConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Ignore, DateTimeAccurateOrNullConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Saturate, DateTimeAccurateOrNullConvertStrategyAdditions) + } + } +#undef GENERATE_OVERFLOW_MODE_CASE + return true; + } + } + + return false; + }); + + /// Additionally check if callOnIndexAndDataType wasn't called at all. 
+ if (!res) + { + if (wrapper_cast_type == CastType::accurateOrNull) + { + auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper(); + return nullable_column_wrapper(arguments, result_type, column_nullable, input_rows_count); + } + else + { + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, + "Conversion from {} to {} is not supported", + from_type_index, to_type->getName()); + } + } + + return result_column; + }; + } + + template + WrapperType createBoolWrapper(const DataTypePtr & from_type, const ToDataType * const to_type, bool requested_result_is_nullable) const + { + if (checkAndGetDataType(from_type.get())) + { + return &ConvertImplGenericFromString::execute; + } + + return createWrapper(from_type, to_type, requested_result_is_nullable); + } + + WrapperType createUInt8ToBoolWrapper(const DataTypePtr from_type, const DataTypePtr to_type) const + { + return [from_type, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) -> ColumnPtr + { + /// Special case when we convert UInt8 column to Bool column. + /// both columns have type UInt8, but we shouldn't use identity wrapper, + /// because Bool column can contain only 0 and 1. 
+ auto res_column = to_type->createColumn(); + const auto & data_from = checkAndGetColumn(arguments[0].column.get())->getData(); + auto & data_to = assert_cast(res_column.get())->getData(); + data_to.resize(data_from.size()); + for (size_t i = 0; i != data_from.size(); ++i) + data_to[i] = static_cast(data_from[i]); + return res_column; + }; + } + + WrapperType createStringWrapper(const DataTypePtr & from_type) const + { + FunctionPtr function = FunctionToString::create(context); + return createFunctionAdaptor(function, from_type); + } + + WrapperType createFixedStringWrapper(const DataTypePtr & from_type, const size_t N) const + { + if (!isStringOrFixedString(from_type)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CAST AS FixedString is only implemented for types String and FixedString"); + + bool exception_mode_null = cast_type == CastType::accurateOrNull; + return [exception_mode_null, N] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) + { + if (exception_mode_null) + return FunctionToFixedString::executeForN(arguments, N); + else + return FunctionToFixedString::executeForN(arguments, N); + }; + } + +#define GENERATE_INTERVAL_CASE(INTERVAL_KIND) \ + case IntervalKind::Kind::INTERVAL_KIND: \ + return createFunctionAdaptor(FunctionConvert::create(context), from_type); + + WrapperType createIntervalWrapper(const DataTypePtr & from_type, IntervalKind kind) const + { + switch (kind.kind) + { + GENERATE_INTERVAL_CASE(Nanosecond) + GENERATE_INTERVAL_CASE(Microsecond) + GENERATE_INTERVAL_CASE(Millisecond) + GENERATE_INTERVAL_CASE(Second) + GENERATE_INTERVAL_CASE(Minute) + GENERATE_INTERVAL_CASE(Hour) + GENERATE_INTERVAL_CASE(Day) + GENERATE_INTERVAL_CASE(Week) + GENERATE_INTERVAL_CASE(Month) + GENERATE_INTERVAL_CASE(Quarter) + GENERATE_INTERVAL_CASE(Year) + } + throw Exception{ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion to unexpected IntervalKind: {}", kind.toString()}; + } + +#undef 
GENERATE_INTERVAL_CASE + + template + requires IsDataTypeDecimal + WrapperType createDecimalWrapper(const DataTypePtr & from_type, const ToDataType * to_type, bool requested_result_is_nullable) const + { + TypeIndex type_index = from_type->getTypeId(); + UInt32 scale = to_type->getScale(); + + WhichDataType which(type_index); + bool ok = which.isNativeInt() || which.isNativeUInt() || which.isDecimal() || which.isFloat() || which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() + || which.isStringOrFixedString(); + if (!ok) + { + if (cast_type == CastType::accurateOrNull) + return createToNullableColumnWrapper(); + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", + from_type->getName(), to_type->getName()); + } + + auto wrapper_cast_type = cast_type; + + return [wrapper_cast_type, type_index, scale, to_type, requested_result_is_nullable] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *column_nullable, size_t input_rows_count) + { + ColumnPtr result_column; + auto res = callOnIndexAndDataType(type_index, [&](const auto & types) -> bool + { + using Types = std::decay_t; + using LeftDataType = typename Types::LeftType; + using RightDataType = typename Types::RightType; + + if constexpr (IsDataTypeDecimalOrNumber && IsDataTypeDecimalOrNumber && !std::is_same_v) + { + if (wrapper_cast_type == CastType::accurate) + { + AccurateConvertStrategyAdditions additions; + additions.scale = scale; + result_column = ConvertImpl::execute( + arguments, result_type, input_rows_count, additions); + + return true; + } + else if (wrapper_cast_type == CastType::accurateOrNull) + { + AccurateOrNullConvertStrategyAdditions additions; + additions.scale = scale; + result_column = ConvertImpl::execute( + arguments, result_type, input_rows_count, additions); + + return true; + } + } + else if constexpr (std::is_same_v) + { + if (requested_result_is_nullable) + { + /// Consistent with 
CAST(Nullable(String) AS Nullable(Numbers)) + /// In case when converting to Nullable type, we apply different parsing rule, + /// that will not throw an exception but return NULL in case of malformed input. + result_column = ConvertImpl::execute( + arguments, result_type, input_rows_count, scale); + + return true; + } + } + + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); + + return true; + }); + + /// Additionally check if callOnIndexAndDataType wasn't called at all. + if (!res) + { + if (wrapper_cast_type == CastType::accurateOrNull) + { + auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper(); + return nullable_column_wrapper(arguments, result_type, column_nullable, input_rows_count); + } + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, + "Conversion from {} to {} is not supported", + type_index, to_type->getName()); + } + + return result_column; + }; + } + + WrapperType createAggregateFunctionWrapper(const DataTypePtr & from_type_untyped, const DataTypeAggregateFunction * to_type) const + { + /// Conversion from String through parsing. 
+ if (checkAndGetDataType(from_type_untyped.get())) + { + return &ConvertImplGenericFromString::execute; + } + else if (const auto * agg_type = checkAndGetDataType(from_type_untyped.get())) + { + if (agg_type->getFunction()->haveSameStateRepresentation(*to_type->getFunction())) + { + return [function = to_type->getFunction()]( + ColumnsWithTypeAndName & arguments, + const DataTypePtr & /* result_type */, + const ColumnNullable * /* nullable_source */, + size_t /*input_rows_count*/) -> ColumnPtr + { + const auto & argument_column = arguments.front(); + const auto * col_agg = checkAndGetColumn(argument_column.column.get()); + if (col_agg) + { + auto new_col_agg = ColumnAggregateFunction::create(*col_agg); + new_col_agg->set(function); + return new_col_agg; + } + else + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Illegal column {} for function CAST AS AggregateFunction", + argument_column.column->getName()); + } + }; + } + } + + if (cast_type == CastType::accurateOrNull) + return createToNullableColumnWrapper(); + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", + from_type_untyped->getName(), to_type->getName()); + } + + WrapperType createArrayWrapper(const DataTypePtr & from_type_untyped, const DataTypeArray & to_type) const + { + /// Conversion from String through parsing. + if (checkAndGetDataType(from_type_untyped.get())) + { + return &ConvertImplGenericFromString::execute; + } + + DataTypePtr from_type_holder; + const auto * from_type = checkAndGetDataType(from_type_untyped.get()); + const auto * from_type_map = checkAndGetDataType(from_type_untyped.get()); + + /// Convert from Map + if (from_type_map) + { + /// Recreate array of unnamed tuples because otherwise it may work + /// unexpectedly while converting to array of named tuples. 
+ from_type_holder = from_type_map->getNestedTypeWithUnnamedTuple(); + from_type = assert_cast(from_type_holder.get()); + } + + if (!from_type) + { + throw Exception(ErrorCodes::TYPE_MISMATCH, + "CAST AS Array can only be performed between same-dimensional Array, Map or String types"); + } + + DataTypePtr from_nested_type = from_type->getNestedType(); + + /// In query SELECT CAST([] AS Array(Array(String))) from type is Array(Nothing) + bool from_empty_array = isNothing(from_nested_type); + + if (from_type->getNumberOfDimensions() != to_type.getNumberOfDimensions() && !from_empty_array) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "CAST AS Array can only be performed between same-dimensional array types"); + + const DataTypePtr & to_nested_type = to_type.getNestedType(); + + /// Prepare nested type conversion + const auto nested_function = prepareUnpackDictionaries(from_nested_type, to_nested_type); + + return [nested_function, from_nested_type, to_nested_type]( + ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr + { + const auto & argument_column = arguments.front(); + + const ColumnArray * col_array = nullptr; + + if (const ColumnMap * col_map = checkAndGetColumn(argument_column.column.get())) + col_array = &col_map->getNestedColumn(); + else + col_array = checkAndGetColumn(argument_column.column.get()); + + if (col_array) + { + /// create columns for converting nested column containing original and result columns + ColumnsWithTypeAndName nested_columns{{ col_array->getDataPtr(), from_nested_type, "" }}; + + /// convert nested column + auto result_column = nested_function(nested_columns, to_nested_type, nullable_source, nested_columns.front().column->size()); + + /// set converted nested column to result + return ColumnArray::create(result_column, col_array->getOffsetsPtr()); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Illegal column {} for function CAST 
AS Array", + argument_column.column->getName()); + } + }; + } + + using ElementWrappers = std::vector; + + ElementWrappers getElementWrappers(const DataTypes & from_element_types, const DataTypes & to_element_types) const + { + ElementWrappers element_wrappers; + element_wrappers.reserve(from_element_types.size()); + + /// Create conversion wrapper for each element in tuple + for (size_t i = 0; i < from_element_types.size(); ++i) + { + const DataTypePtr & from_element_type = from_element_types[i]; + const DataTypePtr & to_element_type = to_element_types[i]; + element_wrappers.push_back(prepareUnpackDictionaries(from_element_type, to_element_type)); + } + + return element_wrappers; + } + + WrapperType createTupleWrapper(const DataTypePtr & from_type_untyped, const DataTypeTuple * to_type) const + { + /// Conversion from String through parsing. + if (checkAndGetDataType(from_type_untyped.get())) + { + return &ConvertImplGenericFromString::execute; + } + + const auto * from_type = checkAndGetDataType(from_type_untyped.get()); + if (!from_type) + throw Exception(ErrorCodes::TYPE_MISMATCH, "CAST AS Tuple can only be performed between tuple types or from String.\n" + "Left type: {}, right type: {}", from_type_untyped->getName(), to_type->getName()); + + const auto & from_element_types = from_type->getElements(); + const auto & to_element_types = to_type->getElements(); + + std::vector element_wrappers; + std::vector> to_reverse_index; + + /// For named tuples allow conversions for tuples with + /// different sets of elements. If element exists in @to_type + /// and doesn't exist in @to_type it will be filled by default values. 
+ if (from_type->haveExplicitNames() && to_type->haveExplicitNames()) + { + const auto & from_names = from_type->getElementNames(); + std::unordered_map from_positions; + from_positions.reserve(from_names.size()); + for (size_t i = 0; i < from_names.size(); ++i) + from_positions[from_names[i]] = i; + + const auto & to_names = to_type->getElementNames(); + element_wrappers.reserve(to_names.size()); + to_reverse_index.reserve(from_names.size()); + + for (size_t i = 0; i < to_names.size(); ++i) + { + auto it = from_positions.find(to_names[i]); + if (it != from_positions.end()) + { + element_wrappers.emplace_back(prepareUnpackDictionaries(from_element_types[it->second], to_element_types[i])); + to_reverse_index.emplace_back(it->second); + } + else + { + element_wrappers.emplace_back(); + to_reverse_index.emplace_back(); + } + } + } + else + { + if (from_element_types.size() != to_element_types.size()) + throw Exception(ErrorCodes::TYPE_MISMATCH, "CAST AS Tuple can only be performed between tuple types " + "with the same number of elements or from String.\nLeft type: {}, right type: {}", + from_type->getName(), to_type->getName()); + + element_wrappers = getElementWrappers(from_element_types, to_element_types); + to_reverse_index.reserve(to_element_types.size()); + for (size_t i = 0; i < to_element_types.size(); ++i) + to_reverse_index.emplace_back(i); + } + + return [element_wrappers, from_element_types, to_element_types, to_reverse_index] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count) -> ColumnPtr + { + const auto * col = arguments.front().column.get(); + + size_t tuple_size = to_element_types.size(); + const ColumnTuple & column_tuple = typeid_cast(*col); + + Columns converted_columns(tuple_size); + + /// invoke conversion for each element + for (size_t i = 0; i < tuple_size; ++i) + { + if (to_reverse_index[i]) + { + size_t from_idx = *to_reverse_index[i]; + ColumnsWithTypeAndName element = 
{{column_tuple.getColumns()[from_idx], from_element_types[from_idx], "" }}; + converted_columns[i] = element_wrappers[i](element, to_element_types[i], nullable_source, input_rows_count); + } + else + { + converted_columns[i] = to_element_types[i]->createColumn()->cloneResized(input_rows_count); + } + } + + return ColumnTuple::create(converted_columns); + }; + } + + /// The case of: tuple([key1, key2, ..., key_n], [value1, value2, ..., value_n]) + WrapperType createTupleToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const + { + return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr + { + const auto * col = arguments.front().column.get(); + const auto & column_tuple = assert_cast(*col); + + Columns offsets(2); + Columns converted_columns(2); + for (size_t i = 0; i < 2; ++i) + { + const auto & column_array = assert_cast(column_tuple.getColumn(i)); + ColumnsWithTypeAndName element = {{column_array.getDataPtr(), from_kv_types[i], ""}}; + converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size()); + offsets[i] = column_array.getOffsetsPtr(); + } + + const auto & keys_offsets = assert_cast(*offsets[0]).getData(); + const auto & values_offsets = assert_cast(*offsets[1]).getData(); + if (keys_offsets != values_offsets) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "CAST AS Map can only be performed from tuple of arrays with equal sizes."); + + return ColumnMap::create(converted_columns[0], converted_columns[1], offsets[0]); + }; + } + + WrapperType createMapToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const + { + return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types] + (ColumnsWithTypeAndName & arguments, const DataTypePtr 
&, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr + { + const auto * col = arguments.front().column.get(); + const auto & column_map = typeid_cast(*col); + const auto & nested_data = column_map.getNestedData(); + + Columns converted_columns(2); + for (size_t i = 0; i < 2; ++i) + { + ColumnsWithTypeAndName element = {{nested_data.getColumnPtr(i), from_kv_types[i], ""}}; + converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size()); + } + + return ColumnMap::create(converted_columns[0], converted_columns[1], column_map.getNestedColumn().getOffsetsPtr()); + }; + } + + /// The case of: [(key1, value1), (key2, value2), ...] + WrapperType createArrayToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const + { + return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr + { + const auto * col = arguments.front().column.get(); + const auto & column_array = typeid_cast(*col); + const auto & nested_data = typeid_cast(column_array.getData()); + + Columns converted_columns(2); + for (size_t i = 0; i < 2; ++i) + { + ColumnsWithTypeAndName element = {{nested_data.getColumnPtr(i), from_kv_types[i], ""}}; + converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size()); + } + + return ColumnMap::create(converted_columns[0], converted_columns[1], column_array.getOffsetsPtr()); + }; + } + + + WrapperType createMapWrapper(const DataTypePtr & from_type_untyped, const DataTypeMap * to_type) const + { + if (const auto * from_tuple = checkAndGetDataType(from_type_untyped.get())) + { + if (from_tuple->getElements().size() != 2) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "CAST AS Map from tuple requires 2 elements. 
" + "Left type: {}, right type: {}", + from_tuple->getName(), + to_type->getName()); + + DataTypes from_kv_types; + const auto & to_kv_types = to_type->getKeyValueTypes(); + + for (const auto & elem : from_tuple->getElements()) + { + const auto * type_array = checkAndGetDataType(elem.get()); + if (!type_array) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "CAST AS Map can only be performed from tuples of array. Got: {}", from_tuple->getName()); + + from_kv_types.push_back(type_array->getNestedType()); + } + + return createTupleToMapWrapper(from_kv_types, to_kv_types); + } + else if (const auto * from_array = typeid_cast(from_type_untyped.get())) + { + const auto * nested_tuple = typeid_cast(from_array->getNestedType().get()); + if (!nested_tuple || nested_tuple->getElements().size() != 2) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "CAST AS Map from array requires nested tuple of 2 elements. " + "Left type: {}, right type: {}", + from_array->getName(), + to_type->getName()); + + return createArrayToMapWrapper(nested_tuple->getElements(), to_type->getKeyValueTypes()); + } + else if (const auto * from_type = checkAndGetDataType(from_type_untyped.get())) + { + return createMapToMapWrapper(from_type->getKeyValueTypes(), to_type->getKeyValueTypes()); + } + else + { + throw Exception(ErrorCodes::TYPE_MISMATCH, "Unsupported types to CAST AS Map. " + "Left type: {}, right type: {}", from_type_untyped->getName(), to_type->getName()); + } + } + + WrapperType createTupleToObjectWrapper(const DataTypeTuple & from_tuple, bool has_nullable_subcolumns) const + { + if (!from_tuple.haveExplicitNames()) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Cast to Object can be performed only from flatten Named Tuple. 
Got: {}", from_tuple.getName()); + + PathsInData paths; + DataTypes from_types; + + std::tie(paths, from_types) = flattenTuple(from_tuple.getPtr()); + auto to_types = from_types; + + for (auto & type : to_types) + { + if (isTuple(type) || isNested(type)) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Cast to Object can be performed only from flatten Named Tuple. Got: {}", + from_tuple.getName()); + + type = recursiveRemoveLowCardinality(type); + } + + return [element_wrappers = getElementWrappers(from_types, to_types), + has_nullable_subcolumns, from_types, to_types, paths] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count) + { + size_t tuple_size = to_types.size(); + auto flattened_column = flattenTuple(arguments.front().column); + const auto & column_tuple = assert_cast(*flattened_column); + + if (tuple_size != column_tuple.getColumns().size()) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Expected tuple with {} subcolumn, but got {} subcolumns", + tuple_size, column_tuple.getColumns().size()); + + auto res = ColumnObject::create(has_nullable_subcolumns); + for (size_t i = 0; i < tuple_size; ++i) + { + ColumnsWithTypeAndName element = {{column_tuple.getColumns()[i], from_types[i], "" }}; + auto converted_column = element_wrappers[i](element, to_types[i], nullable_source, input_rows_count); + res->addSubcolumn(paths[i], converted_column->assumeMutable()); + } + + return res; + }; + } + + WrapperType createMapToObjectWrapper(const DataTypeMap & from_map, bool has_nullable_subcolumns) const + { + auto key_value_types = from_map.getKeyValueTypes(); + + if (!isStringOrFixedString(key_value_types[0])) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Cast to Object from Map can be performed only from Map " + "with String or FixedString key. 
Got: {}", from_map.getName()); + + const auto & value_type = key_value_types[1]; + auto to_value_type = value_type; + + if (!has_nullable_subcolumns && value_type->isNullable()) + to_value_type = removeNullable(value_type); + + if (has_nullable_subcolumns && !value_type->isNullable()) + to_value_type = makeNullable(value_type); + + DataTypes to_key_value_types{std::make_shared(), std::move(to_value_type)}; + auto element_wrappers = getElementWrappers(key_value_types, to_key_value_types); + + return [has_nullable_subcolumns, element_wrappers, key_value_types, to_key_value_types] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t) -> ColumnPtr + { + const auto & column_map = assert_cast(*arguments.front().column); + const auto & offsets = column_map.getNestedColumn().getOffsets(); + auto key_value_columns = column_map.getNestedData().getColumnsCopy(); + + for (size_t i = 0; i < 2; ++i) + { + ColumnsWithTypeAndName element{{key_value_columns[i], key_value_types[i], ""}}; + key_value_columns[i] = element_wrappers[i](element, to_key_value_types[i], nullable_source, key_value_columns[i]->size()); + } + + const auto & key_column_str = assert_cast(*key_value_columns[0]); + const auto & value_column = *key_value_columns[1]; + + using SubcolumnsMap = HashMap; + SubcolumnsMap subcolumns; + + for (size_t row = 0; row < offsets.size(); ++row) + { + for (size_t i = offsets[static_cast(row) - 1]; i < offsets[row]; ++i) + { + auto ref = key_column_str.getDataAt(i); + + bool inserted; + SubcolumnsMap::LookupResult it; + subcolumns.emplace(ref, it, inserted); + auto & subcolumn = it->getMapped(); + + if (inserted) + subcolumn = value_column.cloneEmpty()->cloneResized(row); + + /// Map can have duplicated keys. We insert only first one. + if (subcolumn->size() == row) + subcolumn->insertFrom(value_column, i); + } + + /// Insert default values for keys missed in current row. 
+ for (const auto & [_, subcolumn] : subcolumns) + if (subcolumn->size() == row) + subcolumn->insertDefault(); + } + + auto column_object = ColumnObject::create(has_nullable_subcolumns); + for (auto && [key, subcolumn] : subcolumns) + { + PathInData path(key.toView()); + column_object->addSubcolumn(path, std::move(subcolumn)); + } + + return column_object; + }; + } + + WrapperType createObjectWrapper(const DataTypePtr & from_type, const DataTypeObject * to_type) const + { + if (const auto * from_tuple = checkAndGetDataType(from_type.get())) + { + return createTupleToObjectWrapper(*from_tuple, to_type->hasNullableSubcolumns()); + } + else if (const auto * from_map = checkAndGetDataType(from_type.get())) + { + return createMapToObjectWrapper(*from_map, to_type->hasNullableSubcolumns()); + } + else if (checkAndGetDataType(from_type.get())) + { + return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) + { + auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); + res->finalize(); + return res; + }; + } + else if (checkAndGetDataType(from_type.get())) + { + return [is_nullable = to_type->hasNullableSubcolumns()] (ColumnsWithTypeAndName & arguments, const DataTypePtr & , const ColumnNullable * , size_t) -> ColumnPtr + { + const auto & column_object = assert_cast(*arguments.front().column); + auto res = ColumnObject::create(is_nullable); + for (size_t i = 0; i < column_object.size(); i++) + res->insert(column_object[i]); + + res->finalize(); + return res; + }; + } + + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Cast to Object can be performed only from flatten named Tuple, Map or String. 
Got: {}", from_type->getName()); + } + + WrapperType createVariantToVariantWrapper(const DataTypeVariant & from_variant, const DataTypeVariant & to_variant) const + { + /// We support only extension of variant type, so, only new types can be added. + /// For example: Variant(T1, T2) -> Variant(T1, T2, T3) is supported, but Variant(T1, T2) -> Variant(T1, T3) is not supported. + /// We want to extend Variant type for free without rewriting the data, but we sort data types inside Variant during type creation + /// (we do it because we want Variant(T1, T2) to be the same as Variant(T2, T1)), but after extension the order of variant types + /// (and so their discriminators) can be different. For example: Variant(T1, T3) -> Variant(T1, T2, T3). + /// To avoid full rewrite of discriminators column, ColumnVariant supports it's local order of variant columns (and so local + /// discriminators) and stores mapping global order -> local order. + /// So, to extend Variant with new types for free, we should keep old local order for old variants, append new variants and change + /// mapping global order -> local order according to the new global order. + + /// Create map (new variant type) -> (it's global discriminator in new order). + const auto & new_variants = to_variant.getVariants(); + std::unordered_map new_variant_types_to_new_global_discriminator; + new_variant_types_to_new_global_discriminator.reserve(new_variants.size()); + for (size_t i = 0; i != new_variants.size(); ++i) + new_variant_types_to_new_global_discriminator[new_variants[i]->getName()] = i; + + /// Create set of old variant types. 
+ const auto & old_variants = from_variant.getVariants(); + std::unordered_map old_variant_types_to_old_global_discriminator; + old_variant_types_to_old_global_discriminator.reserve(old_variants.size()); + for (size_t i = 0; i != old_variants.size(); ++i) + old_variant_types_to_old_global_discriminator[old_variants[i]->getName()] = i; + + /// Check that the set of old variants types is a subset of new variant types and collect new global discriminator for each old global discriminator. + std::unordered_map old_global_discriminator_to_new; + old_global_discriminator_to_new.reserve(old_variants.size()); + for (const auto & [old_variant_type, old_discriminator] : old_variant_types_to_old_global_discriminator) + { + auto it = new_variant_types_to_new_global_discriminator.find(old_variant_type); + if (it == new_variant_types_to_new_global_discriminator.end()) + throw Exception( + ErrorCodes::CANNOT_CONVERT_TYPE, + "Cannot convert type {} to {}. Conversion between Variant types is allowed only when new Variant type is an extension " + "of an initial one", from_variant.getName(), to_variant.getName()); + old_global_discriminator_to_new[old_discriminator] = it->second; + } + + /// Collect variant types and their global discriminators that should be added to the old Variant to get the new Variant. 
+ std::vector> variant_types_and_discriminators_to_add; + variant_types_and_discriminators_to_add.reserve(new_variants.size() - old_variants.size()); + for (size_t i = 0; i != new_variants.size(); ++i) + { + if (!old_variant_types_to_old_global_discriminator.contains(new_variants[i]->getName())) + variant_types_and_discriminators_to_add.emplace_back(new_variants[i], i); + } + + return [old_global_discriminator_to_new, variant_types_and_discriminators_to_add] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr + { + const auto & column_variant = assert_cast(*arguments.front().column.get()); + size_t num_old_variants = column_variant.getNumVariants(); + Columns new_variant_columns; + new_variant_columns.reserve(num_old_variants + variant_types_and_discriminators_to_add.size()); + std::vector new_local_to_global_discriminators; + new_local_to_global_discriminators.reserve(num_old_variants + variant_types_and_discriminators_to_add.size()); + for (size_t i = 0; i != num_old_variants; ++i) + { + new_variant_columns.push_back(column_variant.getVariantPtrByLocalDiscriminator(i)); + new_local_to_global_discriminators.push_back(old_global_discriminator_to_new.at(column_variant.globalDiscriminatorByLocal(i))); + } + + for (const auto & [new_variant_type, new_global_discriminator] : variant_types_and_discriminators_to_add) + { + new_variant_columns.push_back(new_variant_type->createColumn()); + new_local_to_global_discriminators.push_back(new_global_discriminator); + } + + return ColumnVariant::create(column_variant.getLocalDiscriminatorsPtr(), column_variant.getOffsetsPtr(), new_variant_columns, new_local_to_global_discriminators); + }; + } + + WrapperType createVariantToColumnWrapper(const DataTypeVariant & from_variant, const DataTypePtr & to_type) const + { + const auto & variant_types = from_variant.getVariants(); + std::vector variant_wrappers; + variant_wrappers.reserve(variant_types.size()); + + /// Create conversion 
wrapper for each variant. + for (const auto & variant_type : variant_types) + variant_wrappers.push_back(prepareUnpackDictionaries(variant_type, to_type)); + + return [variant_wrappers, variant_types, to_type] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + const auto & column_variant = assert_cast(*arguments.front().column.get()); + + /// First, cast each variant to the result type. + std::vector casted_variant_columns; + casted_variant_columns.reserve(variant_types.size()); + for (size_t i = 0; i != variant_types.size(); ++i) + { + auto variant_col = column_variant.getVariantPtrByLocalDiscriminator(i); + ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], "" }}; + const auto & variant_wrapper = variant_wrappers[column_variant.globalDiscriminatorByLocal(i)]; + casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size())); + } + + /// Second, construct resulting column from casted variant columns according to discriminators. 
+ const auto & local_discriminators = column_variant.getLocalDiscriminators(); + auto res = result_type->createColumn(); + res->reserve(input_rows_count); + for (size_t i = 0; i != input_rows_count; ++i) + { + auto local_discr = local_discriminators[i]; + if (local_discr == ColumnVariant::NULL_DISCRIMINATOR) + res->insertDefault(); + else + res->insertFrom(*casted_variant_columns[local_discr], column_variant.offsetAt(i)); + } + + return res; + }; + } + + static ColumnPtr createVariantFromDescriptorsAndOneNonEmptyVariant(const DataTypes & variant_types, const ColumnPtr & discriminators, const ColumnPtr & variant, ColumnVariant::Discriminator variant_discr) + { + Columns variants; + variants.reserve(variant_types.size()); + for (size_t i = 0; i != variant_types.size(); ++i) + { + if (i == variant_discr) + variants.emplace_back(variant); + else + variants.push_back(variant_types[i]->createColumn()); + } + + return ColumnVariant::create(discriminators, variants); + } + + WrapperType createColumnToVariantWrapper(const DataTypePtr & from_type, const DataTypeVariant & to_variant) const + { + /// We allow converting NULL to Variant(...) as Variant can store NULLs. + if (from_type->onlyNull()) + { + return [](ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + auto result_column = result_type->createColumn(); + result_column->insertManyDefaults(input_rows_count); + return result_column; + }; + } + + auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(*removeNullableOrLowCardinalityNullable(from_type)); + if (!variant_discr_opt) + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert type {} to {}. 
Conversion to Variant allowed only for types from this Variant", from_type->getName(), to_variant.getName()); + + return [variant_discr = *variant_discr_opt] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t) -> ColumnPtr + { + const auto & result_variant_type = assert_cast(*result_type); + const auto & variant_types = result_variant_type.getVariants(); + if (const ColumnNullable * col_nullable = typeid_cast(arguments.front().column.get())) + { + const auto & column = col_nullable->getNestedColumnPtr(); + const auto & null_map = col_nullable->getNullMapData(); + IColumn::Filter filter; + filter.reserve(column->size()); + auto discriminators = ColumnVariant::ColumnDiscriminators::create(); + auto & discriminators_data = discriminators->getData(); + discriminators_data.reserve(column->size()); + size_t variant_size_hint = 0; + for (size_t i = 0; i != column->size(); ++i) + { + if (null_map[i]) + { + discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR); + filter.push_back(0); + } + else + { + discriminators_data.push_back(variant_discr); + filter.push_back(1); + ++variant_size_hint; + } + } + + ColumnPtr variant_column; + /// If there were no NULLs, just use the column. + if (variant_size_hint == column->size()) + variant_column = column; + /// Otherwise we should use filtered column. + else + variant_column = column->filter(filter, variant_size_hint); + return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), variant_column, variant_discr); + } + else if (isColumnLowCardinalityNullable(*arguments.front().column)) + { + const auto & column = arguments.front().column; + + /// Variant column cannot have LowCardinality(Nullable(...)) variant, as Variant column stores NULLs itself. + /// We should create a null-map, insert NULL_DISCRIMINATOR on NULL values and filter initial column. 
+ const auto & col_lc = assert_cast(*column); + const auto & indexes = col_lc.getIndexes(); + auto null_index = col_lc.getDictionary().getNullValueIndex(); + IColumn::Filter filter; + filter.reserve(col_lc.size()); + auto discriminators = ColumnVariant::ColumnDiscriminators::create(); + auto & discriminators_data = discriminators->getData(); + discriminators_data.reserve(col_lc.size()); + size_t variant_size_hint = 0; + for (size_t i = 0; i != col_lc.size(); ++i) + { + if (indexes.getUInt(i) == null_index) + { + discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR); + filter.push_back(0); + } + else + { + discriminators_data.push_back(variant_discr); + filter.push_back(1); + ++variant_size_hint; + } + } + + MutableColumnPtr variant_column; + /// If there were no NULLs, we can just clone the column. + if (variant_size_hint == col_lc.size()) + variant_column = IColumn::mutate(column); + /// Otherwise we should filter column. + else + variant_column = column->filter(filter, variant_size_hint)->assumeMutable(); + + assert_cast(*variant_column).nestedRemoveNullable(); + return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), std::move(variant_column), variant_discr); + } + else + { + const auto & column = arguments.front().column; + auto discriminators = ColumnVariant::ColumnDiscriminators::create(); + discriminators->getData().resize_fill(column->size(), variant_discr); + return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), column, variant_discr); + } + }; + } + + /// Wrapper for conversion to/from Variant type + WrapperType createVariantWrapper(const DataTypePtr & from_type, const DataTypePtr & to_type) const + { + if (const auto * from_variant = checkAndGetDataType(from_type.get())) + { + if (const auto * to_variant = checkAndGetDataType(to_type.get())) + return createVariantToVariantWrapper(*from_variant, *to_variant); + + return 
createVariantToColumnWrapper(*from_variant, to_type); + } + + return createColumnToVariantWrapper(from_type, assert_cast(*to_type)); + } + + template + WrapperType createEnumWrapper(const DataTypePtr & from_type, const DataTypeEnum * to_type) const + { + using EnumType = DataTypeEnum; + using Function = typename FunctionTo::Type; + + if (const auto * from_enum8 = checkAndGetDataType(from_type.get())) + checkEnumToEnumConversion(from_enum8, to_type); + else if (const auto * from_enum16 = checkAndGetDataType(from_type.get())) + checkEnumToEnumConversion(from_enum16, to_type); + + if (checkAndGetDataType(from_type.get())) + return createStringToEnumWrapper(); + else if (checkAndGetDataType(from_type.get())) + return createStringToEnumWrapper(); + else if (isNativeNumber(from_type) || isEnum(from_type)) + { + auto function = Function::create(context); + return createFunctionAdaptor(function, from_type); + } + else + { + if (cast_type == CastType::accurateOrNull) + return createToNullableColumnWrapper(); + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", + from_type->getName(), to_type->getName()); + } + } + + template + void checkEnumToEnumConversion(const EnumTypeFrom * from_type, const EnumTypeTo * to_type) const + { + const auto & from_values = from_type->getValues(); + const auto & to_values = to_type->getValues(); + + using ValueType = std::common_type_t; + using NameValuePair = std::pair; + using EnumValues = std::vector; + + EnumValues name_intersection; + std::set_intersection(std::begin(from_values), std::end(from_values), + std::begin(to_values), std::end(to_values), std::back_inserter(name_intersection), + [] (auto && from, auto && to) { return from.first < to.first; }); + + for (const auto & name_value : name_intersection) + { + const auto & old_value = name_value.second; + const auto & new_value = to_type->getValue(name_value.first); + if (old_value != new_value) + throw 
Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Enum conversion changes value for element '{}' from {} to {}", + name_value.first, toString(old_value), toString(new_value)); + } + } + + template + WrapperType createStringToEnumWrapper() const + { + const char * function_name = cast_name; + return [function_name] ( + ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, const ColumnNullable * nullable_col, size_t /*input_rows_count*/) + { + const auto & first_col = arguments.front().column.get(); + const auto & result_type = typeid_cast(*res_type); + + const ColumnStringType * col = typeid_cast(first_col); + + if (col && nullable_col && nullable_col->size() != col->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnNullable is not compatible with original"); + + if (col) + { + const auto size = col->size(); + + auto res = result_type.createColumn(); + auto & out_data = static_cast(*res).getData(); + out_data.resize(size); + + auto default_enum_value = result_type.getValues().front().second; + + if (nullable_col) + { + for (size_t i = 0; i < size; ++i) + { + if (!nullable_col->isNullAt(i)) + out_data[i] = result_type.getValue(col->getDataAt(i)); + else + out_data[i] = default_enum_value; + } + } + else + { + for (size_t i = 0; i < size; ++i) + out_data[i] = result_type.getValue(col->getDataAt(i)); + } + + return res; + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column {} as first argument of function {}", + first_col->getName(), function_name); + }; + } + + template + WrapperType createEnumToStringWrapper() const + { + const char * function_name = cast_name; + return [function_name] ( + ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, const ColumnNullable * nullable_col, size_t /*input_rows_count*/) + { + using ColumnEnumType = EnumType::ColumnType; + + const auto & first_col = arguments.front().column.get(); + const auto & first_type = arguments.front().type.get(); + + const ColumnEnumType * enum_col = 
typeid_cast(first_col); + const EnumType * enum_type = typeid_cast(first_type); + + if (enum_col && nullable_col && nullable_col->size() != enum_col->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnNullable is not compatible with original"); + + if (enum_col && enum_type) + { + const auto size = enum_col->size(); + const auto & enum_data = enum_col->getData(); + + auto res = res_type->createColumn(); + + if (nullable_col) + { + for (size_t i = 0; i < size; ++i) + { + if (!nullable_col->isNullAt(i)) + { + const auto & value = enum_type->getNameForValue(enum_data[i]); + res->insertData(value.data, value.size); + } + else + res->insertDefault(); + } + } + else + { + for (size_t i = 0; i < size; ++i) + { + const auto & value = enum_type->getNameForValue(enum_data[i]); + res->insertData(value.data, value.size); + } + } + + return res; + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column {} as first argument of function {}", + first_col->getName(), function_name); + }; + } + + static WrapperType createIdentityWrapper(const DataTypePtr &) + { + return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) + { + return arguments.front().column; + }; + } + + static WrapperType createNothingWrapper(const IDataType * to_type) + { + ColumnPtr res = to_type->createColumnConstWithDefaultValue(1); + return [res] (ColumnsWithTypeAndName &, const DataTypePtr &, const ColumnNullable *, size_t input_rows_count) + { + /// Column of Nothing type is trivially convertible to any other column + return res->cloneResized(input_rows_count)->convertToFullColumnIfConst(); + }; + } + + WrapperType prepareUnpackDictionaries(const DataTypePtr & from_type, const DataTypePtr & to_type) const + { + /// Conversion from/to Variant data type is processed in a special way. + /// We don't need to remove LowCardinality/Nullable. 
+ if (isVariant(to_type) || isVariant(from_type)) + return createVariantWrapper(from_type, to_type); + + const auto * from_low_cardinality = typeid_cast(from_type.get()); + const auto * to_low_cardinality = typeid_cast(to_type.get()); + const auto & from_nested = from_low_cardinality ? from_low_cardinality->getDictionaryType() : from_type; + const auto & to_nested = to_low_cardinality ? to_low_cardinality->getDictionaryType() : to_type; + + if (from_type->onlyNull()) + { + if (!to_nested->isNullable() && !isVariant(to_type)) + { + if (cast_type == CastType::accurateOrNull) + { + return createToNullableColumnWrapper(); + } + else + { + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert NULL to a non-nullable type"); + } + } + + return [](ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) + { + return result_type->createColumnConstWithDefaultValue(input_rows_count)->convertToFullColumnIfConst(); + }; + } + + bool skip_not_null_check = false; + + if (from_low_cardinality && from_nested->isNullable() && !to_nested->isNullable()) + /// Disable check for dictionary. Will check that column doesn't contain NULL in wrapper below. + skip_not_null_check = true; + + auto wrapper = prepareRemoveNullable(from_nested, to_nested, skip_not_null_check); + if (!from_low_cardinality && !to_low_cardinality) + return wrapper; + + return [wrapper, from_low_cardinality, to_low_cardinality, skip_not_null_check] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) -> ColumnPtr + { + ColumnsWithTypeAndName args = {arguments[0]}; + auto & arg = args.front(); + auto res_type = result_type; + + ColumnPtr converted_column; + + ColumnPtr res_indexes; + /// For some types default can't be casted (for example, String to Int). In that case convert column to full. 
+ bool src_converted_to_full_column = false; + + { + auto tmp_rows_count = input_rows_count; + + if (to_low_cardinality) + res_type = to_low_cardinality->getDictionaryType(); + + if (from_low_cardinality) + { + const auto & col_low_cardinality = typeid_cast(*arguments[0].column); + + if (skip_not_null_check && col_low_cardinality.containsNull()) + throw Exception(ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN, "Cannot convert NULL value to non-Nullable type"); + + arg.column = col_low_cardinality.getDictionary().getNestedColumn(); + arg.type = from_low_cardinality->getDictionaryType(); + + /// TODO: Make map with defaults conversion. + src_converted_to_full_column = !removeNullable(arg.type)->equals(*removeNullable(res_type)); + if (src_converted_to_full_column) + arg.column = arg.column->index(col_low_cardinality.getIndexes(), 0); + else + res_indexes = col_low_cardinality.getIndexesPtr(); + + tmp_rows_count = arg.column->size(); + } + + /// Perform the requested conversion. + converted_column = wrapper(args, res_type, nullable_source, tmp_rows_count); + } + + if (to_low_cardinality) + { + auto res_column = to_low_cardinality->createColumn(); + auto & col_low_cardinality = typeid_cast(*res_column); + + if (from_low_cardinality && !src_converted_to_full_column) + col_low_cardinality.insertRangeFromDictionaryEncodedColumn(*converted_column, *res_indexes); + else + col_low_cardinality.insertRangeFromFullColumn(*converted_column, 0, converted_column->size()); + + return res_column; + } + else if (!src_converted_to_full_column) + return converted_column->index(*res_indexes, 0); + else + return converted_column; + }; + } + + WrapperType prepareRemoveNullable(const DataTypePtr & from_type, const DataTypePtr & to_type, bool skip_not_null_check) const + { + /// Determine whether pre-processing and/or post-processing must take place during conversion. 
+ + bool source_is_nullable = from_type->isNullable(); + bool result_is_nullable = to_type->isNullable(); + + auto wrapper = prepareImpl(removeNullable(from_type), removeNullable(to_type), result_is_nullable); + + if (result_is_nullable) + { + return [wrapper, source_is_nullable] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + /// Create a temporary columns on which to perform the operation. + const auto & nullable_type = static_cast(*result_type); + const auto & nested_type = nullable_type.getNestedType(); + + ColumnsWithTypeAndName tmp_args; + if (source_is_nullable) + tmp_args = createBlockWithNestedColumns(arguments); + else + tmp_args = arguments; + + const ColumnNullable * nullable_source = nullptr; + + /// Add original ColumnNullable for createStringToEnumWrapper() + if (source_is_nullable) + { + if (arguments.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of arguments"); + nullable_source = typeid_cast(arguments.front().column.get()); + } + + /// Perform the requested conversion. + auto tmp_res = wrapper(tmp_args, nested_type, nullable_source, input_rows_count); + + /// May happen in fuzzy tests. For debug purpose. + if (!tmp_res) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Couldn't convert {} to {} in prepareRemoveNullable wrapper.", + arguments[0].type->getName(), nested_type->getName()); + + return wrapInNullable(tmp_res, arguments, nested_type, input_rows_count); + }; + } + else if (source_is_nullable) + { + /// Conversion from Nullable to non-Nullable. + + return [wrapper, skip_not_null_check] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + auto tmp_args = createBlockWithNestedColumns(arguments); + auto nested_type = removeNullable(result_type); + + /// Check that all values are not-NULL. 
+ /// Check can be skipped in case if LowCardinality dictionary is transformed. + /// In that case, correctness will be checked beforehand. + if (!skip_not_null_check) + { + const auto & col = arguments[0].column; + const auto & nullable_col = assert_cast(*col); + const auto & null_map = nullable_col.getNullMapData(); + + if (!memoryIsZero(null_map.data(), 0, null_map.size())) + throw Exception(ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN, "Cannot convert NULL value to non-Nullable type"); + } + const ColumnNullable * nullable_source = typeid_cast(arguments.front().column.get()); + return wrapper(tmp_args, nested_type, nullable_source, input_rows_count); + }; + } + else + return wrapper; + } + + /// 'from_type' and 'to_type' are nested types in case of Nullable. + /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested. + WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const + { + if (isUInt8(from_type) && isBool(to_type)) + return createUInt8ToBoolWrapper(from_type, to_type); + + /// We can cast IPv6 into IPv6, IPv4 into IPv4, but we should not allow to cast FixedString(16) into IPv6 as part of identity cast + bool safe_convert_custom_types = true; + + if (const auto * to_type_custom_name = to_type->getCustomName()) + safe_convert_custom_types = from_type->getCustomName() && from_type->getCustomName()->getName() == to_type_custom_name->getName(); + else if (const auto * from_type_custom_name = from_type->getCustomName()) + safe_convert_custom_types = to_type->getCustomName() && from_type_custom_name->getName() == to_type->getCustomName()->getName(); + + if (from_type->equals(*to_type) && safe_convert_custom_types) + { + /// We can only use identity conversion for DataTypeAggregateFunction when they are strictly equivalent. 
+ if (typeid_cast(from_type.get())) + { + if (DataTypeAggregateFunction::strictEquals(from_type, to_type)) + return createIdentityWrapper(from_type); + } + else + return createIdentityWrapper(from_type); + } + else if (WhichDataType(from_type).isNothing()) + return createNothingWrapper(to_type.get()); + + WrapperType ret; + + auto make_default_wrapper = [&](const auto & types) -> bool + { + using Types = std::decay_t; + using ToDataType = typename Types::LeftType; + + if constexpr (is_any_of) + { + ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); + return true; + } + if constexpr (std::is_same_v) + { + if (isBool(to_type)) + ret = createBoolWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); + else + ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); + return true; + } + if constexpr ( + std::is_same_v || + std::is_same_v) + { + ret = createEnumWrapper(from_type, checkAndGetDataType(to_type.get())); + return true; + } + if constexpr (is_any_of, DataTypeDecimal, + DataTypeDecimal, DataTypeDecimal, + DataTypeDateTime64>) + { + ret = createDecimalWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); + return true; + } + + return false; + }; + + bool cast_ipv4_ipv6_default_on_conversion_error_value = context && context->getSettingsRef().cast_ipv4_ipv6_default_on_conversion_error; + bool input_format_ipv4_default_on_conversion_error_value = context && context->getSettingsRef().input_format_ipv4_default_on_conversion_error; + bool input_format_ipv6_default_on_conversion_error_value = context && context->getSettingsRef().input_format_ipv6_default_on_conversion_error; + + auto make_custom_serialization_wrapper = [&, cast_ipv4_ipv6_default_on_conversion_error_value, input_format_ipv4_default_on_conversion_error_value, input_format_ipv6_default_on_conversion_error_value](const auto & types) -> bool + { + using Types 
= std::decay_t; + using ToDataType = typename Types::RightType; + using FromDataType = typename Types::LeftType; + + if constexpr (WhichDataType(FromDataType::type_id).isStringOrFixedString()) + { + if constexpr (std::is_same_v) + { + ret = [cast_ipv4_ipv6_default_on_conversion_error_value, + input_format_ipv4_default_on_conversion_error_value, + requested_result_is_nullable]( + ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const ColumnNullable * column_nullable, + size_t) -> ColumnPtr + { + if (!WhichDataType(result_type).isIPv4()) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName()); + + const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; + if (requested_result_is_nullable) + return convertToIPv4(arguments[0].column, null_map); + else if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv4_default_on_conversion_error_value) + return convertToIPv4(arguments[0].column, null_map); + else + return convertToIPv4(arguments[0].column, null_map); + }; + + return true; + } + + if constexpr (std::is_same_v) + { + ret = [cast_ipv4_ipv6_default_on_conversion_error_value, + input_format_ipv6_default_on_conversion_error_value, + requested_result_is_nullable]( + ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const ColumnNullable * column_nullable, + size_t) -> ColumnPtr + { + if (!WhichDataType(result_type).isIPv6()) + throw Exception( + ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv6", result_type->getName()); + + const auto * null_map = column_nullable ? 
&column_nullable->getNullMapData() : nullptr; + if (requested_result_is_nullable) + return convertToIPv6(arguments[0].column, null_map); + else if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv6_default_on_conversion_error_value) + return convertToIPv6(arguments[0].column, null_map); + else + return convertToIPv6(arguments[0].column, null_map); + }; + + return true; + } + + if (to_type->getCustomSerialization() && to_type->getCustomName()) + { + ret = [requested_result_is_nullable]( + ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const ColumnNullable * column_nullable, + size_t input_rows_count) -> ColumnPtr + { + auto wrapped_result_type = result_type; + if (requested_result_is_nullable) + wrapped_result_type = makeNullable(result_type); + return ConvertImplGenericFromString::execute( + arguments, wrapped_result_type, column_nullable, input_rows_count); + }; + return true; + } + } + else if constexpr (WhichDataType(FromDataType::type_id).isIPv6() && WhichDataType(ToDataType::type_id).isIPv4()) + { + ret = [cast_ipv4_ipv6_default_on_conversion_error_value, requested_result_is_nullable]( + ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t) + -> ColumnPtr + { + if (!WhichDataType(result_type).isIPv4()) + throw Exception( + ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName()); + + const auto * null_map = column_nullable ? 
&column_nullable->getNullMapData() : nullptr; + if (requested_result_is_nullable) + return convertIPv6ToIPv4(arguments[0].column, null_map); + else if (cast_ipv4_ipv6_default_on_conversion_error_value) + return convertIPv6ToIPv4(arguments[0].column, null_map); + else + return convertIPv6ToIPv4(arguments[0].column, null_map); + }; + + return true; + } + + if constexpr (WhichDataType(ToDataType::type_id).isStringOrFixedString()) + { + if constexpr (WhichDataType(FromDataType::type_id).isEnum()) + { + ret = createEnumToStringWrapper(); + return true; + } + else if (from_type->getCustomSerialization()) + { + ret = [](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + }; + return true; + } + } + + return false; + }; + + if (callOnTwoTypeIndexes(from_type->getTypeId(), to_type->getTypeId(), make_custom_serialization_wrapper)) + return ret; + + if (callOnIndexAndDataType(to_type->getTypeId(), make_default_wrapper)) + return ret; + + switch (to_type->getTypeId()) + { + case TypeIndex::String: + return createStringWrapper(from_type); + case TypeIndex::FixedString: + return createFixedStringWrapper(from_type, checkAndGetDataType(to_type.get())->getN()); + case TypeIndex::Array: + return createArrayWrapper(from_type, static_cast(*to_type)); + case TypeIndex::Tuple: + return createTupleWrapper(from_type, checkAndGetDataType(to_type.get())); + case TypeIndex::Map: + return createMapWrapper(from_type, checkAndGetDataType(to_type.get())); + case TypeIndex::Object: + return createObjectWrapper(from_type, checkAndGetDataType(to_type.get())); + case TypeIndex::AggregateFunction: + return createAggregateFunctionWrapper(from_type, checkAndGetDataType(to_type.get())); + case TypeIndex::Interval: + return createIntervalWrapper(from_type, checkAndGetDataType(to_type.get())->getKind()); + default: + break; + } + + if 
(cast_type == CastType::accurateOrNull) + return createToNullableColumnWrapper(); + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", + from_type->getName(), to_type->getName()); + } +}; + +} + + +FunctionBasePtr createFunctionBaseCast( + ContextPtr context, + const char * name, + const ColumnsWithTypeAndName & arguments, + const DataTypePtr & return_type, + std::optional diagnostic, + CastType cast_type) +{ + DataTypes data_types(arguments.size()); + + for (size_t i = 0; i < arguments.size(); ++i) + data_types[i] = arguments[i].type; + + FunctionCast::MonotonicityForRange monotonicity; + + if (isEnum(arguments.front().type) + && castTypeToEither(return_type.get(), [&](auto & type) + { + monotonicity = FunctionTo>::Type::Monotonic::get; + return true; + })) + { + } + else if (castTypeToEither< + DataTypeUInt8, DataTypeUInt16, DataTypeUInt32, DataTypeUInt64, DataTypeUInt128, DataTypeUInt256, + DataTypeInt8, DataTypeInt16, DataTypeInt32, DataTypeInt64, DataTypeInt128, DataTypeInt256, + DataTypeFloat32, DataTypeFloat64, + DataTypeDate, DataTypeDate32, DataTypeDateTime, + DataTypeString>(return_type.get(), [&](auto & type) + { + monotonicity = FunctionTo>::Type::Monotonic::get; + return true; + })) + { + } + + return std::make_unique(context, name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type); +} + REGISTER_FUNCTION(Conversion) { factory.registerFunction(); @@ -32,7 +4843,7 @@ REGISTER_FUNCTION(Conversion) /// MySQL compatibility alias. Cannot be registered as alias, /// because we don't want it to be normalized to toDate in queries, /// otherwise CREATE DICTIONARY query breaks. 
- factory.registerFunction("DATE", {}, FunctionFactory::CaseInsensitive); + factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::CaseInsensitive); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h deleted file mode 100644 index f338af28240..00000000000 --- a/src/Functions/FunctionsConversion.h +++ /dev/null @@ -1,4990 +0,0 @@ -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ATTEMPT_TO_READ_AFTER_EOF; - extern const int CANNOT_PARSE_NUMBER; - extern const int CANNOT_READ_ARRAY_FROM_TEXT; - extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; - extern const int CANNOT_PARSE_QUOTED_STRING; - extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; - extern const int CANNOT_PARSE_DATE; - extern const int CANNOT_PARSE_DATETIME; - extern const int CANNOT_PARSE_TEXT; - extern const int CANNOT_PARSE_UUID; - extern const int CANNOT_PARSE_IPV4; - extern const int CANNOT_PARSE_IPV6; - extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; - extern const int LOGICAL_ERROR; - extern const int TYPE_MISMATCH; - extern const int CANNOT_CONVERT_TYPE; - extern const int ILLEGAL_COLUMN; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NOT_IMPLEMENTED; - extern const int 
CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN; - extern const int CANNOT_PARSE_BOOL; - extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; -} - -/** Type conversion functions. - * toType - conversion in "natural way"; - */ - -inline UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) -{ - const auto * arg_type = named_column.type.get(); - bool ok = checkAndGetDataType(arg_type) - || checkAndGetDataType(arg_type) - || checkAndGetDataType(arg_type) - || checkAndGetDataType(arg_type); - if (!ok) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of toDecimal() scale {}", named_column.type->getName()); - - Field field; - named_column.column->get(0, field); - return static_cast(field.get()); -} - -/// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type. -struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; }; - -struct AccurateConvertStrategyAdditions -{ - UInt32 scale { 0 }; -}; - -struct AccurateOrNullConvertStrategyAdditions -{ - UInt32 scale { 0 }; -}; - - -struct ConvertDefaultBehaviorTag {}; -struct ConvertReturnNullOnErrorTag {}; -struct ConvertReturnZeroOnErrorTag {}; - -/** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment. 
- * (Date is represented internally as number of days from some day; DateTime - as unix timestamp) - */ -template -struct ConvertImpl -{ - using FromFieldType = typename FromDataType::FieldType; - using ToFieldType = typename ToDataType::FieldType; - - template - static ColumnPtr NO_SANITIZE_UNDEFINED execute( - const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type [[maybe_unused]], size_t input_rows_count, - Additions additions [[maybe_unused]] = Additions()) - { - const ColumnWithTypeAndName & named_from = arguments[0]; - - using ColVecFrom = typename FromDataType::ColumnType; - using ColVecTo = typename ToDataType::ColumnType; - - if constexpr ((IsDataTypeDecimal || IsDataTypeDecimal) - && !(std::is_same_v || std::is_same_v)) - { - if constexpr (!IsDataTypeDecimalOrNumber || !IsDataTypeDecimalOrNumber) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - named_from.column->getName(), Name::name); - } - } - - if (const ColVecFrom * col_from = checkAndGetColumn(named_from.column.get())) - { - typename ColVecTo::MutablePtr col_to = nullptr; - - if constexpr (IsDataTypeDecimal) - { - UInt32 scale; - - if constexpr (std::is_same_v - || std::is_same_v) - { - scale = additions.scale; - } - else - { - scale = additions; - } - - col_to = ColVecTo::create(0, scale); - } - else - col_to = ColVecTo::create(); - - const auto & vec_from = col_from->getData(); - auto & vec_to = col_to->getData(); - vec_to.resize(input_rows_count); - - ColumnUInt8::MutablePtr col_null_map_to; - ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; - if constexpr (std::is_same_v) - { - col_null_map_to = ColumnUInt8::create(input_rows_count, false); - vec_null_map_to = &col_null_map_to->getData(); - } - - bool result_is_bool = isBool(result_type); - for (size_t i = 0; i < input_rows_count; ++i) - { - if constexpr (std::is_same_v) - { - if (result_is_bool) - { - vec_to[i] = vec_from[i] != FromFieldType(0); - 
continue; - } - } - - if constexpr (std::is_same_v && std::is_same_v) - { - static_assert( - std::is_same_v, - "UInt128 and UUID types must be same"); - - vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; - vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; - - continue; - } - - if constexpr (std::is_same_v && std::is_same_v) - { - static_assert( - std::is_same_v, - "UInt128 and IPv6 types must be same"); - - vec_to[i].items[1] = std::byteswap(vec_from[i].toUnderType().items[0]); - vec_to[i].items[0] = std::byteswap(vec_from[i].toUnderType().items[1]); - - continue; - } - - if constexpr (std::is_same_v != std::is_same_v) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Conversion between numeric types and UUID is not supported. " - "Probably the passed UUID is unquoted"); - } - else if constexpr ( - (std::is_same_v != std::is_same_v) - && !(is_any_of || is_any_of) - ) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion from {} to {} is not supported", - TypeName, TypeName); - } - else if constexpr (std::is_same_v != std::is_same_v && !(std::is_same_v || std::is_same_v)) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Conversion between numeric types and IPv6 is not supported. 
" - "Probably the passed IPv6 is unquoted"); - } - else - { - if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) - { - if constexpr (std::is_same_v) - { - ToFieldType result; - bool convert_result = false; - - if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) - convert_result = tryConvertDecimals(vec_from[i], col_from->getScale(), col_to->getScale(), result); - else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) - convert_result = tryConvertFromDecimal(vec_from[i], col_from->getScale(), result); - else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) - convert_result = tryConvertToDecimal(vec_from[i], col_to->getScale(), result); - - if (convert_result) - vec_to[i] = result; - else - { - vec_to[i] = static_cast(0); - (*vec_null_map_to)[i] = true; - } - } - else - { - if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) - vec_to[i] = convertDecimals(vec_from[i], col_from->getScale(), col_to->getScale()); - else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) - vec_to[i] = convertFromDecimal(vec_from[i], col_from->getScale()); - else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) - vec_to[i] = convertToDecimal(vec_from[i], col_to->getScale()); - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unsupported data type in conversion function"); - } - } - else - { - /// If From Data is Nan or Inf and we convert to integer type, throw exception - if constexpr (std::is_floating_point_v && !std::is_floating_point_v) - { - if (!isFinite(vec_from[i])) - { - if constexpr (std::is_same_v) - { - vec_to[i] = 0; - (*vec_null_map_to)[i] = true; - continue; - } - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unexpected inf or nan to integer conversion"); - } - } - - if constexpr (std::is_same_v - || std::is_same_v) - { - bool convert_result = accurate::convertNumeric(vec_from[i], vec_to[i]); - - if (!convert_result) - { - if (std::is_same_v) - { - vec_to[i] = 0; - (*vec_null_map_to)[i] = true; - } - else - { - throw 
Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Value in column {} cannot be safely converted into type {}", - named_from.column->getName(), result_type->getName()); - } - } - } - else - { - if constexpr (std::is_same_v && std::is_same_v) - { - const uint8_t ip4_cidr[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}; - const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); - if (!matchIPv6Subnet(src, ip4_cidr, 96)) - { - char addr[IPV6_MAX_TEXT_LENGTH + 1] {}; - char * paddr = addr; - formatIPv6(src, paddr); - - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 {} in column {} is not in IPv4 mapping block", addr, named_from.column->getName()); - } - - uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); - if constexpr (std::endian::native == std::endian::little) - { - dst[0] = src[15]; - dst[1] = src[14]; - dst[2] = src[13]; - dst[3] = src[12]; - } - else - { - dst[0] = src[12]; - dst[1] = src[13]; - dst[2] = src[14]; - dst[3] = src[15]; - } - } - else if constexpr (std::is_same_v && std::is_same_v) - { - const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); - uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); - std::memset(dst, '\0', IPV6_BINARY_LENGTH); - dst[10] = dst[11] = 0xff; - - if constexpr (std::endian::native == std::endian::little) - { - dst[12] = src[3]; - dst[13] = src[2]; - dst[14] = src[1]; - dst[15] = src[0]; - } - else - { - dst[12] = src[0]; - dst[13] = src[1]; - dst[14] = src[2]; - dst[15] = src[3]; - } - } - else if constexpr (std::is_same_v && std::is_same_v) - vec_to[i] = static_cast(static_cast(vec_from[i])); - else if constexpr (std::is_same_v && (std::is_same_v || std::is_same_v)) - vec_to[i] = static_cast(vec_from[i] * DATE_SECONDS_PER_DAY); - else - vec_to[i] = static_cast(vec_from[i]); - } - } - } - } - - if constexpr (std::is_same_v) - return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); - else - return col_to; - } - 
else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - named_from.column->getName(), Name::name); - } -}; - -/** Conversion of DateTime to Date: throw off time component. - */ -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -/** Conversion of DateTime to Date32: throw off time component. - */ -template -struct ConvertImpl - : DateTimeTransformImpl {}; - -/** Conversion of Date to DateTime: adding 00:00:00 time component. - */ -template -struct ToDateTimeImpl -{ - static constexpr auto name = "toDateTime"; - - static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (d > MAX_DATETIME_DAY_NUM) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Day number {} is out of bounds of type DateTime", d); - } - else if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) - { - if (d > MAX_DATETIME_DAY_NUM) - d = MAX_DATETIME_DAY_NUM; - } - return static_cast(time_zone.fromDayNum(DayNum(d))); - } - - static UInt32 execute(Int32 d, const DateLUTImpl & time_zone) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) - { - if (d < 0) - return 0; - else if (d > MAX_DATETIME_DAY_NUM) - d = MAX_DATETIME_DAY_NUM; - } - else if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (d < 0 || d > MAX_DATETIME_DAY_NUM) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type DateTime", d); - } - return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); - } - - static UInt32 execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) - { - return dt; - } - - static UInt32 execute(Int64 dt64, const DateLUTImpl & /*time_zone*/) - { - if constexpr (date_time_overflow_behavior == 
FormatSettings::DateTimeOverflowBehavior::Ignore) - return static_cast(dt64); - else - { - if (dt64 < 0 || dt64 >= MAX_DATETIME_TIMESTAMP) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) - return dt64 < 0 ? 0 : std::numeric_limits::max(); - else - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type DateTime", dt64); - } - else - return static_cast(dt64); - } - } -}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -/// Implementation of toDate function. - -template -struct ToDateTransform32Or64 -{ - static constexpr auto name = "toDate"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); - } - /// if value is smaller (or equal) than maximum day value for Date, than treat it as day num, - /// otherwise treat it as unix timestamp. This is a bit weird, but we leave this behavior. - if (from <= DATE_LUT_MAX_DAY_NUM) - return from; - else - return time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); - } -}; - -/** Conversion of Date32 to Date. 
- */ -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ToDateTransform32Or64Signed -{ - static constexpr auto name = "toDate"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - // TODO: decide narrow or extended range based on FromType - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < 0 || from > MAX_DATE_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); - } - else - { - if (from < 0) - return 0; - } - return (from <= DATE_LUT_MAX_DAY_NUM) - ? static_cast(from) - : time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATE_TIMESTAMP))); - } -}; - -template -struct ToDateTransform8Or16Signed -{ - static constexpr auto name = "toDate"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if (from < 0) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); - else - return 0; - } - return from; - } -}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -/// Implementation of toDate32 function. 
- -template -struct ToDate32Transform32Or64 -{ - static constexpr auto name = "toDate32"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - if (from < DATE_LUT_MAX_EXTEND_DAY_NUM) - return static_cast(from); - else - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type Date32", from); - } - return time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATETIME64_TIMESTAMP))); - } - } -}; - -template -struct ToDate32Transform32Or64Signed -{ - static constexpr auto name = "toDate32"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - static const Int32 daynum_min_offset = -static_cast(time_zone.getDayNumOffsetEpoch()); - - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < daynum_min_offset || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type Date32", from); - } - - if (from < daynum_min_offset) - return daynum_min_offset; - - return (from < DATE_LUT_MAX_EXTEND_DAY_NUM) - ? static_cast(from) - : time_zone.toDayNum(std::min(time_t(Int64(from)), time_t(MAX_DATETIME64_TIMESTAMP))); - } -}; - -template -struct ToDate32Transform8Or16Signed -{ - static constexpr auto name = "toDate32"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - return from; - } -}; - -/** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience, - * Float32, Float64) to Date. If the - * number is less than 65536, then it is treated as DayNum, and if it's greater or equals to 65536, - * then treated as unix timestamp. 
If the number exceeds UInt32, saturate to MAX_UINT32 then as DayNum. - * It's a bit illogical, as we actually have two functions in one. - * But allows to support frequent case, - * when user write toDate(UInt32), expecting conversion of unix timestamp to Date. - * (otherwise such usage would be frequent mistake). - */ -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - - -template -struct ToDateTimeTransform64 -{ - static constexpr auto name = "toDateTime"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); - } - return static_cast(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); - } -}; - 
-template -struct ToDateTimeTransformSigned -{ - static constexpr auto name = "toDateTime"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if (from < 0) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); - else - return 0; - } - return from; - } -}; - -template -struct ToDateTimeTransform64Signed -{ - static constexpr auto name = "toDateTime"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < 0 || from > MAX_DATETIME_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); - } - - if (from < 0) - return 0; - return static_cast(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); - } -}; - -/// Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to DateTime. 
-template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -/** Conversion of numeric to DateTime64 - */ - -template -struct ToDateTime64TransformUnsigned -{ - static constexpr auto name = "toDateTime64"; - - const DateTime64::NativeType scale_multiplier = 1; - - ToDateTime64TransformUnsigned(UInt32 scale = 0) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); - else - return DecimalUtils::decimalFromComponentsWithMultiplier(from, 0, scale_multiplier); - } - else - return DecimalUtils::decimalFromComponentsWithMultiplier(std::min(from, MAX_DATETIME64_TIMESTAMP), 0, scale_multiplier); - } -}; -template -struct ToDateTime64TransformSigned -{ - static constexpr auto name = "toDateTime64"; - - const DateTime64::NativeType scale_multiplier = 1; - - ToDateTime64TransformSigned(UInt32 scale = 0) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < MIN_DATETIME64_TIMESTAMP || from > MAX_DATETIME64_TIMESTAMP) 
[[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); - } - from = static_cast(std::max(from, MIN_DATETIME64_TIMESTAMP)); - from = static_cast(std::min(from, MAX_DATETIME64_TIMESTAMP)); - - return DecimalUtils::decimalFromComponentsWithMultiplier(from, 0, scale_multiplier); - } -}; -template -struct ToDateTime64TransformFloat -{ - static constexpr auto name = "toDateTime64"; - - const UInt32 scale = 1; - - ToDateTime64TransformFloat(UInt32 scale_ = 0) /// NOLINT - : scale(scale_) - {} - - NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < MIN_DATETIME64_TIMESTAMP || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); - } - - from = std::max(from, static_cast(MIN_DATETIME64_TIMESTAMP)); - from = std::min(from, static_cast(MAX_DATETIME64_TIMESTAMP)); - return convertToDecimal(from, scale); - } -}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - - -/** Conversion of DateTime64 to Date or DateTime: discards fractional part. 
- */ -template -struct FromDateTime64Transform -{ - static constexpr auto name = Transform::name; - - const DateTime64::NativeType scale_multiplier = 1; - - FromDateTime64Transform(UInt32 scale) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - auto execute(DateTime64::NativeType dt, const DateLUTImpl & time_zone) const - { - const auto c = DecimalUtils::splitWithScaleMultiplier(DateTime64(dt), scale_multiplier); - return Transform::execute(static_cast(c.whole), time_zone); - } -}; - -/** Conversion of DateTime64 to Date or DateTime: discards fractional part. - */ -template -struct ConvertImpl - : DateTimeTransformImpl>, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl>, false> {}; - -struct ToDateTime64Transform -{ - static constexpr auto name = "toDateTime64"; - - const DateTime64::NativeType scale_multiplier = 1; - - ToDateTime64Transform(UInt32 scale = 0) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - DateTime64::NativeType execute(UInt16 d, const DateLUTImpl & time_zone) const - { - const auto dt = ToDateTimeImpl<>::execute(d, time_zone); - return execute(dt, time_zone); - } - - DateTime64::NativeType execute(Int32 d, const DateLUTImpl & time_zone) const - { - Int64 dt = static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); - return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); - } - - DateTime64::NativeType execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) const - { - return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); - } -}; - -/** Conversion of Date or DateTime to DateTime64: add zero sub-second part. - */ -template -struct ConvertImpl - : DateTimeTransformImpl {}; - -template -struct ConvertImpl - : DateTimeTransformImpl {}; - -template -struct ConvertImpl - : DateTimeTransformImpl {}; - - -/** Transformation of numbers, dates, datetimes to strings: through formatting. 
- */ -template -struct FormatImpl -{ - template - static ReturnType execute(const typename DataType::FieldType x, WriteBuffer & wb, const DataType *, const DateLUTImpl *) - { - writeText(x, wb); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate *, const DateLUTImpl * time_zone) - { - writeDateText(DayNum(x), wb, *time_zone); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDate32::FieldType x, WriteBuffer & wb, const DataTypeDate32 *, const DateLUTImpl * time_zone) - { - writeDateText(ExtendedDayNum(x), wb, *time_zone); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDateTime::FieldType x, WriteBuffer & wb, const DataTypeDateTime *, const DateLUTImpl * time_zone) - { - writeDateTimeText(x, wb, *time_zone); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDateTime64::FieldType x, WriteBuffer & wb, const DataTypeDateTime64 * type, const DateLUTImpl * time_zone) - { - writeDateTimeText(DateTime64(x), type->getScale(), wb, *time_zone); - return ReturnType(true); - } -}; - - -template -struct FormatImpl> -{ - template - static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeEnum * type, const DateLUTImpl *) - { - static constexpr bool throw_exception = std::is_same_v; - - if constexpr (throw_exception) - { - writeString(type->getNameForValue(x), wb); - } - else - { - StringRef res; - bool is_ok = type->getNameForValue(x, res); - if (is_ok) - writeString(res, wb); - return ReturnType(is_ok); - } - } -}; - -template -struct FormatImpl> -{ - template - static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeDecimal * type, const DateLUTImpl *) - { - writeText(x, 
type->getScale(), wb, false); - return ReturnType(true); - } -}; - - -/// DataTypeEnum to DataType free conversion -template -struct ConvertImpl, DataTypeNumber, Name, ConvertDefaultBehaviorTag> -{ - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) - { - return arguments[0].column; - } -}; - -static inline ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) -{ - ColumnUInt8::MutablePtr null_map = nullptr; - if (const auto * col_null = checkAndGetColumn(col.get())) - { - null_map = ColumnUInt8::create(); - null_map->insertRangeFrom(col_null->getNullMapColumn(), 0, col_null->size()); - } - return null_map; -} - -template -requires (!std::is_same_v) -struct ConvertImpl -{ - using FromFieldType = typename FromDataType::FieldType; - using ColVecType = ColumnVectorOrDecimal; - - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) - { - if constexpr (IsDataTypeDateOrDateTime) - { - auto datetime_arg = arguments[0]; - - const DateLUTImpl * time_zone = nullptr; - const ColumnConst * time_zone_column = nullptr; - - if (arguments.size() == 1) - { - auto non_null_args = createBlockWithNestedColumns(arguments); - time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); - } - else /// When we have a column for timezone - { - datetime_arg.column = datetime_arg.column->convertToFullColumnIfConst(); - - if constexpr (std::is_same_v || std::is_same_v) - time_zone = &DateLUT::instance(); - /// For argument of Date or DateTime type, second argument with time zone could be specified. 
- if constexpr (std::is_same_v || std::is_same_v) - { - if ((time_zone_column = checkAndGetColumnConst(arguments[1].column.get()))) - { - auto non_null_args = createBlockWithNestedColumns(arguments); - time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); - } - } - } - const auto & col_with_type_and_name = columnGetNested(datetime_arg); - - if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) - { - auto col_to = ColumnString::create(); - - const typename ColVecType::Container & vec_from = col_from->getData(); - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - - if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss.") + col_from->getScale() + 1)); - else - data_to.resize(size * 3); /// Arbitrary - - offsets_to.resize(size); - - WriteBufferFromVector write_buffer(data_to); - const auto & type = static_cast(*col_with_type_and_name.type); - - ColumnUInt8::MutablePtr null_map = copyNullMap(datetime_arg.column); - - if (!null_map && arguments.size() > 1) - null_map = copyNullMap(arguments[1].column->convertToFullColumnIfConst()); - - if (null_map) - { - for (size_t i = 0; i < size; ++i) - { - if (!time_zone_column && arguments.size() > 1) - { - if (!arguments[1].column.get()->getDataAt(i).toString().empty()) - time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); - } - bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); - null_map->getData()[i] |= !is_ok; - 
writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - else - { - for (size_t i = 0; i < size; ++i) - { - if (!time_zone_column && arguments.size() > 1) - { - if (!arguments[1].column.get()->getDataAt(i).toString().empty()) - time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); - } - FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - - write_buffer.finalize(); - - if (null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), Name::name); - } - else - { - ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); - - const auto & col_with_type_and_name = columnGetNested(arguments[0]); - const auto & type = static_cast(*col_with_type_and_name.type); - - if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) - { - auto col_to = ColumnString::create(); - - const typename ColVecType::Container & vec_from = col_from->getData(); - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - - data_to.resize(size * 3); - offsets_to.resize(size); - - WriteBufferFromVector write_buffer(data_to); - - if (null_map) - { - for (size_t i = 0; i < size; ++i) - { - bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); - /// We don't use timezones in this branch - null_map->getData()[i] |= !is_ok; - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - else - { - for (size_t i = 0; i < size; ++i) - { - FormatImpl::template execute(vec_from[i], write_buffer, &type, 
nullptr); - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - - write_buffer.finalize(); - - if (null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), Name::name); - } - } -}; - - -/// Generic conversion of any type to String or FixedString via serialization to text. -template -struct ConvertImplGenericToString -{ - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) - { - static_assert(std::is_same_v || std::is_same_v, - "Can be used only to serialize to ColumnString or ColumnFixedString"); - - ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); - - const auto & col_with_type_and_name = columnGetNested(arguments[0]); - const IDataType & type = *col_with_type_and_name.type; - const IColumn & col_from = *col_with_type_and_name.column; - - size_t size = col_from.size(); - auto col_to = removeNullable(result_type)->createColumn(); - - { - ColumnStringHelpers::WriteHelper write_helper( - assert_cast(*col_to), - size); - - auto & write_buffer = write_helper.getWriteBuffer(); - - FormatSettings format_settings; - auto serialization = type.getDefaultSerialization(); - for (size_t row = 0; row < size; ++row) - { - serialization->serializeText(col_from, row, write_buffer, format_settings); - write_helper.rowWritten(); - } - - write_helper.finalize(); - } - - if (result_type->isNullable() && null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } -}; - -/** Conversion of time_t to UInt16, Int32, UInt32 - */ -template -void convertFromTime(typename DataType::FieldType & x, time_t & time) -{ - x = time; -} - -template <> -inline void convertFromTime(DataTypeDate::FieldType & x, time_t & time) -{ - if (unlikely(time < 0)) - 
x = 0; - else if (unlikely(time > 0xFFFF)) - x = 0xFFFF; - else - x = time; -} - -template <> -inline void convertFromTime(DataTypeDate32::FieldType & x, time_t & time) -{ - x = static_cast(time); -} - -template <> -inline void convertFromTime(DataTypeDateTime::FieldType & x, time_t & time) -{ - if (unlikely(time < 0)) - x = 0; - else if (unlikely(time > MAX_DATETIME_TIMESTAMP)) - x = MAX_DATETIME_TIMESTAMP; - else - x = static_cast(time); -} - -/** Conversion of strings to numbers, dates, datetimes: through parsing. - */ -template -void parseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool precise_float_parsing) -{ - if constexpr (std::is_floating_point_v) - { - if (precise_float_parsing) - readFloatTextPrecise(x, rb); - else - readFloatTextFast(x, rb); - } - else - readText(x, rb); -} - -template <> -inline void parseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - DayNum tmp(0); - readDateText(tmp, rb, *time_zone); - x = tmp; -} - -template <> -inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - ExtendedDayNum tmp(0); - readDateText(tmp, rb, *time_zone); - x = tmp; -} - - -// NOTE: no need of extra overload of DateTime64, since readDateTimeText64 has different signature and that case is explicitly handled in the calling code. 
-template <> -inline void parseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - time_t time = 0; - readDateTimeText(time, rb, *time_zone); - convertFromTime(x, time); -} - -template <> -inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - UUID tmp; - readUUIDText(tmp, rb); - x = tmp.toUnderType(); -} - -template <> -inline void parseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - IPv4 tmp; - readIPv4Text(tmp, rb); - x = tmp.toUnderType(); -} - -template <> -inline void parseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - IPv6 tmp; - readIPv6Text(tmp, rb); - x = tmp; -} - -template -bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool precise_float_parsing) -{ - if constexpr (std::is_floating_point_v) - { - if (precise_float_parsing) - return tryReadFloatTextPrecise(x, rb); - else - return tryReadFloatTextFast(x, rb); - } - else /*if constexpr (is_integer_v)*/ - return tryReadIntText(x, rb); -} - -template <> -inline bool tryParseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - DayNum tmp(0); - if (!tryReadDateText(tmp, rb, *time_zone)) - return false; - x = tmp; - return true; -} - -template <> -inline bool tryParseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - ExtendedDayNum tmp(0); - if (!tryReadDateText(tmp, rb, *time_zone)) - return false; - x = tmp; - return true; -} - -template <> -inline bool tryParseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - time_t time = 0; - if (!tryReadDateTimeText(time, rb, *time_zone)) - return false; - convertFromTime(x, time); - return true; -} - -template <> -inline bool tryParseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - UUID tmp; - if 
(!tryReadUUIDText(tmp, rb)) - return false; - - x = tmp.toUnderType(); - return true; -} - -template <> -inline bool tryParseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - IPv4 tmp; - if (!tryReadIPv4Text(tmp, rb)) - return false; - - x = tmp.toUnderType(); - return true; -} - -template <> -inline bool tryParseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - IPv6 tmp; - if (!tryReadIPv6Text(tmp, rb)) - return false; - - x = tmp; - return true; -} - - -/** Throw exception with verbose message when string value is not parsed completely. - */ -[[noreturn]] inline void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, const IDataType & result_type) -{ - WriteBufferFromOwnString message_buf; - message_buf << "Cannot parse string " << quote << String(read_buffer.buffer().begin(), read_buffer.buffer().size()) - << " as " << result_type.getName() - << ": syntax error"; - - if (read_buffer.offset()) - message_buf << " at position " << read_buffer.offset() - << " (parsed just " << quote << String(read_buffer.buffer().begin(), read_buffer.offset()) << ")"; - else - message_buf << " at begin of string"; - - // Currently there are no functions toIPv{4,6}Or{Null,Zero} - if (isNativeNumber(result_type) && !(result_type.getName() == "IPv4" || result_type.getName() == "IPv6")) - message_buf << ". Note: there are to" << result_type.getName() << "OrZero and to" << result_type.getName() << "OrNull functions, which returns zero/NULL instead of throwing exception."; - - throw Exception(PreformattedMessage{message_buf.str(), "Cannot parse string {} as {}: syntax error {}"}, ErrorCodes::CANNOT_PARSE_TEXT); -} - - -enum class ConvertFromStringExceptionMode -{ - Throw, /// Throw exception if value cannot be parsed. - Zero, /// Fill with zero or default if value cannot be parsed. - Null /// Return ColumnNullable with NULLs when value cannot be parsed. 
-}; - -enum class ConvertFromStringParsingMode -{ - Normal, - BestEffort, /// Only applicable for DateTime. Will use sophisticated method, that is slower. - BestEffortUS -}; - -template -struct ConvertThroughParsing -{ - static_assert(std::is_same_v || std::is_same_v, - "ConvertThroughParsing is only applicable for String or FixedString data types"); - - static constexpr bool to_datetime64 = std::is_same_v; - - static bool isAllRead(ReadBuffer & in) - { - /// In case of FixedString, skip zero bytes at end. - if constexpr (std::is_same_v) - while (!in.eof() && *in.position() == 0) - ++in.position(); - - if (in.eof()) - return true; - - /// Special case, that allows to parse string with DateTime or DateTime64 as Date or Date32. - if constexpr (std::is_same_v || std::is_same_v) - { - if (!in.eof() && (*in.position() == ' ' || *in.position() == 'T')) - { - if (in.buffer().size() == strlen("YYYY-MM-DD hh:mm:ss")) - return true; - - if (in.buffer().size() >= strlen("YYYY-MM-DD hh:mm:ss.x") - && in.buffer().begin()[19] == '.') - { - in.position() = in.buffer().begin() + 20; - - while (!in.eof() && isNumericASCII(*in.position())) - ++in.position(); - - if (in.eof()) - return true; - } - } - } - - return false; - } - - template - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, size_t input_rows_count, - Additions additions [[maybe_unused]] = Additions()) - { - using ColVecTo = typename ToDataType::ColumnType; - - const DateLUTImpl * local_time_zone [[maybe_unused]] = nullptr; - const DateLUTImpl * utc_time_zone [[maybe_unused]] = nullptr; - - /// For conversion to Date or DateTime type, second argument with time zone could be specified. - if constexpr (std::is_same_v || to_datetime64) - { - const auto result_type = removeNullable(res_type); - // Time zone is already figured out during result type resolution, no need to do it here. 
- if (const auto dt_col = checkAndGetDataType(result_type.get())) - local_time_zone = &dt_col->getTimeZone(); - else - local_time_zone = &extractTimeZoneFromFunctionArguments(arguments, 1, 0); - - if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort || parsing_mode == ConvertFromStringParsingMode::BestEffortUS) - utc_time_zone = &DateLUT::instance("UTC"); - } - else if constexpr (std::is_same_v || std::is_same_v) - { - // Timezone is more or less dummy when parsing Date/Date32 from string. - local_time_zone = &DateLUT::instance(); - utc_time_zone = &DateLUT::instance("UTC"); - } - - const IColumn * col_from = arguments[0].column.get(); - const ColumnString * col_from_string = checkAndGetColumn(col_from); - const ColumnFixedString * col_from_fixed_string = checkAndGetColumn(col_from); - - if (std::is_same_v && !col_from_string) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - col_from->getName(), Name::name); - - if (std::is_same_v && !col_from_fixed_string) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - col_from->getName(), Name::name); - - size_t size = input_rows_count; - typename ColVecTo::MutablePtr col_to = nullptr; - - if constexpr (IsDataTypeDecimal) - { - UInt32 scale = additions; - if constexpr (to_datetime64) - { - ToDataType check_bounds_in_ctor(scale, local_time_zone ? 
local_time_zone->getTimeZone() : String{}); - } - else - { - ToDataType check_bounds_in_ctor(ToDataType::maxPrecision(), scale); - } - col_to = ColVecTo::create(size, scale); - } - else - col_to = ColVecTo::create(size); - - typename ColVecTo::Container & vec_to = col_to->getData(); - - ColumnUInt8::MutablePtr col_null_map_to; - ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; - if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) - { - col_null_map_to = ColumnUInt8::create(size); - vec_null_map_to = &col_null_map_to->getData(); - } - - const ColumnString::Chars * chars = nullptr; - const IColumn::Offsets * offsets = nullptr; - size_t fixed_string_size = 0; - - if constexpr (std::is_same_v) - { - chars = &col_from_string->getChars(); - offsets = &col_from_string->getOffsets(); - } - else - { - chars = &col_from_fixed_string->getChars(); - fixed_string_size = col_from_fixed_string->getN(); - } - - size_t current_offset = 0; - - bool precise_float_parsing = false; - - if (DB::CurrentThread::isInitialized()) - { - const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext(); - - if (query_context) - precise_float_parsing = query_context->getSettingsRef().precise_float_parsing; - } - - for (size_t i = 0; i < size; ++i) - { - size_t next_offset = std::is_same_v ? (*offsets)[i] : (current_offset + fixed_string_size); - size_t string_size = std::is_same_v ? 
next_offset - current_offset - 1 : fixed_string_size; - - ReadBufferFromMemory read_buffer(&(*chars)[current_offset], string_size); - - if constexpr (exception_mode == ConvertFromStringExceptionMode::Throw) - { - if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort) - { - if constexpr (to_datetime64) - { - DateTime64 res = 0; - parseDateTime64BestEffort(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; - } - else - { - time_t res; - parseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone); - convertFromTime(vec_to[i], res); - } - } - else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS) - { - if constexpr (to_datetime64) - { - DateTime64 res = 0; - parseDateTime64BestEffortUS(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; - } - else - { - time_t res; - parseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone); - convertFromTime(vec_to[i], res); - } - } - else - { - if constexpr (to_datetime64) - { - DateTime64 value = 0; - readDateTime64Text(value, col_to->getScale(), read_buffer, *local_time_zone); - vec_to[i] = value; - } - else if constexpr (IsDataTypeDecimal) - { - SerializationDecimal::readText( - vec_to[i], read_buffer, ToDataType::maxPrecision(), col_to->getScale()); - } - else - { - /// we want to utilize constexpr condition here, which is not mixable with value comparison - do - { - if constexpr (std::is_same_v && std::is_same_v) - { - if (fixed_string_size == IPV6_BINARY_LENGTH) - { - readBinary(vec_to[i], read_buffer); - break; - } - } - parseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); - } while (false); - } - } - - if (!isAllRead(read_buffer)) - throwExceptionForIncompletelyParsedValue(read_buffer, *res_type); - } - else - { - bool parsed; - - if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort) - { - if constexpr (to_datetime64) - { - 
DateTime64 res = 0; - parsed = tryParseDateTime64BestEffort(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; - } - else - { - time_t res; - parsed = tryParseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone); - convertFromTime(vec_to[i],res); - } - } - else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS) - { - if constexpr (to_datetime64) - { - DateTime64 res = 0; - parsed = tryParseDateTime64BestEffortUS(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; - } - else - { - time_t res; - parsed = tryParseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone); - convertFromTime(vec_to[i],res); - } - } - else - { - if constexpr (to_datetime64) - { - DateTime64 value = 0; - parsed = tryReadDateTime64Text(value, col_to->getScale(), read_buffer, *local_time_zone); - vec_to[i] = value; - } - else if constexpr (IsDataTypeDecimal) - { - parsed = SerializationDecimal::tryReadText( - vec_to[i], read_buffer, ToDataType::maxPrecision(), col_to->getScale()); - } - else - { - /// we want to utilize constexpr condition here, which is not mixable with value comparison - do - { - if constexpr (std::is_same_v && std::is_same_v) - { - if (fixed_string_size == IPV6_BINARY_LENGTH) - { - readBinary(vec_to[i], read_buffer); - parsed = true; - break; - } - } - - parsed = tryParseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); - } while (false); - } - } - - if (!isAllRead(read_buffer)) - parsed = false; - - if (!parsed) - { - if constexpr (std::is_same_v) - { - vec_to[i] = -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); - } - else - { - vec_to[i] = static_cast(0); - } - } - - if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) - (*vec_null_map_to)[i] = !parsed; - } - - current_offset = next_offset; - } - - if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) - return 
ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); - else - return col_to; - } -}; - - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (is_any_of && is_any_of) -struct ConvertImpl - : ConvertThroughParsing {}; - -/// Generic conversion of any type from String. Used for complex types: Array and Tuple or types with custom serialization. -template -struct ConvertImplGenericFromString -{ - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) - { - static_assert(std::is_same_v || std::is_same_v, - "Can be used only to parse from ColumnString or ColumnFixedString"); - - const IColumn & column_from = *arguments[0].column; - const IDataType & data_type_to = *result_type; - auto res = data_type_to.createColumn(); - auto serialization = data_type_to.getDefaultSerialization(); - const auto * null_map = column_nullable ? 
&column_nullable->getNullMapData() : nullptr; - - executeImpl(column_from, *res, *serialization, input_rows_count, null_map, result_type.get()); - return res; - } - - static void executeImpl( - const IColumn & column_from, - IColumn & column_to, - const ISerialization & serialization_from, - size_t input_rows_count, - const PaddedPODArray * null_map = nullptr, - const IDataType * result_type = nullptr) - { - static_assert(std::is_same_v || std::is_same_v, - "Can be used only to parse from ColumnString or ColumnFixedString"); - - if (const StringColumnType * col_from_string = checkAndGetColumn(&column_from)) - { - column_to.reserve(input_rows_count); - - FormatSettings format_settings; - for (size_t i = 0; i < input_rows_count; ++i) - { - if (null_map && (*null_map)[i]) - { - column_to.insertDefault(); - continue; - } - - const auto & val = col_from_string->getDataAt(i); - ReadBufferFromMemory read_buffer(val.data, val.size); - try - { - serialization_from.deserializeWholeText(column_to, read_buffer, format_settings); - } - catch (const Exception & e) - { - auto * nullable_column = typeid_cast(&column_to); - if (e.code() == ErrorCodes::CANNOT_PARSE_BOOL && nullable_column) - { - auto & col_nullmap = nullable_column->getNullMapData(); - if (col_nullmap.size() != nullable_column->size()) - col_nullmap.resize_fill(nullable_column->size()); - if (nullable_column->size() == (i + 1)) - nullable_column->popBack(1); - nullable_column->insertDefault(); - continue; - } - throw; - } - - if (!read_buffer.eof()) - { - if (result_type) - throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); - else - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, - "Cannot parse string to column {}. 
Expected eof", column_to.getName()); - } - } - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column {} of first argument of conversion function from string", - column_from.getName()); - } - -}; - - -template <> -struct ConvertImpl - : ConvertImpl {}; - -template <> -struct ConvertImpl - : ConvertImpl {}; - -/** If types are identical, just take reference to column. - */ -template -requires (!T::is_parametric) -struct ConvertImpl -{ - template - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, - Additions additions [[maybe_unused]] = Additions()) - { - return arguments[0].column; - } -}; - -template -struct ConvertImpl -{ - template - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, - Additions additions [[maybe_unused]] = Additions()) - { - - return arguments[0].column; - } -}; - - -/** Conversion from FixedString to String. - * Cutting sequences of zero bytes from end of strings. 
- */ -template -struct ConvertImpl -{ - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type, size_t /*input_rows_count*/) - { - ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); - const auto & nested = columnGetNested(arguments[0]); - if (const ColumnFixedString * col_from = checkAndGetColumn(nested.column.get())) - { - auto col_to = ColumnString::create(); - - const ColumnFixedString::Chars & data_from = col_from->getChars(); - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = col_from->size(); - size_t n = col_from->getN(); - data_to.resize(size * (n + 1)); /// + 1 - zero terminator - offsets_to.resize(size); - - size_t offset_from = 0; - size_t offset_to = 0; - for (size_t i = 0; i < size; ++i) - { - if (!null_map || !null_map->getData()[i]) - { - size_t bytes_to_copy = n; - while (bytes_to_copy > 0 && data_from[offset_from + bytes_to_copy - 1] == 0) - --bytes_to_copy; - - memcpy(&data_to[offset_to], &data_from[offset_from], bytes_to_copy); - offset_to += bytes_to_copy; - } - data_to[offset_to] = 0; - ++offset_to; - offsets_to[i] = offset_to; - offset_from += n; - } - - data_to.resize(offset_to); - if (return_type->isNullable() && null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), Name::name); - } -}; - - -/// Declared early because used below. 
-struct NameToDate { static constexpr auto name = "toDate"; }; -struct NameToDate32 { static constexpr auto name = "toDate32"; }; -struct NameToDateTime { static constexpr auto name = "toDateTime"; }; -struct NameToDateTime32 { static constexpr auto name = "toDateTime32"; }; -struct NameToDateTime64 { static constexpr auto name = "toDateTime64"; }; -struct NameToString { static constexpr auto name = "toString"; }; -struct NameToDecimal32 { static constexpr auto name = "toDecimal32"; }; -struct NameToDecimal64 { static constexpr auto name = "toDecimal64"; }; -struct NameToDecimal128 { static constexpr auto name = "toDecimal128"; }; -struct NameToDecimal256 { static constexpr auto name = "toDecimal256"; }; - - -#define DEFINE_NAME_TO_INTERVAL(INTERVAL_KIND) \ - struct NameToInterval ## INTERVAL_KIND \ - { \ - static constexpr auto name = "toInterval" #INTERVAL_KIND; \ - static constexpr auto kind = IntervalKind::Kind::INTERVAL_KIND; \ - }; - -DEFINE_NAME_TO_INTERVAL(Nanosecond) -DEFINE_NAME_TO_INTERVAL(Microsecond) -DEFINE_NAME_TO_INTERVAL(Millisecond) -DEFINE_NAME_TO_INTERVAL(Second) -DEFINE_NAME_TO_INTERVAL(Minute) -DEFINE_NAME_TO_INTERVAL(Hour) -DEFINE_NAME_TO_INTERVAL(Day) -DEFINE_NAME_TO_INTERVAL(Week) -DEFINE_NAME_TO_INTERVAL(Month) -DEFINE_NAME_TO_INTERVAL(Quarter) -DEFINE_NAME_TO_INTERVAL(Year) - -#undef DEFINE_NAME_TO_INTERVAL - -struct NameParseDateTimeBestEffort; -struct NameParseDateTimeBestEffortOrZero; -struct NameParseDateTimeBestEffortOrNull; - -template -static inline bool isDateTime64(const ColumnsWithTypeAndName & arguments) -{ - if constexpr (std::is_same_v) - return true; - else if constexpr (std::is_same_v || std::is_same_v - || std::is_same_v || std::is_same_v) - { - return (arguments.size() == 2 && isUInt(arguments[1].type)) || arguments.size() == 3; - } - - return false; -} - -template -class FunctionConvert : public IFunction -{ -public: - using Monotonic = MonotonicityImpl; - - static constexpr auto name = Name::name; - static constexpr 
bool to_decimal = - std::is_same_v || std::is_same_v - || std::is_same_v || std::is_same_v; - - static constexpr bool to_datetime64 = std::is_same_v; - - static constexpr bool to_string_or_fixed_string = std::is_same_v || - std::is_same_v; - - static constexpr bool to_date_or_datetime = std::is_same_v || - std::is_same_v || - std::is_same_v; - - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - static FunctionPtr create() { return std::make_shared(); } - - FunctionConvert() = default; - explicit FunctionConvert(ContextPtr context_) : context(context_) {} - - String getName() const override - { - return name; - } - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return std::is_same_v; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & arguments) const override - { - /// TODO: We can make more optimizations here. 
- return !(to_date_or_datetime && isNumber(*arguments[0].type)); - } - - using DefaultReturnTypeGetter = std::function; - static DataTypePtr getReturnTypeDefaultImplementationForNulls(const ColumnsWithTypeAndName & arguments, const DefaultReturnTypeGetter & getter) - { - NullPresence null_presence = getNullPresense(arguments); - - if (null_presence.has_null_constant) - { - return makeNullable(std::make_shared()); - } - if (null_presence.has_nullable) - { - auto nested_columns = Block(createBlockWithNestedColumns(arguments)); - auto return_type = getter(ColumnsWithTypeAndName(nested_columns.begin(), nested_columns.end())); - return makeNullable(return_type); - } - - return getter(arguments); - } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - auto getter = [&] (const auto & args) { return getReturnTypeImplRemovedNullable(args); }; - auto res = getReturnTypeDefaultImplementationForNulls(arguments, getter); - to_nullable = res->isNullable(); - checked_return_type = true; - return res; - } - - DataTypePtr getReturnTypeImplRemovedNullable(const ColumnsWithTypeAndName & arguments) const - { - FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; - FunctionArgumentDescriptors optional_args; - - if constexpr (to_decimal) - { - mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); - } - - if (!to_decimal && isDateTime64(arguments)) - { - mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); - } - - // toString(DateTime or DateTime64, [timezone: String]) - if ((std::is_same_v && !arguments.empty() && (isDateTime64(arguments[0].type) || isDateTime(arguments[0].type))) - // toUnixTimestamp(value[, timezone : String]) - || std::is_same_v - // toDate(value[, timezone : String]) - || std::is_same_v // TODO: shall we allow timestamp argument for toDate? 
DateTime knows nothing about timezones and this argument is ignored below. - // toDate32(value[, timezone : String]) - || std::is_same_v - // toDateTime(value[, timezone: String]) - || std::is_same_v - // toDateTime64(value, scale : Integer[, timezone: String]) - || std::is_same_v) - { - optional_args.push_back({"timezone", static_cast(&isString), nullptr, "String"}); - } - - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); - - if constexpr (std::is_same_v) - { - return std::make_shared(Name::kind); - } - else if constexpr (to_decimal) - { - UInt64 scale = extractToDecimalScale(arguments[1]); - - if constexpr (std::is_same_v) - return createDecimalMaxPrecision(scale); - else if constexpr (std::is_same_v) - return createDecimalMaxPrecision(scale); - else if constexpr (std::is_same_v) - return createDecimalMaxPrecision(scale); - else if constexpr (std::is_same_v) - return createDecimalMaxPrecision(scale); - - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected branch in code of conversion function: it is a bug."); - } - else - { - // Optional second argument with time zone for DateTime. - UInt8 timezone_arg_position = 1; - UInt32 scale [[maybe_unused]] = DataTypeDateTime64::default_scale; - - // DateTime64 requires more arguments: scale and timezone. Since timezone is optional, scale should be first. 
- if (isDateTime64(arguments)) - { - timezone_arg_position += 1; - scale = static_cast(arguments[1].column->get64(0)); - - if (to_datetime64 || scale != 0) /// toDateTime('xxxx-xx-xx xx:xx:xx', 0) return DateTime - return std::make_shared(scale, - extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); - - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); - } - - if constexpr (std::is_same_v) - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); - else if constexpr (std::is_same_v) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected branch in code of conversion function: it is a bug."); - else - return std::make_shared(); - } - } - - /// Function actually uses default implementation for nulls, - /// but we need to know if return type is Nullable or not, - /// so we use checked_return_type only to intercept the first call to getReturnTypeImpl(...). - bool useDefaultImplementationForNulls() const override - { - bool to_nullable_string = to_nullable && std::is_same_v; - return checked_return_type && !to_nullable_string; - } - - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override - { - if constexpr (std::is_same_v) - return {}; - else if constexpr (std::is_same_v) - return {2}; - return {1}; - } - bool canBeExecutedOnDefaultArguments() const override { return false; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - try - { - return executeInternal(arguments, result_type, input_rows_count); - } - catch (Exception & e) - { - /// More convenient error message. 
- if (e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF) - { - e.addMessage("Cannot parse " - + result_type->getName() + " from " - + arguments[0].type->getName() - + ", because value is too short"); - } - else if (e.code() == ErrorCodes::CANNOT_PARSE_NUMBER - || e.code() == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT - || e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED - || e.code() == ErrorCodes::CANNOT_PARSE_QUOTED_STRING - || e.code() == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE - || e.code() == ErrorCodes::CANNOT_PARSE_DATE - || e.code() == ErrorCodes::CANNOT_PARSE_DATETIME - || e.code() == ErrorCodes::CANNOT_PARSE_UUID - || e.code() == ErrorCodes::CANNOT_PARSE_IPV4 - || e.code() == ErrorCodes::CANNOT_PARSE_IPV6) - { - e.addMessage("Cannot parse " - + result_type->getName() + " from " - + arguments[0].type->getName()); - } - - throw; - } - } - - bool hasInformationAboutMonotonicity() const override - { - return Monotonic::has(); - } - - Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override - { - return Monotonic::get(type, left, right); - } - -private: - ContextPtr context; - mutable bool checked_return_type = false; - mutable bool to_nullable = false; - - ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const - { - if (arguments.empty()) - throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least 1 argument", getName()); - - if (result_type->onlyNull()) - return result_type->createColumnConstWithDefaultValue(input_rows_count); - - const DataTypePtr from_type = removeNullable(arguments[0].type); - ColumnPtr result_column; - - [[maybe_unused]] FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; - - if (context) - date_time_overflow_behavior = context->getSettingsRef().date_time_overflow_behavior.value; - - auto call = [&](const 
auto & types, const auto & tag) -> bool - { - using Types = std::decay_t; - using LeftDataType = typename Types::LeftType; - using RightDataType = typename Types::RightType; - using SpecialTag = std::decay_t; - - if constexpr (IsDataTypeDecimal) - { - if constexpr (std::is_same_v) - { - /// Account for optional timezone argument. - if (arguments.size() != 2 && arguments.size() != 3) - throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects 2 or 3 arguments for DataTypeDateTime64.", getName()); - } - else if (arguments.size() != 2) - { - throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects 2 arguments for Decimal.", getName()); - } - - const ColumnWithTypeAndName & scale_column = arguments[1]; - UInt32 scale = extractToDecimalScale(scale_column); - - switch (date_time_overflow_behavior) - { - case FormatSettings::DateTimeOverflowBehavior::Throw: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); - break; - case FormatSettings::DateTimeOverflowBehavior::Ignore: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); - break; - case FormatSettings::DateTimeOverflowBehavior::Saturate: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); - break; - } - - } - else if constexpr (IsDataTypeDateOrDateTime && std::is_same_v) - { - const auto * dt64 = assert_cast(arguments[0].type.get()); - switch (date_time_overflow_behavior) - { - case FormatSettings::DateTimeOverflowBehavior::Throw: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); - break; - case FormatSettings::DateTimeOverflowBehavior::Ignore: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); - break; - case FormatSettings::DateTimeOverflowBehavior::Saturate: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); - 
break; - } - } -#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE) \ - case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ - result_column = ConvertImpl::execute( \ - arguments, result_type, input_rows_count); \ - break; - - else if constexpr (IsDataTypeDecimalOrNumber && IsDataTypeDecimalOrNumber) - { - using LeftT = typename LeftDataType::FieldType; - using RightT = typename RightDataType::FieldType; - - static constexpr bool bad_left = - is_decimal || std::is_floating_point_v || is_big_int_v || is_signed_v; - static constexpr bool bad_right = - is_decimal || std::is_floating_point_v || is_big_int_v || is_signed_v; - - /// Disallow int vs UUID conversion (but support int vs UInt128 conversion) - if constexpr ((bad_left && std::is_same_v) || - (bad_right && std::is_same_v)) - { - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Wrong UUID conversion"); - } - else - { - switch (date_time_overflow_behavior) - { - GENERATE_OVERFLOW_MODE_CASE(Throw) - GENERATE_OVERFLOW_MODE_CASE(Ignore) - GENERATE_OVERFLOW_MODE_CASE(Saturate) - } - } - } - else if constexpr ((IsDataTypeNumber || IsDataTypeDateOrDateTime) - && IsDataTypeDateOrDateTime) - { - switch (date_time_overflow_behavior) - { - GENERATE_OVERFLOW_MODE_CASE(Throw) - GENERATE_OVERFLOW_MODE_CASE(Ignore) - GENERATE_OVERFLOW_MODE_CASE(Saturate) - } - } -#undef GENERATE_OVERFLOW_MODE_CASE - else - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count); - - return true; - }; - - if (isDateTime64(arguments)) - { - /// For toDateTime('xxxx-xx-xx xx:xx:xx.00', 2[, 'timezone']) we need to it convert to DateTime64 - const ColumnWithTypeAndName & scale_column = arguments[1]; - UInt32 scale = extractToDecimalScale(scale_column); - - if (to_datetime64 || scale != 0) /// When scale = 0, the data type is DateTime otherwise the data type is DateTime64 - { - if (!callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{})) - throw 
Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", - arguments[0].type->getName(), getName()); - - return result_column; - } - } - - if constexpr (std::is_same_v) - { - if (from_type->getCustomSerialization()) - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); - } - - bool done = false; - if constexpr (to_string_or_fixed_string) - { - done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); - } - else - { - bool cast_ipv4_ipv6_default_on_conversion_error = false; - if constexpr (is_any_of) - if (context && (cast_ipv4_ipv6_default_on_conversion_error = context->getSettingsRef().cast_ipv4_ipv6_default_on_conversion_error)) - done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertReturnZeroOnErrorTag{}); - - if (!cast_ipv4_ipv6_default_on_conversion_error) - { - /// We should use ConvertFromStringExceptionMode::Null mode when converting from String (or FixedString) - /// to Nullable type, to avoid 'value is too short' error on attempt to parse empty string from NULL values. - if (to_nullable && WhichDataType(from_type).isStringOrFixedString()) - done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertReturnNullOnErrorTag{}); - else - done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); - } - } - - if (!done) - { - /// Generic conversion of any type to String. - if (std::is_same_v) - { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); - } - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", - arguments[0].type->getName(), getName()); - } - - return result_column; - } -}; - - -/** Function toTOrZero (where T is number of date or datetime type): - * try to convert from String to type T through parsing, - * if cannot parse, return default value instead of throwing exception. 
- * Function toTOrNull will return Nullable type with NULL when cannot parse. - * NOTE Also need to implement tryToUnixTimestamp with timezone. - */ -template -class FunctionConvertFromString : public IFunction -{ -public: - static constexpr auto name = Name::name; - static constexpr bool to_decimal = - std::is_same_v> || - std::is_same_v> || - std::is_same_v> || - std::is_same_v>; - - static constexpr bool to_datetime64 = std::is_same_v; - - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - static FunctionPtr create() { return std::make_shared(); } - - String getName() const override - { - return name; - } - - bool isVariadic() const override { return true; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - - bool useDefaultImplementationForConstants() const override { return true; } - bool canBeExecutedOnDefaultArguments() const override { return false; } - - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - DataTypePtr res; - - if (isDateTime64(arguments)) - { - validateFunctionArgumentTypes(*this, arguments, - FunctionArgumentDescriptors{{"string", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}}, - // optional - FunctionArgumentDescriptors{ - {"precision", static_cast(&isUInt8), isColumnConst, "const UInt8"}, - {"timezone", static_cast(&isStringOrFixedString), isColumnConst, "const String or FixedString"}, - }); - - UInt64 scale = to_datetime64 ? DataTypeDateTime64::default_scale : 0; - if (arguments.size() > 1) - scale = extractToDecimalScale(arguments[1]); - const auto timezone = extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false); - - res = scale == 0 ? 
res = std::make_shared(timezone) : std::make_shared(scale, timezone); - } - else - { - if ((arguments.size() != 1 && arguments.size() != 2) || (to_decimal && arguments.size() != 2)) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2. " - "Second argument only make sense for DateTime (time zone, optional) and Decimal (scale).", - getName(), arguments.size()); - - if (!isStringOrFixedString(arguments[0].type)) - { - if (this->getName().find("OrZero") != std::string::npos || - this->getName().find("OrNull") != std::string::npos) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. " - "Conversion functions with postfix 'OrZero' or 'OrNull' should take String argument", - arguments[0].type->getName(), getName()); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", - arguments[0].type->getName(), getName()); - } - - if (arguments.size() == 2) - { - if constexpr (std::is_same_v) - { - if (!isString(arguments[1].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}", - arguments[1].type->getName(), getName()); - } - else if constexpr (to_decimal) - { - if (!isInteger(arguments[1].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}", - arguments[1].type->getName(), getName()); - if (!arguments[1].column) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be constant", getName()); - } - else - { - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 1. 
" - "Second argument makes sense only for DateTime and Decimal.", - getName(), arguments.size()); - } - } - - if constexpr (std::is_same_v) - res = std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0, false)); - else if constexpr (std::is_same_v) - throw Exception(ErrorCodes::LOGICAL_ERROR, "MaterializedMySQL is a bug."); - else if constexpr (to_decimal) - { - UInt64 scale = extractToDecimalScale(arguments[1]); - res = createDecimalMaxPrecision(scale); - if (!res) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Something wrong with toDecimalNNOrZero() or toDecimalNNOrNull()"); - } - else - res = std::make_shared(); - } - - if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) - res = std::make_shared(res); - - return res; - } - - template - ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, UInt32 scale = 0) const - { - const IDataType * from_type = arguments[0].type.get(); - - if (checkAndGetDataType(from_type)) - { - return ConvertThroughParsing::execute( - arguments, result_type, input_rows_count, scale); - } - else if (checkAndGetDataType(from_type)) - { - return ConvertThroughParsing::execute( - arguments, result_type, input_rows_count, scale); - } - - return nullptr; - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - ColumnPtr result_column; - - if constexpr (to_decimal) - result_column = executeInternal(arguments, result_type, input_rows_count, - assert_cast(*removeNullable(result_type)).getScale()); - else - { - if (isDateTime64(arguments)) - { - UInt64 scale = to_datetime64 ? 
DataTypeDateTime64::default_scale : 0; - if (arguments.size() > 1) - scale = extractToDecimalScale(arguments[1]); - - if (scale == 0) - result_column = executeInternal(arguments, result_type, input_rows_count); - else - { - result_column = executeInternal(arguments, result_type, input_rows_count, static_cast(scale)); - } - } - else - { - result_column = executeInternal(arguments, result_type, input_rows_count); - } - } - - if (!result_column) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " - "Only String or FixedString argument is accepted for try-conversion function. For other arguments, " - "use function without 'orZero' or 'orNull'.", arguments[0].type->getName(), getName()); - - return result_column; - } -}; - - -/// Monotonicity. - -struct PositiveMonotonicity -{ - static bool has() { return true; } - static IFunction::Monotonicity get(const IDataType &, const Field &, const Field &) - { - return { .is_monotonic = true }; - } -}; - -struct UnknownMonotonicity -{ - static bool has() { return false; } - static IFunction::Monotonicity get(const IDataType &, const Field &, const Field &) - { - return { }; - } -}; - -template -struct ToNumberMonotonicity -{ - static bool has() { return true; } - - static UInt64 divideByRangeOfType(UInt64 x) - { - if constexpr (sizeof(T) < sizeof(UInt64)) - return x >> (sizeof(T) * 8); - else - return 0; - } - - static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) - { - if (!type.isValueRepresentedByNumber()) - return {}; - - /// If type is same, the conversion is always monotonic. - /// (Enum has separate case, because it is different data type) - if (checkAndGetDataType>(&type) || - checkAndGetDataType>(&type)) - return { .is_monotonic = true, .is_always_monotonic = true }; - - /// Float cases. - - /// When converting to Float, the conversion is always monotonic. 
- if constexpr (std::is_floating_point_v) - return { .is_monotonic = true, .is_always_monotonic = true }; - - const auto * low_cardinality = typeid_cast(&type); - const IDataType * low_cardinality_dictionary_type = nullptr; - if (low_cardinality) - low_cardinality_dictionary_type = low_cardinality->getDictionaryType().get(); - - WhichDataType which_type(type); - WhichDataType which_inner_type = low_cardinality - ? WhichDataType(low_cardinality_dictionary_type) - : WhichDataType(type); - - /// If converting from Float, for monotonicity, arguments must fit in range of result type. - if (which_inner_type.isFloat()) - { - if (left.isNull() || right.isNull()) - return {}; - - Float64 left_float = left.get(); - Float64 right_float = right.get(); - - if (left_float >= static_cast(std::numeric_limits::min()) - && left_float <= static_cast(std::numeric_limits::max()) - && right_float >= static_cast(std::numeric_limits::min()) - && right_float <= static_cast(std::numeric_limits::max())) - return { .is_monotonic = true }; - - return {}; - } - - /// Integer cases. - - /// Only support types represented by native integers. - /// It can be extended to big integers, decimals and DateTime64 later. - /// By the way, NULLs are representing unbounded ranges. - if (!((left.isNull() || left.getType() == Field::Types::UInt64 || left.getType() == Field::Types::Int64) - && (right.isNull() || right.getType() == Field::Types::UInt64 || right.getType() == Field::Types::Int64))) - return {}; - - const bool from_is_unsigned = type.isValueRepresentedByUnsignedInteger(); - const bool to_is_unsigned = is_unsigned_v; - - const size_t size_of_from = type.getSizeOfValueInMemory(); - const size_t size_of_to = sizeof(T); - - const bool left_in_first_half = left.isNull() - ? from_is_unsigned - : (left.get() >= 0); - - const bool right_in_first_half = right.isNull() - ? !from_is_unsigned - : (right.get() >= 0); - - /// Size of type is the same. 
- if (size_of_from == size_of_to) - { - if (from_is_unsigned == to_is_unsigned) - return { .is_monotonic = true, .is_always_monotonic = true }; - - if (left_in_first_half == right_in_first_half) - return { .is_monotonic = true }; - - return {}; - } - - /// Size of type is expanded. - if (size_of_from < size_of_to) - { - if (from_is_unsigned == to_is_unsigned) - return { .is_monotonic = true, .is_always_monotonic = true }; - - if (!to_is_unsigned) - return { .is_monotonic = true, .is_always_monotonic = true }; - - /// signed -> unsigned. If arguments from the same half, then function is monotonic. - if (left_in_first_half == right_in_first_half) - return { .is_monotonic = true }; - - return {}; - } - - /// Size of type is shrunk. - if (size_of_from > size_of_to) - { - /// Function cannot be monotonic on unbounded ranges. - if (left.isNull() || right.isNull()) - return {}; - - /// Function cannot be monotonic when left and right are not on the same ranges. - if (divideByRangeOfType(left.get()) != divideByRangeOfType(right.get())) - return {}; - - if (to_is_unsigned) - return { .is_monotonic = true }; - else - { - // If To is signed, it's possible that the signedness is different after conversion. So we check it explicitly. 
- const bool is_monotonic = (T(left.get()) >= 0) == (T(right.get()) >= 0); - - return { .is_monotonic = is_monotonic }; - } - } - - UNREACHABLE(); - } -}; - -struct ToDateMonotonicity -{ - static bool has() { return true; } - - static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) - { - auto which = WhichDataType(type); - if (which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() || which.isInt8() || which.isInt16() || which.isUInt8() - || which.isUInt16()) - { - return {.is_monotonic = true, .is_always_monotonic = true}; - } - else if ( - ((left.getType() == Field::Types::UInt64 || left.isNull()) && (right.getType() == Field::Types::UInt64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) - || ((left.getType() == Field::Types::Int64 || left.isNull()) && (right.getType() == Field::Types::Int64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) - || (( - (left.getType() == Field::Types::Float64 || left.isNull()) - && (right.getType() == Field::Types::Float64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF)))) - || !isNativeNumber(type)) - { - return {}; - } - else - { - return {.is_monotonic = true, .is_always_monotonic = true}; - } - } -}; - -struct ToDateTimeMonotonicity -{ - static bool has() { return true; } - - static IFunction::Monotonicity get(const IDataType & type, const Field &, const Field &) - { - if (type.isValueRepresentedByNumber()) - return {.is_monotonic = true, .is_always_monotonic = true}; - else - return {}; - } -}; - -/** The monotonicity for the `toString` function is mainly determined for test purposes. - * It is doubtful that anyone is looking to optimize queries with conditions `toString(CounterID) = 34`. 
- */ -struct ToStringMonotonicity -{ - static bool has() { return true; } - - static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) - { - IFunction::Monotonicity positive{ .is_monotonic = true }; - IFunction::Monotonicity not_monotonic; - - const auto * type_ptr = &type; - if (const auto * low_cardinality_type = checkAndGetDataType(type_ptr)) - type_ptr = low_cardinality_type->getDictionaryType().get(); - - /// Order on enum values (which is the order on integers) is completely arbitrary in respect to the order on strings. - if (WhichDataType(type).isEnum()) - return not_monotonic; - - /// `toString` function is monotonous if the argument is Date or Date32 or DateTime or String, or non-negative numbers with the same number of symbols. - if (checkDataTypes(type_ptr)) - return positive; - - if (left.isNull() || right.isNull()) - return {}; - - if (left.getType() == Field::Types::UInt64 - && right.getType() == Field::Types::UInt64) - { - return (left.get() == 0 && right.get() == 0) - || (floor(log10(left.get())) == floor(log10(right.get()))) - ? positive : not_monotonic; - } - - if (left.getType() == Field::Types::Int64 - && right.getType() == Field::Types::Int64) - { - return (left.get() == 0 && right.get() == 0) - || (left.get() > 0 && right.get() > 0 && floor(log10(left.get())) == floor(log10(right.get()))) - ? 
positive : not_monotonic; - } - - return not_monotonic; - } -}; - - -struct NameToUInt8 { static constexpr auto name = "toUInt8"; }; -struct NameToUInt16 { static constexpr auto name = "toUInt16"; }; -struct NameToUInt32 { static constexpr auto name = "toUInt32"; }; -struct NameToUInt64 { static constexpr auto name = "toUInt64"; }; -struct NameToUInt128 { static constexpr auto name = "toUInt128"; }; -struct NameToUInt256 { static constexpr auto name = "toUInt256"; }; -struct NameToInt8 { static constexpr auto name = "toInt8"; }; -struct NameToInt16 { static constexpr auto name = "toInt16"; }; -struct NameToInt32 { static constexpr auto name = "toInt32"; }; -struct NameToInt64 { static constexpr auto name = "toInt64"; }; -struct NameToInt128 { static constexpr auto name = "toInt128"; }; -struct NameToInt256 { static constexpr auto name = "toInt256"; }; -struct NameToFloat32 { static constexpr auto name = "toFloat32"; }; -struct NameToFloat64 { static constexpr auto name = "toFloat64"; }; -struct NameToUUID { static constexpr auto name = "toUUID"; }; -struct NameToIPv4 { static constexpr auto name = "toIPv4"; }; -struct NameToIPv6 { static constexpr auto name = "toIPv6"; }; - -using FunctionToUInt8 = FunctionConvert>; -using FunctionToUInt16 = FunctionConvert>; -using FunctionToUInt32 = FunctionConvert>; -using FunctionToUInt64 = FunctionConvert>; -using FunctionToUInt128 = FunctionConvert>; -using FunctionToUInt256 = FunctionConvert>; -using FunctionToInt8 = FunctionConvert>; -using FunctionToInt16 = FunctionConvert>; -using FunctionToInt32 = FunctionConvert>; -using FunctionToInt64 = FunctionConvert>; -using FunctionToInt128 = FunctionConvert>; -using FunctionToInt256 = FunctionConvert>; -using FunctionToFloat32 = FunctionConvert>; -using FunctionToFloat64 = FunctionConvert>; - -using FunctionToDate = FunctionConvert; - -using FunctionToDate32 = FunctionConvert; - -using FunctionToDateTime = FunctionConvert; - -using FunctionToDateTime32 = FunctionConvert; - -using 
FunctionToDateTime64 = FunctionConvert; - -using FunctionToUUID = FunctionConvert>; -using FunctionToIPv4 = FunctionConvert>; -using FunctionToIPv6 = FunctionConvert>; -using FunctionToString = FunctionConvert; -using FunctionToUnixTimestamp = FunctionConvert>; -using FunctionToDecimal32 = FunctionConvert, NameToDecimal32, UnknownMonotonicity>; -using FunctionToDecimal64 = FunctionConvert, NameToDecimal64, UnknownMonotonicity>; -using FunctionToDecimal128 = FunctionConvert, NameToDecimal128, UnknownMonotonicity>; -using FunctionToDecimal256 = FunctionConvert, NameToDecimal256, UnknownMonotonicity>; - -template struct FunctionTo; - -template <> struct FunctionTo { using Type = FunctionToUInt8; }; -template <> struct FunctionTo { using Type = FunctionToUInt16; }; -template <> struct FunctionTo { using Type = FunctionToUInt32; }; -template <> struct FunctionTo { using Type = FunctionToUInt64; }; -template <> struct FunctionTo { using Type = FunctionToUInt128; }; -template <> struct FunctionTo { using Type = FunctionToUInt256; }; -template <> struct FunctionTo { using Type = FunctionToInt8; }; -template <> struct FunctionTo { using Type = FunctionToInt16; }; -template <> struct FunctionTo { using Type = FunctionToInt32; }; -template <> struct FunctionTo { using Type = FunctionToInt64; }; -template <> struct FunctionTo { using Type = FunctionToInt128; }; -template <> struct FunctionTo { using Type = FunctionToInt256; }; -template <> struct FunctionTo { using Type = FunctionToFloat32; }; -template <> struct FunctionTo { using Type = FunctionToFloat64; }; - -template -struct FunctionTo { using Type = FunctionToDate; }; - -template -struct FunctionTo { using Type = FunctionToDate32; }; - -template -struct FunctionTo { using Type = FunctionToDateTime; }; - -template -struct FunctionTo { using Type = FunctionToDateTime64; }; - -template <> struct FunctionTo { using Type = FunctionToUUID; }; -template <> struct FunctionTo { using Type = FunctionToIPv4; }; -template <> struct 
FunctionTo { using Type = FunctionToIPv6; }; -template <> struct FunctionTo { using Type = FunctionToString; }; -template <> struct FunctionTo { using Type = FunctionToFixedString; }; -template <> struct FunctionTo> { using Type = FunctionToDecimal32; }; -template <> struct FunctionTo> { using Type = FunctionToDecimal64; }; -template <> struct FunctionTo> { using Type = FunctionToDecimal128; }; -template <> struct FunctionTo> { using Type = FunctionToDecimal256; }; - -template struct FunctionTo> - : FunctionTo> -{ -}; - -struct NameToUInt8OrZero { static constexpr auto name = "toUInt8OrZero"; }; -struct NameToUInt16OrZero { static constexpr auto name = "toUInt16OrZero"; }; -struct NameToUInt32OrZero { static constexpr auto name = "toUInt32OrZero"; }; -struct NameToUInt64OrZero { static constexpr auto name = "toUInt64OrZero"; }; -struct NameToUInt128OrZero { static constexpr auto name = "toUInt128OrZero"; }; -struct NameToUInt256OrZero { static constexpr auto name = "toUInt256OrZero"; }; -struct NameToInt8OrZero { static constexpr auto name = "toInt8OrZero"; }; -struct NameToInt16OrZero { static constexpr auto name = "toInt16OrZero"; }; -struct NameToInt32OrZero { static constexpr auto name = "toInt32OrZero"; }; -struct NameToInt64OrZero { static constexpr auto name = "toInt64OrZero"; }; -struct NameToInt128OrZero { static constexpr auto name = "toInt128OrZero"; }; -struct NameToInt256OrZero { static constexpr auto name = "toInt256OrZero"; }; -struct NameToFloat32OrZero { static constexpr auto name = "toFloat32OrZero"; }; -struct NameToFloat64OrZero { static constexpr auto name = "toFloat64OrZero"; }; -struct NameToDateOrZero { static constexpr auto name = "toDateOrZero"; }; -struct NameToDate32OrZero { static constexpr auto name = "toDate32OrZero"; }; -struct NameToDateTimeOrZero { static constexpr auto name = "toDateTimeOrZero"; }; -struct NameToDateTime64OrZero { static constexpr auto name = "toDateTime64OrZero"; }; -struct NameToDecimal32OrZero { static 
constexpr auto name = "toDecimal32OrZero"; }; -struct NameToDecimal64OrZero { static constexpr auto name = "toDecimal64OrZero"; }; -struct NameToDecimal128OrZero { static constexpr auto name = "toDecimal128OrZero"; }; -struct NameToDecimal256OrZero { static constexpr auto name = "toDecimal256OrZero"; }; -struct NameToUUIDOrZero { static constexpr auto name = "toUUIDOrZero"; }; -struct NameToIPv4OrZero { static constexpr auto name = "toIPv4OrZero"; }; -struct NameToIPv6OrZero { static constexpr auto name = "toIPv6OrZero"; }; - -using FunctionToUInt8OrZero = FunctionConvertFromString; -using FunctionToUInt16OrZero = FunctionConvertFromString; -using FunctionToUInt32OrZero = FunctionConvertFromString; -using FunctionToUInt64OrZero = FunctionConvertFromString; -using FunctionToUInt128OrZero = FunctionConvertFromString; -using FunctionToUInt256OrZero = FunctionConvertFromString; -using FunctionToInt8OrZero = FunctionConvertFromString; -using FunctionToInt16OrZero = FunctionConvertFromString; -using FunctionToInt32OrZero = FunctionConvertFromString; -using FunctionToInt64OrZero = FunctionConvertFromString; -using FunctionToInt128OrZero = FunctionConvertFromString; -using FunctionToInt256OrZero = FunctionConvertFromString; -using FunctionToFloat32OrZero = FunctionConvertFromString; -using FunctionToFloat64OrZero = FunctionConvertFromString; -using FunctionToDateOrZero = FunctionConvertFromString; -using FunctionToDate32OrZero = FunctionConvertFromString; -using FunctionToDateTimeOrZero = FunctionConvertFromString; -using FunctionToDateTime64OrZero = FunctionConvertFromString; -using FunctionToDecimal32OrZero = FunctionConvertFromString, NameToDecimal32OrZero, ConvertFromStringExceptionMode::Zero>; -using FunctionToDecimal64OrZero = FunctionConvertFromString, NameToDecimal64OrZero, ConvertFromStringExceptionMode::Zero>; -using FunctionToDecimal128OrZero = FunctionConvertFromString, NameToDecimal128OrZero, ConvertFromStringExceptionMode::Zero>; -using 
FunctionToDecimal256OrZero = FunctionConvertFromString, NameToDecimal256OrZero, ConvertFromStringExceptionMode::Zero>; -using FunctionToUUIDOrZero = FunctionConvertFromString; -using FunctionToIPv4OrZero = FunctionConvertFromString; -using FunctionToIPv6OrZero = FunctionConvertFromString; - -struct NameToUInt8OrNull { static constexpr auto name = "toUInt8OrNull"; }; -struct NameToUInt16OrNull { static constexpr auto name = "toUInt16OrNull"; }; -struct NameToUInt32OrNull { static constexpr auto name = "toUInt32OrNull"; }; -struct NameToUInt64OrNull { static constexpr auto name = "toUInt64OrNull"; }; -struct NameToUInt128OrNull { static constexpr auto name = "toUInt128OrNull"; }; -struct NameToUInt256OrNull { static constexpr auto name = "toUInt256OrNull"; }; -struct NameToInt8OrNull { static constexpr auto name = "toInt8OrNull"; }; -struct NameToInt16OrNull { static constexpr auto name = "toInt16OrNull"; }; -struct NameToInt32OrNull { static constexpr auto name = "toInt32OrNull"; }; -struct NameToInt64OrNull { static constexpr auto name = "toInt64OrNull"; }; -struct NameToInt128OrNull { static constexpr auto name = "toInt128OrNull"; }; -struct NameToInt256OrNull { static constexpr auto name = "toInt256OrNull"; }; -struct NameToFloat32OrNull { static constexpr auto name = "toFloat32OrNull"; }; -struct NameToFloat64OrNull { static constexpr auto name = "toFloat64OrNull"; }; -struct NameToDateOrNull { static constexpr auto name = "toDateOrNull"; }; -struct NameToDate32OrNull { static constexpr auto name = "toDate32OrNull"; }; -struct NameToDateTimeOrNull { static constexpr auto name = "toDateTimeOrNull"; }; -struct NameToDateTime64OrNull { static constexpr auto name = "toDateTime64OrNull"; }; -struct NameToDecimal32OrNull { static constexpr auto name = "toDecimal32OrNull"; }; -struct NameToDecimal64OrNull { static constexpr auto name = "toDecimal64OrNull"; }; -struct NameToDecimal128OrNull { static constexpr auto name = "toDecimal128OrNull"; }; -struct 
NameToDecimal256OrNull { static constexpr auto name = "toDecimal256OrNull"; }; -struct NameToUUIDOrNull { static constexpr auto name = "toUUIDOrNull"; }; -struct NameToIPv4OrNull { static constexpr auto name = "toIPv4OrNull"; }; -struct NameToIPv6OrNull { static constexpr auto name = "toIPv6OrNull"; }; - -using FunctionToUInt8OrNull = FunctionConvertFromString; -using FunctionToUInt16OrNull = FunctionConvertFromString; -using FunctionToUInt32OrNull = FunctionConvertFromString; -using FunctionToUInt64OrNull = FunctionConvertFromString; -using FunctionToUInt128OrNull = FunctionConvertFromString; -using FunctionToUInt256OrNull = FunctionConvertFromString; -using FunctionToInt8OrNull = FunctionConvertFromString; -using FunctionToInt16OrNull = FunctionConvertFromString; -using FunctionToInt32OrNull = FunctionConvertFromString; -using FunctionToInt64OrNull = FunctionConvertFromString; -using FunctionToInt128OrNull = FunctionConvertFromString; -using FunctionToInt256OrNull = FunctionConvertFromString; -using FunctionToFloat32OrNull = FunctionConvertFromString; -using FunctionToFloat64OrNull = FunctionConvertFromString; -using FunctionToDateOrNull = FunctionConvertFromString; -using FunctionToDate32OrNull = FunctionConvertFromString; -using FunctionToDateTimeOrNull = FunctionConvertFromString; -using FunctionToDateTime64OrNull = FunctionConvertFromString; -using FunctionToDecimal32OrNull = FunctionConvertFromString, NameToDecimal32OrNull, ConvertFromStringExceptionMode::Null>; -using FunctionToDecimal64OrNull = FunctionConvertFromString, NameToDecimal64OrNull, ConvertFromStringExceptionMode::Null>; -using FunctionToDecimal128OrNull = FunctionConvertFromString, NameToDecimal128OrNull, ConvertFromStringExceptionMode::Null>; -using FunctionToDecimal256OrNull = FunctionConvertFromString, NameToDecimal256OrNull, ConvertFromStringExceptionMode::Null>; -using FunctionToUUIDOrNull = FunctionConvertFromString; -using FunctionToIPv4OrNull = FunctionConvertFromString; -using 
FunctionToIPv6OrNull = FunctionConvertFromString; - -struct NameParseDateTimeBestEffort { static constexpr auto name = "parseDateTimeBestEffort"; }; -struct NameParseDateTimeBestEffortOrZero { static constexpr auto name = "parseDateTimeBestEffortOrZero"; }; -struct NameParseDateTimeBestEffortOrNull { static constexpr auto name = "parseDateTimeBestEffortOrNull"; }; -struct NameParseDateTimeBestEffortUS { static constexpr auto name = "parseDateTimeBestEffortUS"; }; -struct NameParseDateTimeBestEffortUSOrZero { static constexpr auto name = "parseDateTimeBestEffortUSOrZero"; }; -struct NameParseDateTimeBestEffortUSOrNull { static constexpr auto name = "parseDateTimeBestEffortUSOrNull"; }; -struct NameParseDateTime32BestEffort { static constexpr auto name = "parseDateTime32BestEffort"; }; -struct NameParseDateTime32BestEffortOrZero { static constexpr auto name = "parseDateTime32BestEffortOrZero"; }; -struct NameParseDateTime32BestEffortOrNull { static constexpr auto name = "parseDateTime32BestEffortOrNull"; }; -struct NameParseDateTime64BestEffort { static constexpr auto name = "parseDateTime64BestEffort"; }; -struct NameParseDateTime64BestEffortOrZero { static constexpr auto name = "parseDateTime64BestEffortOrZero"; }; -struct NameParseDateTime64BestEffortOrNull { static constexpr auto name = "parseDateTime64BestEffortOrNull"; }; -struct NameParseDateTime64BestEffortUS { static constexpr auto name = "parseDateTime64BestEffortUS"; }; -struct NameParseDateTime64BestEffortUSOrZero { static constexpr auto name = "parseDateTime64BestEffortUSOrZero"; }; -struct NameParseDateTime64BestEffortUSOrNull { static constexpr auto name = "parseDateTime64BestEffortUSOrNull"; }; - - -using FunctionParseDateTimeBestEffort = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTimeBestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>; -using FunctionParseDateTimeBestEffortOrZero = FunctionConvertFromString< - DataTypeDateTime, 
NameParseDateTimeBestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>; -using FunctionParseDateTimeBestEffortOrNull = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTimeBestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>; - -using FunctionParseDateTimeBestEffortUS = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTimeBestEffortUS, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffortUS>; -using FunctionParseDateTimeBestEffortUSOrZero = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTimeBestEffortUSOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffortUS>; -using FunctionParseDateTimeBestEffortUSOrNull = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTimeBestEffortUSOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffortUS>; - -using FunctionParseDateTime32BestEffort = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTime32BestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>; -using FunctionParseDateTime32BestEffortOrZero = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTime32BestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>; -using FunctionParseDateTime32BestEffortOrNull = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTime32BestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>; - -using FunctionParseDateTime64BestEffort = FunctionConvertFromString< - DataTypeDateTime64, NameParseDateTime64BestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>; -using FunctionParseDateTime64BestEffortOrZero = FunctionConvertFromString< - DataTypeDateTime64, NameParseDateTime64BestEffortOrZero, ConvertFromStringExceptionMode::Zero, 
ConvertFromStringParsingMode::BestEffort>; -using FunctionParseDateTime64BestEffortOrNull = FunctionConvertFromString< - DataTypeDateTime64, NameParseDateTime64BestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>; - -using FunctionParseDateTime64BestEffortUS = FunctionConvertFromString< - DataTypeDateTime64, NameParseDateTime64BestEffortUS, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffortUS>; -using FunctionParseDateTime64BestEffortUSOrZero = FunctionConvertFromString< - DataTypeDateTime64, NameParseDateTime64BestEffortUSOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffortUS>; -using FunctionParseDateTime64BestEffortUSOrNull = FunctionConvertFromString< - DataTypeDateTime64, NameParseDateTime64BestEffortUSOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffortUS>; - - -class ExecutableFunctionCast : public IExecutableFunction -{ -public: - using WrapperType = std::function; - - explicit ExecutableFunctionCast( - WrapperType && wrapper_function_, const char * name_, std::optional diagnostic_) - : wrapper_function(std::move(wrapper_function_)), name(name_), diagnostic(std::move(diagnostic_)) {} - - String getName() const override { return name; } - -protected: - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - /// drop second argument, pass others - ColumnsWithTypeAndName new_arguments{arguments.front()}; - if (arguments.size() > 2) - new_arguments.insert(std::end(new_arguments), std::next(std::begin(arguments), 2), std::end(arguments)); - - try - { - return wrapper_function(new_arguments, result_type, nullptr, input_rows_count); - } - catch (Exception & e) - { - if (diagnostic) - e.addMessage("while converting source column " + backQuoteIfNeed(diagnostic->column_from) + - " to destination column " + 
backQuoteIfNeed(diagnostic->column_to)); - throw; - } - } - - bool useDefaultImplementationForNulls() const override { return false; } - /// CAST(Nothing, T) -> T - bool useDefaultImplementationForNothing() const override { return false; } - bool useDefaultImplementationForConstants() const override { return true; } - bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - -private: - WrapperType wrapper_function; - const char * name; - std::optional diagnostic; -}; - -struct CastName { static constexpr auto name = "CAST"; }; -struct CastInternalName { static constexpr auto name = "_CAST"; }; - -class FunctionCastBase : public IFunctionBase -{ -public: - using MonotonicityForRange = std::function; -}; - -template -class FunctionCast final : public FunctionCastBase -{ -public: - using WrapperType = std::function; - - FunctionCast(ContextPtr context_ - , const char * cast_name_ - , MonotonicityForRange && monotonicity_for_range_ - , const DataTypes & argument_types_ - , const DataTypePtr & return_type_ - , std::optional diagnostic_ - , CastType cast_type_) - : cast_name(cast_name_), monotonicity_for_range(std::move(monotonicity_for_range_)) - , argument_types(argument_types_), return_type(return_type_), diagnostic(std::move(diagnostic_)) - , cast_type(cast_type_) - , context(context_) - { - } - - const DataTypes & getArgumentTypes() const override { return argument_types; } - const DataTypePtr & getResultType() const override { return return_type; } - - ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName & /*sample_columns*/) const override - { - try - { - return std::make_unique( - prepareUnpackDictionaries(getArgumentTypes()[0], getResultType()), cast_name, diagnostic); - } - catch (Exception & e) - { - if (diagnostic) - e.addMessage("while converting source column " + backQuoteIfNeed(diagnostic->column_from) + - " to destination column " + 
backQuoteIfNeed(diagnostic->column_to)); - throw; - } - } - - String getName() const override { return cast_name; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - bool hasInformationAboutMonotonicity() const override - { - return static_cast(monotonicity_for_range); - } - - Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override - { - return monotonicity_for_range(type, left, right); - } - -private: - - const char * cast_name; - MonotonicityForRange monotonicity_for_range; - - DataTypes argument_types; - DataTypePtr return_type; - - std::optional diagnostic; - CastType cast_type; - ContextPtr context; - - static WrapperType createFunctionAdaptor(FunctionPtr function, const DataTypePtr & from_type) - { - auto function_adaptor = std::make_unique(function)->build({ColumnWithTypeAndName{nullptr, from_type, ""}}); - - return [function_adaptor] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) - { - return function_adaptor->execute(arguments, result_type, input_rows_count); - }; - } - - static WrapperType createToNullableColumnWrapper() - { - return [] (ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) - { - ColumnPtr res = result_type->createColumn(); - ColumnUInt8::Ptr col_null_map_to = ColumnUInt8::create(input_rows_count, true); - return ColumnNullable::create(res->cloneResized(input_rows_count), std::move(col_null_map_to)); - }; - } - - template - WrapperType createWrapper(const DataTypePtr & from_type, const ToDataType * const to_type, bool requested_result_is_nullable) const - { - TypeIndex from_type_index = from_type->getTypeId(); - WhichDataType which(from_type_index); - bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull) - && 
(which.isInt() || which.isUInt() || which.isFloat()); - - FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; - if (context) - date_time_overflow_behavior = context->getSettingsRef().date_time_overflow_behavior; - - if (requested_result_is_nullable && checkAndGetDataType(from_type.get())) - { - /// In case when converting to Nullable type, we apply different parsing rule, - /// that will not throw an exception but return NULL in case of malformed input. - FunctionPtr function = FunctionConvertFromString::create(); - return createFunctionAdaptor(function, from_type); - } - else if (!can_apply_accurate_cast) - { - FunctionPtr function = FunctionTo::Type::create(context); - return createFunctionAdaptor(function, from_type); - } - - auto wrapper_cast_type = cast_type; - - return [wrapper_cast_type, from_type_index, to_type, date_time_overflow_behavior] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *column_nullable, size_t input_rows_count) - { - ColumnPtr result_column; - auto res = callOnIndexAndDataType(from_type_index, [&](const auto & types) -> bool { - using Types = std::decay_t; - using LeftDataType = typename Types::LeftType; - using RightDataType = typename Types::RightType; - - if constexpr (IsDataTypeNumber) - { - if constexpr (IsDataTypeNumber) - { -#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE, ADDITIONS) \ - case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ - result_column = ConvertImpl::execute( \ - arguments, result_type, input_rows_count, ADDITIONS()); \ - break; - if (wrapper_cast_type == CastType::accurate) - { - switch (date_time_overflow_behavior) - { - GENERATE_OVERFLOW_MODE_CASE(Throw, AccurateConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Ignore, AccurateConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Saturate, AccurateConvertStrategyAdditions) - } - } - else - { - switch (date_time_overflow_behavior) - { - 
GENERATE_OVERFLOW_MODE_CASE(Throw, AccurateOrNullConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Ignore, AccurateOrNullConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Saturate, AccurateOrNullConvertStrategyAdditions) - } - } -#undef GENERATE_OVERFLOW_MODE_CASE - - return true; - } - - if constexpr (std::is_same_v || std::is_same_v) - { -#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE, ADDITIONS) \ - case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ - result_column = ConvertImpl::template execute( \ -arguments, result_type, input_rows_count); \ - break; - if (wrapper_cast_type == CastType::accurate) - { - switch (date_time_overflow_behavior) - { - GENERATE_OVERFLOW_MODE_CASE(Throw, DateTimeAccurateConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Ignore, DateTimeAccurateConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Saturate, DateTimeAccurateConvertStrategyAdditions) - } - } - else - { - switch (date_time_overflow_behavior) - { - GENERATE_OVERFLOW_MODE_CASE(Throw, DateTimeAccurateOrNullConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Ignore, DateTimeAccurateOrNullConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Saturate, DateTimeAccurateOrNullConvertStrategyAdditions) - } - } -#undef GENERATE_OVERFLOW_MODE_CASE - return true; - } - } - - return false; - }); - - /// Additionally check if callOnIndexAndDataType wasn't called at all. 
- if (!res) - { - if (wrapper_cast_type == CastType::accurateOrNull) - { - auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper(); - return nullable_column_wrapper(arguments, result_type, column_nullable, input_rows_count); - } - else - { - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, - "Conversion from {} to {} is not supported", - from_type_index, to_type->getName()); - } - } - - return result_column; - }; - } - - template - WrapperType createBoolWrapper(const DataTypePtr & from_type, const ToDataType * const to_type, bool requested_result_is_nullable) const - { - if (checkAndGetDataType(from_type.get())) - { - return &ConvertImplGenericFromString::execute; - } - - return createWrapper(from_type, to_type, requested_result_is_nullable); - } - - WrapperType createUInt8ToBoolWrapper(const DataTypePtr from_type, const DataTypePtr to_type) const - { - return [from_type, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) -> ColumnPtr - { - /// Special case when we convert UInt8 column to Bool column. - /// both columns have type UInt8, but we shouldn't use identity wrapper, - /// because Bool column can contain only 0 and 1. 
- auto res_column = to_type->createColumn(); - const auto & data_from = checkAndGetColumn(arguments[0].column.get())->getData(); - auto & data_to = assert_cast(res_column.get())->getData(); - data_to.resize(data_from.size()); - for (size_t i = 0; i != data_from.size(); ++i) - data_to[i] = static_cast(data_from[i]); - return res_column; - }; - } - - static WrapperType createStringWrapper(const DataTypePtr & from_type) - { - FunctionPtr function = FunctionToString::create(); - return createFunctionAdaptor(function, from_type); - } - - WrapperType createFixedStringWrapper(const DataTypePtr & from_type, const size_t N) const - { - if (!isStringOrFixedString(from_type)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CAST AS FixedString is only implemented for types String and FixedString"); - - bool exception_mode_null = cast_type == CastType::accurateOrNull; - return [exception_mode_null, N] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) - { - if (exception_mode_null) - return FunctionToFixedString::executeForN(arguments, N); - else - return FunctionToFixedString::executeForN(arguments, N); - }; - } - -#define GENERATE_INTERVAL_CASE(INTERVAL_KIND) \ - case IntervalKind::Kind::INTERVAL_KIND: \ - return createFunctionAdaptor(FunctionConvert::create(), from_type); - - static WrapperType createIntervalWrapper(const DataTypePtr & from_type, IntervalKind kind) - { - switch (kind) - { - GENERATE_INTERVAL_CASE(Nanosecond) - GENERATE_INTERVAL_CASE(Microsecond) - GENERATE_INTERVAL_CASE(Millisecond) - GENERATE_INTERVAL_CASE(Second) - GENERATE_INTERVAL_CASE(Minute) - GENERATE_INTERVAL_CASE(Hour) - GENERATE_INTERVAL_CASE(Day) - GENERATE_INTERVAL_CASE(Week) - GENERATE_INTERVAL_CASE(Month) - GENERATE_INTERVAL_CASE(Quarter) - GENERATE_INTERVAL_CASE(Year) - } - throw Exception{ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion to unexpected IntervalKind: {}", kind.toString()}; - } - -#undef GENERATE_INTERVAL_CASE - - template - 
requires IsDataTypeDecimal - WrapperType createDecimalWrapper(const DataTypePtr & from_type, const ToDataType * to_type, bool requested_result_is_nullable) const - { - TypeIndex type_index = from_type->getTypeId(); - UInt32 scale = to_type->getScale(); - - WhichDataType which(type_index); - bool ok = which.isNativeInt() || which.isNativeUInt() || which.isDecimal() || which.isFloat() || which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() - || which.isStringOrFixedString(); - if (!ok) - { - if (cast_type == CastType::accurateOrNull) - return createToNullableColumnWrapper(); - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", - from_type->getName(), to_type->getName()); - } - - auto wrapper_cast_type = cast_type; - - return [wrapper_cast_type, type_index, scale, to_type, requested_result_is_nullable] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *column_nullable, size_t input_rows_count) - { - ColumnPtr result_column; - auto res = callOnIndexAndDataType(type_index, [&](const auto & types) -> bool - { - using Types = std::decay_t; - using LeftDataType = typename Types::LeftType; - using RightDataType = typename Types::RightType; - - if constexpr (IsDataTypeDecimalOrNumber && IsDataTypeDecimalOrNumber && !std::is_same_v) - { - if (wrapper_cast_type == CastType::accurate) - { - AccurateConvertStrategyAdditions additions; - additions.scale = scale; - result_column = ConvertImpl::execute( - arguments, result_type, input_rows_count, additions); - - return true; - } - else if (wrapper_cast_type == CastType::accurateOrNull) - { - AccurateOrNullConvertStrategyAdditions additions; - additions.scale = scale; - result_column = ConvertImpl::execute( - arguments, result_type, input_rows_count, additions); - - return true; - } - } - else if constexpr (std::is_same_v) - { - if (requested_result_is_nullable) - { - /// Consistent with CAST(Nullable(String) AS 
Nullable(Numbers)) - /// In case when converting to Nullable type, we apply different parsing rule, - /// that will not throw an exception but return NULL in case of malformed input. - result_column = ConvertImpl::execute( - arguments, result_type, input_rows_count, scale); - - return true; - } - } - - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); - - return true; - }); - - /// Additionally check if callOnIndexAndDataType wasn't called at all. - if (!res) - { - if (wrapper_cast_type == CastType::accurateOrNull) - { - auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper(); - return nullable_column_wrapper(arguments, result_type, column_nullable, input_rows_count); - } - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, - "Conversion from {} to {} is not supported", - type_index, to_type->getName()); - } - - return result_column; - }; - } - - WrapperType createAggregateFunctionWrapper(const DataTypePtr & from_type_untyped, const DataTypeAggregateFunction * to_type) const - { - /// Conversion from String through parsing. 
- if (checkAndGetDataType(from_type_untyped.get())) - { - return &ConvertImplGenericFromString::execute; - } - else if (const auto * agg_type = checkAndGetDataType(from_type_untyped.get())) - { - if (agg_type->getFunction()->haveSameStateRepresentation(*to_type->getFunction())) - { - return [function = to_type->getFunction()]( - ColumnsWithTypeAndName & arguments, - const DataTypePtr & /* result_type */, - const ColumnNullable * /* nullable_source */, - size_t /*input_rows_count*/) -> ColumnPtr - { - const auto & argument_column = arguments.front(); - const auto * col_agg = checkAndGetColumn(argument_column.column.get()); - if (col_agg) - { - auto new_col_agg = ColumnAggregateFunction::create(*col_agg); - new_col_agg->set(function); - return new_col_agg; - } - else - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Illegal column {} for function CAST AS AggregateFunction", - argument_column.column->getName()); - } - }; - } - } - - if (cast_type == CastType::accurateOrNull) - return createToNullableColumnWrapper(); - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", - from_type_untyped->getName(), to_type->getName()); - } - - WrapperType createArrayWrapper(const DataTypePtr & from_type_untyped, const DataTypeArray & to_type) const - { - /// Conversion from String through parsing. - if (checkAndGetDataType(from_type_untyped.get())) - { - return &ConvertImplGenericFromString::execute; - } - - DataTypePtr from_type_holder; - const auto * from_type = checkAndGetDataType(from_type_untyped.get()); - const auto * from_type_map = checkAndGetDataType(from_type_untyped.get()); - - /// Convert from Map - if (from_type_map) - { - /// Recreate array of unnamed tuples because otherwise it may work - /// unexpectedly while converting to array of named tuples. 
- from_type_holder = from_type_map->getNestedTypeWithUnnamedTuple(); - from_type = assert_cast(from_type_holder.get()); - } - - if (!from_type) - { - throw Exception(ErrorCodes::TYPE_MISMATCH, - "CAST AS Array can only be performed between same-dimensional Array, Map or String types"); - } - - DataTypePtr from_nested_type = from_type->getNestedType(); - - /// In query SELECT CAST([] AS Array(Array(String))) from type is Array(Nothing) - bool from_empty_array = isNothing(from_nested_type); - - if (from_type->getNumberOfDimensions() != to_type.getNumberOfDimensions() && !from_empty_array) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "CAST AS Array can only be performed between same-dimensional array types"); - - const DataTypePtr & to_nested_type = to_type.getNestedType(); - - /// Prepare nested type conversion - const auto nested_function = prepareUnpackDictionaries(from_nested_type, to_nested_type); - - return [nested_function, from_nested_type, to_nested_type]( - ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr - { - const auto & argument_column = arguments.front(); - - const ColumnArray * col_array = nullptr; - - if (const ColumnMap * col_map = checkAndGetColumn(argument_column.column.get())) - col_array = &col_map->getNestedColumn(); - else - col_array = checkAndGetColumn(argument_column.column.get()); - - if (col_array) - { - /// create columns for converting nested column containing original and result columns - ColumnsWithTypeAndName nested_columns{{ col_array->getDataPtr(), from_nested_type, "" }}; - - /// convert nested column - auto result_column = nested_function(nested_columns, to_nested_type, nullable_source, nested_columns.front().column->size()); - - /// set converted nested column to result - return ColumnArray::create(result_column, col_array->getOffsetsPtr()); - } - else - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Illegal column {} for function CAST 
AS Array", - argument_column.column->getName()); - } - }; - } - - using ElementWrappers = std::vector; - - ElementWrappers getElementWrappers(const DataTypes & from_element_types, const DataTypes & to_element_types) const - { - ElementWrappers element_wrappers; - element_wrappers.reserve(from_element_types.size()); - - /// Create conversion wrapper for each element in tuple - for (size_t i = 0; i < from_element_types.size(); ++i) - { - const DataTypePtr & from_element_type = from_element_types[i]; - const DataTypePtr & to_element_type = to_element_types[i]; - element_wrappers.push_back(prepareUnpackDictionaries(from_element_type, to_element_type)); - } - - return element_wrappers; - } - - WrapperType createTupleWrapper(const DataTypePtr & from_type_untyped, const DataTypeTuple * to_type) const - { - /// Conversion from String through parsing. - if (checkAndGetDataType(from_type_untyped.get())) - { - return &ConvertImplGenericFromString::execute; - } - - const auto * from_type = checkAndGetDataType(from_type_untyped.get()); - if (!from_type) - throw Exception(ErrorCodes::TYPE_MISMATCH, "CAST AS Tuple can only be performed between tuple types or from String.\n" - "Left type: {}, right type: {}", from_type_untyped->getName(), to_type->getName()); - - const auto & from_element_types = from_type->getElements(); - const auto & to_element_types = to_type->getElements(); - - std::vector element_wrappers; - std::vector> to_reverse_index; - - /// For named tuples allow conversions for tuples with - /// different sets of elements. If element exists in @to_type - /// and doesn't exist in @to_type it will be filled by default values. 
- if (from_type->haveExplicitNames() && to_type->haveExplicitNames()) - { - const auto & from_names = from_type->getElementNames(); - std::unordered_map from_positions; - from_positions.reserve(from_names.size()); - for (size_t i = 0; i < from_names.size(); ++i) - from_positions[from_names[i]] = i; - - const auto & to_names = to_type->getElementNames(); - element_wrappers.reserve(to_names.size()); - to_reverse_index.reserve(from_names.size()); - - for (size_t i = 0; i < to_names.size(); ++i) - { - auto it = from_positions.find(to_names[i]); - if (it != from_positions.end()) - { - element_wrappers.emplace_back(prepareUnpackDictionaries(from_element_types[it->second], to_element_types[i])); - to_reverse_index.emplace_back(it->second); - } - else - { - element_wrappers.emplace_back(); - to_reverse_index.emplace_back(); - } - } - } - else - { - if (from_element_types.size() != to_element_types.size()) - throw Exception(ErrorCodes::TYPE_MISMATCH, "CAST AS Tuple can only be performed between tuple types " - "with the same number of elements or from String.\nLeft type: {}, right type: {}", - from_type->getName(), to_type->getName()); - - element_wrappers = getElementWrappers(from_element_types, to_element_types); - to_reverse_index.reserve(to_element_types.size()); - for (size_t i = 0; i < to_element_types.size(); ++i) - to_reverse_index.emplace_back(i); - } - - return [element_wrappers, from_element_types, to_element_types, to_reverse_index] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count) -> ColumnPtr - { - const auto * col = arguments.front().column.get(); - - size_t tuple_size = to_element_types.size(); - const ColumnTuple & column_tuple = typeid_cast(*col); - - Columns converted_columns(tuple_size); - - /// invoke conversion for each element - for (size_t i = 0; i < tuple_size; ++i) - { - if (to_reverse_index[i]) - { - size_t from_idx = *to_reverse_index[i]; - ColumnsWithTypeAndName element = 
{{column_tuple.getColumns()[from_idx], from_element_types[from_idx], "" }}; - converted_columns[i] = element_wrappers[i](element, to_element_types[i], nullable_source, input_rows_count); - } - else - { - converted_columns[i] = to_element_types[i]->createColumn()->cloneResized(input_rows_count); - } - } - - return ColumnTuple::create(converted_columns); - }; - } - - /// The case of: tuple([key1, key2, ..., key_n], [value1, value2, ..., value_n]) - WrapperType createTupleToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const - { - return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr - { - const auto * col = arguments.front().column.get(); - const auto & column_tuple = assert_cast(*col); - - Columns offsets(2); - Columns converted_columns(2); - for (size_t i = 0; i < 2; ++i) - { - const auto & column_array = assert_cast(column_tuple.getColumn(i)); - ColumnsWithTypeAndName element = {{column_array.getDataPtr(), from_kv_types[i], ""}}; - converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size()); - offsets[i] = column_array.getOffsetsPtr(); - } - - const auto & keys_offsets = assert_cast(*offsets[0]).getData(); - const auto & values_offsets = assert_cast(*offsets[1]).getData(); - if (keys_offsets != values_offsets) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "CAST AS Map can only be performed from tuple of arrays with equal sizes."); - - return ColumnMap::create(converted_columns[0], converted_columns[1], offsets[0]); - }; - } - - WrapperType createMapToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const - { - return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types] - (ColumnsWithTypeAndName & arguments, const DataTypePtr 
&, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr - { - const auto * col = arguments.front().column.get(); - const auto & column_map = typeid_cast(*col); - const auto & nested_data = column_map.getNestedData(); - - Columns converted_columns(2); - for (size_t i = 0; i < 2; ++i) - { - ColumnsWithTypeAndName element = {{nested_data.getColumnPtr(i), from_kv_types[i], ""}}; - converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size()); - } - - return ColumnMap::create(converted_columns[0], converted_columns[1], column_map.getNestedColumn().getOffsetsPtr()); - }; - } - - /// The case of: [(key1, value1), (key2, value2), ...] - WrapperType createArrayToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const - { - return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr - { - const auto * col = arguments.front().column.get(); - const auto & column_array = typeid_cast(*col); - const auto & nested_data = typeid_cast(column_array.getData()); - - Columns converted_columns(2); - for (size_t i = 0; i < 2; ++i) - { - ColumnsWithTypeAndName element = {{nested_data.getColumnPtr(i), from_kv_types[i], ""}}; - converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size()); - } - - return ColumnMap::create(converted_columns[0], converted_columns[1], column_array.getOffsetsPtr()); - }; - } - - - WrapperType createMapWrapper(const DataTypePtr & from_type_untyped, const DataTypeMap * to_type) const - { - if (const auto * from_tuple = checkAndGetDataType(from_type_untyped.get())) - { - if (from_tuple->getElements().size() != 2) - throw Exception( - ErrorCodes::TYPE_MISMATCH, - "CAST AS Map from tuple requires 2 elements. 
" - "Left type: {}, right type: {}", - from_tuple->getName(), - to_type->getName()); - - DataTypes from_kv_types; - const auto & to_kv_types = to_type->getKeyValueTypes(); - - for (const auto & elem : from_tuple->getElements()) - { - const auto * type_array = checkAndGetDataType(elem.get()); - if (!type_array) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "CAST AS Map can only be performed from tuples of array. Got: {}", from_tuple->getName()); - - from_kv_types.push_back(type_array->getNestedType()); - } - - return createTupleToMapWrapper(from_kv_types, to_kv_types); - } - else if (const auto * from_array = typeid_cast(from_type_untyped.get())) - { - const auto * nested_tuple = typeid_cast(from_array->getNestedType().get()); - if (!nested_tuple || nested_tuple->getElements().size() != 2) - throw Exception( - ErrorCodes::TYPE_MISMATCH, - "CAST AS Map from array requires nested tuple of 2 elements. " - "Left type: {}, right type: {}", - from_array->getName(), - to_type->getName()); - - return createArrayToMapWrapper(nested_tuple->getElements(), to_type->getKeyValueTypes()); - } - else if (const auto * from_type = checkAndGetDataType(from_type_untyped.get())) - { - return createMapToMapWrapper(from_type->getKeyValueTypes(), to_type->getKeyValueTypes()); - } - else - { - throw Exception(ErrorCodes::TYPE_MISMATCH, "Unsupported types to CAST AS Map. " - "Left type: {}, right type: {}", from_type_untyped->getName(), to_type->getName()); - } - } - - WrapperType createTupleToObjectWrapper(const DataTypeTuple & from_tuple, bool has_nullable_subcolumns) const - { - if (!from_tuple.haveExplicitNames()) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Cast to Object can be performed only from flatten Named Tuple. 
Got: {}", from_tuple.getName()); - - PathsInData paths; - DataTypes from_types; - - std::tie(paths, from_types) = flattenTuple(from_tuple.getPtr()); - auto to_types = from_types; - - for (auto & type : to_types) - { - if (isTuple(type) || isNested(type)) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Cast to Object can be performed only from flatten Named Tuple. Got: {}", - from_tuple.getName()); - - type = recursiveRemoveLowCardinality(type); - } - - return [element_wrappers = getElementWrappers(from_types, to_types), - has_nullable_subcolumns, from_types, to_types, paths] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count) - { - size_t tuple_size = to_types.size(); - auto flattened_column = flattenTuple(arguments.front().column); - const auto & column_tuple = assert_cast(*flattened_column); - - if (tuple_size != column_tuple.getColumns().size()) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Expected tuple with {} subcolumn, but got {} subcolumns", - tuple_size, column_tuple.getColumns().size()); - - auto res = ColumnObject::create(has_nullable_subcolumns); - for (size_t i = 0; i < tuple_size; ++i) - { - ColumnsWithTypeAndName element = {{column_tuple.getColumns()[i], from_types[i], "" }}; - auto converted_column = element_wrappers[i](element, to_types[i], nullable_source, input_rows_count); - res->addSubcolumn(paths[i], converted_column->assumeMutable()); - } - - return res; - }; - } - - WrapperType createMapToObjectWrapper(const DataTypeMap & from_map, bool has_nullable_subcolumns) const - { - auto key_value_types = from_map.getKeyValueTypes(); - - if (!isStringOrFixedString(key_value_types[0])) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Cast to Object from Map can be performed only from Map " - "with String or FixedString key. 
Got: {}", from_map.getName()); - - const auto & value_type = key_value_types[1]; - auto to_value_type = value_type; - - if (!has_nullable_subcolumns && value_type->isNullable()) - to_value_type = removeNullable(value_type); - - if (has_nullable_subcolumns && !value_type->isNullable()) - to_value_type = makeNullable(value_type); - - DataTypes to_key_value_types{std::make_shared(), std::move(to_value_type)}; - auto element_wrappers = getElementWrappers(key_value_types, to_key_value_types); - - return [has_nullable_subcolumns, element_wrappers, key_value_types, to_key_value_types] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t) -> ColumnPtr - { - const auto & column_map = assert_cast(*arguments.front().column); - const auto & offsets = column_map.getNestedColumn().getOffsets(); - auto key_value_columns = column_map.getNestedData().getColumnsCopy(); - - for (size_t i = 0; i < 2; ++i) - { - ColumnsWithTypeAndName element{{key_value_columns[i], key_value_types[i], ""}}; - key_value_columns[i] = element_wrappers[i](element, to_key_value_types[i], nullable_source, key_value_columns[i]->size()); - } - - const auto & key_column_str = assert_cast(*key_value_columns[0]); - const auto & value_column = *key_value_columns[1]; - - using SubcolumnsMap = HashMap; - SubcolumnsMap subcolumns; - - for (size_t row = 0; row < offsets.size(); ++row) - { - for (size_t i = offsets[static_cast(row) - 1]; i < offsets[row]; ++i) - { - auto ref = key_column_str.getDataAt(i); - - bool inserted; - SubcolumnsMap::LookupResult it; - subcolumns.emplace(ref, it, inserted); - auto & subcolumn = it->getMapped(); - - if (inserted) - subcolumn = value_column.cloneEmpty()->cloneResized(row); - - /// Map can have duplicated keys. We insert only first one. - if (subcolumn->size() == row) - subcolumn->insertFrom(value_column, i); - } - - /// Insert default values for keys missed in current row. 
- for (const auto & [_, subcolumn] : subcolumns) - if (subcolumn->size() == row) - subcolumn->insertDefault(); - } - - auto column_object = ColumnObject::create(has_nullable_subcolumns); - for (auto && [key, subcolumn] : subcolumns) - { - PathInData path(key.toView()); - column_object->addSubcolumn(path, std::move(subcolumn)); - } - - return column_object; - }; - } - - WrapperType createObjectWrapper(const DataTypePtr & from_type, const DataTypeObject * to_type) const - { - if (const auto * from_tuple = checkAndGetDataType(from_type.get())) - { - return createTupleToObjectWrapper(*from_tuple, to_type->hasNullableSubcolumns()); - } - else if (const auto * from_map = checkAndGetDataType(from_type.get())) - { - return createMapToObjectWrapper(*from_map, to_type->hasNullableSubcolumns()); - } - else if (checkAndGetDataType(from_type.get())) - { - return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) - { - auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); - res->finalize(); - return res; - }; - } - else if (checkAndGetDataType(from_type.get())) - { - return [is_nullable = to_type->hasNullableSubcolumns()] (ColumnsWithTypeAndName & arguments, const DataTypePtr & , const ColumnNullable * , size_t) -> ColumnPtr - { - auto & column_object = assert_cast(*arguments.front().column); - auto res = ColumnObject::create(is_nullable); - for (size_t i = 0; i < column_object.size(); i++) - res->insert(column_object[i]); - - res->finalize(); - return res; - }; - } - - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Cast to Object can be performed only from flatten named Tuple, Map or String. 
Got: {}", from_type->getName()); - } - - WrapperType createVariantToVariantWrapper(const DataTypeVariant & from_variant, const DataTypeVariant & to_variant) const - { - /// We support only extension of variant type, so, only new types can be added. - /// For example: Variant(T1, T2) -> Variant(T1, T2, T3) is supported, but Variant(T1, T2) -> Variant(T1, T3) is not supported. - /// We want to extend Variant type for free without rewriting the data, but we sort data types inside Variant during type creation - /// (we do it because we want Variant(T1, T2) to be the same as Variant(T2, T1)), but after extension the order of variant types - /// (and so their discriminators) can be different. For example: Variant(T1, T3) -> Variant(T1, T2, T3). - /// To avoid full rewrite of discriminators column, ColumnVariant supports it's local order of variant columns (and so local - /// discriminators) and stores mapping global order -> local order. - /// So, to extend Variant with new types for free, we should keep old local order for old variants, append new variants and change - /// mapping global order -> local order according to the new global order. - - /// Create map (new variant type) -> (it's global discriminator in new order). - const auto & new_variants = to_variant.getVariants(); - std::unordered_map new_variant_types_to_new_global_discriminator; - new_variant_types_to_new_global_discriminator.reserve(new_variants.size()); - for (size_t i = 0; i != new_variants.size(); ++i) - new_variant_types_to_new_global_discriminator[new_variants[i]->getName()] = i; - - /// Create set of old variant types. 
- const auto & old_variants = from_variant.getVariants(); - std::unordered_map old_variant_types_to_old_global_discriminator; - old_variant_types_to_old_global_discriminator.reserve(old_variants.size()); - for (size_t i = 0; i != old_variants.size(); ++i) - old_variant_types_to_old_global_discriminator[old_variants[i]->getName()] = i; - - /// Check that the set of old variants types is a subset of new variant types and collect new global discriminator for each old global discriminator. - std::unordered_map old_global_discriminator_to_new; - old_global_discriminator_to_new.reserve(old_variants.size()); - for (const auto & [old_variant_type, old_discriminator] : old_variant_types_to_old_global_discriminator) - { - auto it = new_variant_types_to_new_global_discriminator.find(old_variant_type); - if (it == new_variant_types_to_new_global_discriminator.end()) - throw Exception( - ErrorCodes::CANNOT_CONVERT_TYPE, - "Cannot convert type {} to {}. Conversion between Variant types is allowed only when new Variant type is an extension " - "of an initial one", from_variant.getName(), to_variant.getName()); - old_global_discriminator_to_new[old_discriminator] = it->second; - } - - /// Collect variant types and their global discriminators that should be added to the old Variant to get the new Variant. 
- std::vector> variant_types_and_discriminators_to_add; - variant_types_and_discriminators_to_add.reserve(new_variants.size() - old_variants.size()); - for (size_t i = 0; i != new_variants.size(); ++i) - { - if (!old_variant_types_to_old_global_discriminator.contains(new_variants[i]->getName())) - variant_types_and_discriminators_to_add.emplace_back(new_variants[i], i); - } - - return [old_global_discriminator_to_new, variant_types_and_discriminators_to_add] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr - { - const auto & column_variant = assert_cast(*arguments.front().column.get()); - size_t num_old_variants = column_variant.getNumVariants(); - Columns new_variant_columns; - new_variant_columns.reserve(num_old_variants + variant_types_and_discriminators_to_add.size()); - std::vector new_local_to_global_discriminators; - new_local_to_global_discriminators.reserve(num_old_variants + variant_types_and_discriminators_to_add.size()); - for (size_t i = 0; i != num_old_variants; ++i) - { - new_variant_columns.push_back(column_variant.getVariantPtrByLocalDiscriminator(i)); - new_local_to_global_discriminators.push_back(old_global_discriminator_to_new.at(column_variant.globalDiscriminatorByLocal(i))); - } - - for (const auto & [new_variant_type, new_global_discriminator] : variant_types_and_discriminators_to_add) - { - new_variant_columns.push_back(new_variant_type->createColumn()); - new_local_to_global_discriminators.push_back(new_global_discriminator); - } - - return ColumnVariant::create(column_variant.getLocalDiscriminatorsPtr(), column_variant.getOffsetsPtr(), new_variant_columns, new_local_to_global_discriminators); - }; - } - - WrapperType createVariantToColumnWrapper(const DataTypeVariant & from_variant, const DataTypePtr & to_type) const - { - const auto & variant_types = from_variant.getVariants(); - std::vector variant_wrappers; - variant_wrappers.reserve(variant_types.size()); - - /// Create conversion 
wrapper for each variant. - for (const auto & variant_type : variant_types) - variant_wrappers.push_back(prepareUnpackDictionaries(variant_type, to_type)); - - return [variant_wrappers, variant_types, to_type] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr - { - const auto & column_variant = assert_cast(*arguments.front().column.get()); - - /// First, cast each variant to the result type. - std::vector casted_variant_columns; - casted_variant_columns.reserve(variant_types.size()); - for (size_t i = 0; i != variant_types.size(); ++i) - { - auto variant_col = column_variant.getVariantPtrByLocalDiscriminator(i); - ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], "" }}; - const auto & variant_wrapper = variant_wrappers[column_variant.globalDiscriminatorByLocal(i)]; - casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size())); - } - - /// Second, construct resulting column from casted variant columns according to discriminators. 
- const auto & local_discriminators = column_variant.getLocalDiscriminators(); - auto res = result_type->createColumn(); - res->reserve(input_rows_count); - for (size_t i = 0; i != input_rows_count; ++i) - { - auto local_discr = local_discriminators[i]; - if (local_discr == ColumnVariant::NULL_DISCRIMINATOR) - res->insertDefault(); - else - res->insertFrom(*casted_variant_columns[local_discr], column_variant.offsetAt(i)); - } - - return res; - }; - } - - static ColumnPtr createVariantFromDescriptorsAndOneNonEmptyVariant(const DataTypes & variant_types, const ColumnPtr & discriminators, const ColumnPtr & variant, ColumnVariant::Discriminator variant_discr) - { - Columns variants; - variants.reserve(variant_types.size()); - for (size_t i = 0; i != variant_types.size(); ++i) - { - if (i == variant_discr) - variants.emplace_back(variant); - else - variants.push_back(variant_types[i]->createColumn()); - } - - return ColumnVariant::create(discriminators, variants); - } - - WrapperType createColumnToVariantWrapper(const DataTypePtr & from_type, const DataTypeVariant & to_variant) const - { - /// We allow converting NULL to Variant(...) as Variant can store NULLs. - if (from_type->onlyNull()) - { - return [](ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr - { - auto result_column = result_type->createColumn(); - result_column->insertManyDefaults(input_rows_count); - return result_column; - }; - } - - auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(*removeNullableOrLowCardinalityNullable(from_type)); - if (!variant_discr_opt) - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert type {} to {}. 
Conversion to Variant allowed only for types from this Variant", from_type->getName(), to_variant.getName()); - - return [variant_discr = *variant_discr_opt] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t) -> ColumnPtr - { - const auto & result_variant_type = assert_cast(*result_type); - const auto & variant_types = result_variant_type.getVariants(); - if (const ColumnNullable * col_nullable = typeid_cast(arguments.front().column.get())) - { - const auto & column = col_nullable->getNestedColumnPtr(); - const auto & null_map = col_nullable->getNullMapData(); - IColumn::Filter filter; - filter.reserve(column->size()); - auto discriminators = ColumnVariant::ColumnDiscriminators::create(); - auto & discriminators_data = discriminators->getData(); - discriminators_data.reserve(column->size()); - size_t variant_size_hint = 0; - for (size_t i = 0; i != column->size(); ++i) - { - if (null_map[i]) - { - discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR); - filter.push_back(0); - } - else - { - discriminators_data.push_back(variant_discr); - filter.push_back(1); - ++variant_size_hint; - } - } - - ColumnPtr variant_column; - /// If there were no NULLs, just use the column. - if (variant_size_hint == column->size()) - variant_column = column; - /// Otherwise we should use filtered column. - else - variant_column = column->filter(filter, variant_size_hint); - return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), variant_column, variant_discr); - } - else if (isColumnLowCardinalityNullable(*arguments.front().column)) - { - const auto & column = arguments.front().column; - - /// Variant column cannot have LowCardinality(Nullable(...)) variant, as Variant column stores NULLs itself. - /// We should create a null-map, insert NULL_DISCRIMINATOR on NULL values and filter initial column. 
- const auto & col_lc = assert_cast(*column); - const auto & indexes = col_lc.getIndexes(); - auto null_index = col_lc.getDictionary().getNullValueIndex(); - IColumn::Filter filter; - filter.reserve(col_lc.size()); - auto discriminators = ColumnVariant::ColumnDiscriminators::create(); - auto & discriminators_data = discriminators->getData(); - discriminators_data.reserve(col_lc.size()); - size_t variant_size_hint = 0; - for (size_t i = 0; i != col_lc.size(); ++i) - { - if (indexes.getUInt(i) == null_index) - { - discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR); - filter.push_back(0); - } - else - { - discriminators_data.push_back(variant_discr); - filter.push_back(1); - ++variant_size_hint; - } - } - - MutableColumnPtr variant_column; - /// If there were no NULLs, we can just clone the column. - if (variant_size_hint == col_lc.size()) - variant_column = IColumn::mutate(column); - /// Otherwise we should filter column. - else - variant_column = column->filter(filter, variant_size_hint)->assumeMutable(); - - assert_cast(*variant_column).nestedRemoveNullable(); - return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), std::move(variant_column), variant_discr); - } - else - { - const auto & column = arguments.front().column; - auto discriminators = ColumnVariant::ColumnDiscriminators::create(); - discriminators->getData().resize_fill(column->size(), variant_discr); - return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), column, variant_discr); - } - }; - } - - /// Wrapper for conversion to/from Variant type - WrapperType createVariantWrapper(const DataTypePtr & from_type, const DataTypePtr & to_type) const - { - if (const auto * from_variant = checkAndGetDataType(from_type.get())) - { - if (const auto * to_variant = checkAndGetDataType(to_type.get())) - return createVariantToVariantWrapper(*from_variant, *to_variant); - - return 
createVariantToColumnWrapper(*from_variant, to_type); - } - - return createColumnToVariantWrapper(from_type, assert_cast(*to_type)); - } - - template - WrapperType createEnumWrapper(const DataTypePtr & from_type, const DataTypeEnum * to_type) const - { - using EnumType = DataTypeEnum; - using Function = typename FunctionTo::Type; - - if (const auto * from_enum8 = checkAndGetDataType(from_type.get())) - checkEnumToEnumConversion(from_enum8, to_type); - else if (const auto * from_enum16 = checkAndGetDataType(from_type.get())) - checkEnumToEnumConversion(from_enum16, to_type); - - if (checkAndGetDataType(from_type.get())) - return createStringToEnumWrapper(); - else if (checkAndGetDataType(from_type.get())) - return createStringToEnumWrapper(); - else if (isNativeNumber(from_type) || isEnum(from_type)) - { - auto function = Function::create(); - return createFunctionAdaptor(function, from_type); - } - else - { - if (cast_type == CastType::accurateOrNull) - return createToNullableColumnWrapper(); - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", - from_type->getName(), to_type->getName()); - } - } - - template - void checkEnumToEnumConversion(const EnumTypeFrom * from_type, const EnumTypeTo * to_type) const - { - const auto & from_values = from_type->getValues(); - const auto & to_values = to_type->getValues(); - - using ValueType = std::common_type_t; - using NameValuePair = std::pair; - using EnumValues = std::vector; - - EnumValues name_intersection; - std::set_intersection(std::begin(from_values), std::end(from_values), - std::begin(to_values), std::end(to_values), std::back_inserter(name_intersection), - [] (auto && from, auto && to) { return from.first < to.first; }); - - for (const auto & name_value : name_intersection) - { - const auto & old_value = name_value.second; - const auto & new_value = to_type->getValue(name_value.first); - if (old_value != new_value) - throw 
Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Enum conversion changes value for element '{}' from {} to {}", - name_value.first, toString(old_value), toString(new_value)); - } - } - - template - WrapperType createStringToEnumWrapper() const - { - const char * function_name = cast_name; - return [function_name] ( - ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, const ColumnNullable * nullable_col, size_t /*input_rows_count*/) - { - const auto & first_col = arguments.front().column.get(); - const auto & result_type = typeid_cast(*res_type); - - const ColumnStringType * col = typeid_cast(first_col); - - if (col && nullable_col && nullable_col->size() != col->size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnNullable is not compatible with original"); - - if (col) - { - const auto size = col->size(); - - auto res = result_type.createColumn(); - auto & out_data = static_cast(*res).getData(); - out_data.resize(size); - - auto default_enum_value = result_type.getValues().front().second; - - if (nullable_col) - { - for (size_t i = 0; i < size; ++i) - { - if (!nullable_col->isNullAt(i)) - out_data[i] = result_type.getValue(col->getDataAt(i)); - else - out_data[i] = default_enum_value; - } - } - else - { - for (size_t i = 0; i < size; ++i) - out_data[i] = result_type.getValue(col->getDataAt(i)); - } - - return res; - } - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column {} as first argument of function {}", - first_col->getName(), function_name); - }; - } - - template - WrapperType createEnumToStringWrapper() const - { - const char * function_name = cast_name; - return [function_name] ( - ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, const ColumnNullable * nullable_col, size_t /*input_rows_count*/) - { - using ColumnEnumType = EnumType::ColumnType; - - const auto & first_col = arguments.front().column.get(); - const auto & first_type = arguments.front().type.get(); - - const ColumnEnumType * enum_col = 
typeid_cast(first_col); - const EnumType * enum_type = typeid_cast(first_type); - - if (enum_col && nullable_col && nullable_col->size() != enum_col->size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnNullable is not compatible with original"); - - if (enum_col && enum_type) - { - const auto size = enum_col->size(); - const auto & enum_data = enum_col->getData(); - - auto res = res_type->createColumn(); - - if (nullable_col) - { - for (size_t i = 0; i < size; ++i) - { - if (!nullable_col->isNullAt(i)) - { - const auto & value = enum_type->getNameForValue(enum_data[i]); - res->insertData(value.data, value.size); - } - else - res->insertDefault(); - } - } - else - { - for (size_t i = 0; i < size; ++i) - { - const auto & value = enum_type->getNameForValue(enum_data[i]); - res->insertData(value.data, value.size); - } - } - - return res; - } - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column {} as first argument of function {}", - first_col->getName(), function_name); - }; - } - - static WrapperType createIdentityWrapper(const DataTypePtr &) - { - return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) - { - return arguments.front().column; - }; - } - - static WrapperType createNothingWrapper(const IDataType * to_type) - { - ColumnPtr res = to_type->createColumnConstWithDefaultValue(1); - return [res] (ColumnsWithTypeAndName &, const DataTypePtr &, const ColumnNullable *, size_t input_rows_count) - { - /// Column of Nothing type is trivially convertible to any other column - return res->cloneResized(input_rows_count)->convertToFullColumnIfConst(); - }; - } - - WrapperType prepareUnpackDictionaries(const DataTypePtr & from_type, const DataTypePtr & to_type) const - { - /// Conversion from/to Variant data type is processed in a special way. - /// We don't need to remove LowCardinality/Nullable. 
- if (isVariant(to_type) || isVariant(from_type)) - return createVariantWrapper(from_type, to_type); - - const auto * from_low_cardinality = typeid_cast(from_type.get()); - const auto * to_low_cardinality = typeid_cast(to_type.get()); - const auto & from_nested = from_low_cardinality ? from_low_cardinality->getDictionaryType() : from_type; - const auto & to_nested = to_low_cardinality ? to_low_cardinality->getDictionaryType() : to_type; - - if (from_type->onlyNull()) - { - if (!to_nested->isNullable() && !isVariant(to_type)) - { - if (cast_type == CastType::accurateOrNull) - { - return createToNullableColumnWrapper(); - } - else - { - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert NULL to a non-nullable type"); - } - } - - return [](ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) - { - return result_type->createColumnConstWithDefaultValue(input_rows_count)->convertToFullColumnIfConst(); - }; - } - - bool skip_not_null_check = false; - - if (from_low_cardinality && from_nested->isNullable() && !to_nested->isNullable()) - /// Disable check for dictionary. Will check that column doesn't contain NULL in wrapper below. - skip_not_null_check = true; - - auto wrapper = prepareRemoveNullable(from_nested, to_nested, skip_not_null_check); - if (!from_low_cardinality && !to_low_cardinality) - return wrapper; - - return [wrapper, from_low_cardinality, to_low_cardinality, skip_not_null_check] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) -> ColumnPtr - { - ColumnsWithTypeAndName args = {arguments[0]}; - auto & arg = args.front(); - auto res_type = result_type; - - ColumnPtr converted_column; - - ColumnPtr res_indexes; - /// For some types default can't be casted (for example, String to Int). In that case convert column to full. 
- bool src_converted_to_full_column = false; - - { - auto tmp_rows_count = input_rows_count; - - if (to_low_cardinality) - res_type = to_low_cardinality->getDictionaryType(); - - if (from_low_cardinality) - { - const auto * col_low_cardinality = assert_cast(arguments[0].column.get()); - - if (skip_not_null_check && col_low_cardinality->containsNull()) - throw Exception(ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN, "Cannot convert NULL value to non-Nullable type"); - - arg.column = col_low_cardinality->getDictionary().getNestedColumn(); - arg.type = from_low_cardinality->getDictionaryType(); - - /// TODO: Make map with defaults conversion. - src_converted_to_full_column = !removeNullable(arg.type)->equals(*removeNullable(res_type)); - if (src_converted_to_full_column) - arg.column = arg.column->index(col_low_cardinality->getIndexes(), 0); - else - res_indexes = col_low_cardinality->getIndexesPtr(); - - tmp_rows_count = arg.column->size(); - } - - /// Perform the requested conversion. - converted_column = wrapper(args, res_type, nullable_source, tmp_rows_count); - } - - if (to_low_cardinality) - { - auto res_column = to_low_cardinality->createColumn(); - auto * col_low_cardinality = assert_cast(res_column.get()); - - if (from_low_cardinality && !src_converted_to_full_column) - { - col_low_cardinality->insertRangeFromDictionaryEncodedColumn(*converted_column, *res_indexes); - } - else - col_low_cardinality->insertRangeFromFullColumn(*converted_column, 0, converted_column->size()); - - return res_column; - } - else if (!src_converted_to_full_column) - return converted_column->index(*res_indexes, 0); - else - return converted_column; - }; - } - - WrapperType prepareRemoveNullable(const DataTypePtr & from_type, const DataTypePtr & to_type, bool skip_not_null_check) const - { - /// Determine whether pre-processing and/or post-processing must take place during conversion. 
- - bool source_is_nullable = from_type->isNullable(); - bool result_is_nullable = to_type->isNullable(); - - auto wrapper = prepareImpl(removeNullable(from_type), removeNullable(to_type), result_is_nullable); - - if (result_is_nullable) - { - return [wrapper, source_is_nullable] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr - { - /// Create a temporary columns on which to perform the operation. - const auto & nullable_type = static_cast(*result_type); - const auto & nested_type = nullable_type.getNestedType(); - - ColumnsWithTypeAndName tmp_args; - if (source_is_nullable) - tmp_args = createBlockWithNestedColumns(arguments); - else - tmp_args = arguments; - - const ColumnNullable * nullable_source = nullptr; - - /// Add original ColumnNullable for createStringToEnumWrapper() - if (source_is_nullable) - { - if (arguments.size() != 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of arguments"); - nullable_source = typeid_cast(arguments.front().column.get()); - } - - /// Perform the requested conversion. - auto tmp_res = wrapper(tmp_args, nested_type, nullable_source, input_rows_count); - - /// May happen in fuzzy tests. For debug purpose. - if (!tmp_res) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Couldn't convert {} to {} in prepareRemoveNullable wrapper.", - arguments[0].type->getName(), nested_type->getName()); - - return wrapInNullable(tmp_res, arguments, nested_type, input_rows_count); - }; - } - else if (source_is_nullable) - { - /// Conversion from Nullable to non-Nullable. - - return [wrapper, skip_not_null_check] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr - { - auto tmp_args = createBlockWithNestedColumns(arguments); - auto nested_type = removeNullable(result_type); - - /// Check that all values are not-NULL. 
- /// Check can be skipped in case if LowCardinality dictionary is transformed. - /// In that case, correctness will be checked beforehand. - if (!skip_not_null_check) - { - const auto & col = arguments[0].column; - const auto & nullable_col = assert_cast(*col); - const auto & null_map = nullable_col.getNullMapData(); - - if (!memoryIsZero(null_map.data(), 0, null_map.size())) - throw Exception(ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN, "Cannot convert NULL value to non-Nullable type"); - } - const ColumnNullable * nullable_source = typeid_cast(arguments.front().column.get()); - return wrapper(tmp_args, nested_type, nullable_source, input_rows_count); - }; - } - else - return wrapper; - } - - /// 'from_type' and 'to_type' are nested types in case of Nullable. - /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested. - WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const - { - if (isUInt8(from_type) && isBool(to_type)) - return createUInt8ToBoolWrapper(from_type, to_type); - - /// We can cast IPv6 into IPv6, IPv4 into IPv4, but we should not allow to cast FixedString(16) into IPv6 as part of identity cast - bool safe_convert_custom_types = true; - - if (const auto * to_type_custom_name = to_type->getCustomName()) - safe_convert_custom_types = from_type->getCustomName() && from_type->getCustomName()->getName() == to_type_custom_name->getName(); - else if (const auto * from_type_custom_name = from_type->getCustomName()) - safe_convert_custom_types = to_type->getCustomName() && from_type_custom_name->getName() == to_type->getCustomName()->getName(); - - if (from_type->equals(*to_type) && safe_convert_custom_types) - { - /// We can only use identity conversion for DataTypeAggregateFunction when they are strictly equivalent. 
- if (typeid_cast(from_type.get())) - { - if (DataTypeAggregateFunction::strictEquals(from_type, to_type)) - return createIdentityWrapper(from_type); - } - else - return createIdentityWrapper(from_type); - } - else if (WhichDataType(from_type).isNothing()) - return createNothingWrapper(to_type.get()); - - WrapperType ret; - - auto make_default_wrapper = [&](const auto & types) -> bool - { - using Types = std::decay_t; - using ToDataType = typename Types::LeftType; - - if constexpr ( - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) - { - ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); - return true; - } - if constexpr (std::is_same_v) - { - if (isBool(to_type)) - ret = createBoolWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); - else - ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); - return true; - } - if constexpr ( - std::is_same_v || - std::is_same_v) - { - ret = createEnumWrapper(from_type, checkAndGetDataType(to_type.get())); - return true; - } - if constexpr ( - std::is_same_v> || - std::is_same_v> || - std::is_same_v> || - std::is_same_v> || - std::is_same_v) - { - ret = createDecimalWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); - return true; - } - - return false; - }; - - bool cast_ipv4_ipv6_default_on_conversion_error_value = context && context->getSettingsRef().cast_ipv4_ipv6_default_on_conversion_error; - bool input_format_ipv4_default_on_conversion_error_value = context && context->getSettingsRef().input_format_ipv4_default_on_conversion_error; - bool 
input_format_ipv6_default_on_conversion_error_value = context && context->getSettingsRef().input_format_ipv6_default_on_conversion_error; - - auto make_custom_serialization_wrapper = [&, cast_ipv4_ipv6_default_on_conversion_error_value, input_format_ipv4_default_on_conversion_error_value, input_format_ipv6_default_on_conversion_error_value](const auto & types) -> bool - { - using Types = std::decay_t; - using ToDataType = typename Types::RightType; - using FromDataType = typename Types::LeftType; - - if constexpr (WhichDataType(FromDataType::type_id).isStringOrFixedString()) - { - if constexpr (std::is_same_v) - { - ret = [cast_ipv4_ipv6_default_on_conversion_error_value, - input_format_ipv4_default_on_conversion_error_value, - requested_result_is_nullable]( - ColumnsWithTypeAndName & arguments, - const DataTypePtr & result_type, - const ColumnNullable * column_nullable, - size_t) -> ColumnPtr - { - if (!WhichDataType(result_type).isIPv4()) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName()); - - const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - if (requested_result_is_nullable) - return convertToIPv4(arguments[0].column, null_map); - else if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv4_default_on_conversion_error_value) - return convertToIPv4(arguments[0].column, null_map); - else - return convertToIPv4(arguments[0].column, null_map); - }; - - return true; - } - - if constexpr (std::is_same_v) - { - ret = [cast_ipv4_ipv6_default_on_conversion_error_value, - input_format_ipv6_default_on_conversion_error_value, - requested_result_is_nullable]( - ColumnsWithTypeAndName & arguments, - const DataTypePtr & result_type, - const ColumnNullable * column_nullable, - size_t) -> ColumnPtr - { - if (!WhichDataType(result_type).isIPv6()) - throw Exception( - ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. 
Expected IPv6", result_type->getName()); - - const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - if (requested_result_is_nullable) - return convertToIPv6(arguments[0].column, null_map); - else if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv6_default_on_conversion_error_value) - return convertToIPv6(arguments[0].column, null_map); - else - return convertToIPv6(arguments[0].column, null_map); - }; - - return true; - } - - if (to_type->getCustomSerialization() && to_type->getCustomName()) - { - ret = [requested_result_is_nullable]( - ColumnsWithTypeAndName & arguments, - const DataTypePtr & result_type, - const ColumnNullable * column_nullable, - size_t input_rows_count) -> ColumnPtr - { - auto wrapped_result_type = result_type; - if (requested_result_is_nullable) - wrapped_result_type = makeNullable(result_type); - return ConvertImplGenericFromString::execute( - arguments, wrapped_result_type, column_nullable, input_rows_count); - }; - return true; - } - } - else if constexpr (WhichDataType(FromDataType::type_id).isIPv6() && WhichDataType(ToDataType::type_id).isIPv4()) - { - ret = [cast_ipv4_ipv6_default_on_conversion_error_value, requested_result_is_nullable]( - ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t) - -> ColumnPtr - { - if (!WhichDataType(result_type).isIPv4()) - throw Exception( - ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName()); - - const auto * null_map = column_nullable ? 
&column_nullable->getNullMapData() : nullptr; - if (requested_result_is_nullable) - return convertIPv6ToIPv4(arguments[0].column, null_map); - else if (cast_ipv4_ipv6_default_on_conversion_error_value) - return convertIPv6ToIPv4(arguments[0].column, null_map); - else - return convertIPv6ToIPv4(arguments[0].column, null_map); - }; - - return true; - } - - if constexpr (WhichDataType(ToDataType::type_id).isStringOrFixedString()) - { - if constexpr (WhichDataType(FromDataType::type_id).isEnum()) - { - ret = createEnumToStringWrapper(); - return true; - } - else if (from_type->getCustomSerialization()) - { - ret = [](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr - { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); - }; - return true; - } - } - - return false; - }; - - if (callOnTwoTypeIndexes(from_type->getTypeId(), to_type->getTypeId(), make_custom_serialization_wrapper)) - return ret; - - if (callOnIndexAndDataType(to_type->getTypeId(), make_default_wrapper)) - return ret; - - switch (to_type->getTypeId()) - { - case TypeIndex::String: - return createStringWrapper(from_type); - case TypeIndex::FixedString: - return createFixedStringWrapper(from_type, checkAndGetDataType(to_type.get())->getN()); - case TypeIndex::Array: - return createArrayWrapper(from_type, static_cast(*to_type)); - case TypeIndex::Tuple: - return createTupleWrapper(from_type, checkAndGetDataType(to_type.get())); - case TypeIndex::Map: - return createMapWrapper(from_type, checkAndGetDataType(to_type.get())); - case TypeIndex::Object: - return createObjectWrapper(from_type, checkAndGetDataType(to_type.get())); - case TypeIndex::AggregateFunction: - return createAggregateFunctionWrapper(from_type, checkAndGetDataType(to_type.get())); - case TypeIndex::Interval: - return createIntervalWrapper(from_type, checkAndGetDataType(to_type.get())->getKind()); - default: - break; - } - - if 
(cast_type == CastType::accurateOrNull) - return createToNullableColumnWrapper(); - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", - from_type->getName(), to_type->getName()); - } -}; - -class MonotonicityHelper -{ -public: - using MonotonicityForRange = FunctionCastBase::MonotonicityForRange; - - template - static auto monotonicityForType(const DataType * const) - { - return FunctionTo::Type::Monotonic::get; - } - - static MonotonicityForRange getMonotonicityInformation(const DataTypePtr & from_type, const IDataType * to_type) - { - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = 
checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (isEnum(from_type)) - { - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - } - /// other types like Null, FixedString, Array and Tuple have no monotonicity defined - return {}; - } -}; - -} diff --git a/src/Functions/FunctionsRound.cpp b/src/Functions/FunctionsRound.cpp index 02fe1d659de..059476acb40 100644 --- a/src/Functions/FunctionsRound.cpp +++ b/src/Functions/FunctionsRound.cpp @@ -7,11 +7,11 @@ namespace DB REGISTER_FUNCTION(Round) { - factory.registerFunction("round", {}, FunctionFactory::CaseInsensitive); - factory.registerFunction("roundBankers", {}, FunctionFactory::CaseSensitive); - factory.registerFunction("floor", {}, FunctionFactory::CaseInsensitive); - factory.registerFunction("ceil", {}, FunctionFactory::CaseInsensitive); - factory.registerFunction("trunc", {}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::CaseSensitive); + factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::CaseInsensitive); factory.registerFunction(); /// Compatibility aliases. 
diff --git a/src/Functions/FunctionsStringSearch.h b/src/Functions/FunctionsStringSearch.h index 41b476ccc56..53d99198134 100644 --- a/src/Functions/FunctionsStringSearch.h +++ b/src/Functions/FunctionsStringSearch.h @@ -22,13 +22,13 @@ namespace DB * positionCaseInsensitive(haystack, needle) * positionCaseInsensitiveUTF8(haystack, needle) * - * like(haystack, pattern) - search by the regular expression LIKE; Returns 0 or 1. Case-insensitive, but only for Latin. - * notLike(haystack, pattern) + * like(haystack, needle) - search by the regular expression LIKE; Returns 0 or 1. Case-insensitive, but only for Latin. + * notLike(haystack, needle) * - * ilike(haystack, pattern) - like 'like' but case-insensitive - * notIlike(haystack, pattern) + * ilike(haystack, needle) - like 'like' but case-insensitive + * notIlike(haystack, needle) * - * match(haystack, pattern) - search by regular expression re2; Returns 0 or 1. + * match(haystack, needle) - search by regular expression re2; Returns 0 or 1. * * countSubstrings(haystack, needle) -- count number of occurrences of needle in haystack. * countSubstringsCaseInsensitive(haystack, needle) @@ -53,7 +53,7 @@ namespace DB * - the first subpattern, if the regexp has a subpattern; * - the zero subpattern (the match part, otherwise); * - if not match - an empty string. 
- * extract(haystack, pattern) + * extract(haystack, needle) */ namespace ErrorCodes @@ -69,13 +69,39 @@ enum class ExecutionErrorPolicy Throw }; -template +enum class HaystackNeedleOrderIsConfigurable +{ + No, /// function arguments are always: (haystack, needle[, position]) + Yes /// depending on a setting, the function arguments are (haystack, needle[, position]) or (needle, haystack[, position]) +}; + +template class FunctionsStringSearch : public IFunction { +private: + enum class ArgumentOrder + { + HaystackNeedle, + NeedleHaystack + }; + + ArgumentOrder argument_order = ArgumentOrder::HaystackNeedle; + public: static constexpr auto name = Impl::name; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + + explicit FunctionsStringSearch([[maybe_unused]] ContextPtr context) + { + if constexpr (haystack_needle_order_is_configurable == HaystackNeedleOrderIsConfigurable::Yes) + { + if (context->getSettingsRef().function_locate_has_mysql_compatible_argument_order) + argument_order = ArgumentOrder::NeedleHaystack; + } + } String getName() const override { return name; } @@ -105,13 +131,16 @@ public: "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", getName(), arguments.size()); - if (!isStringOrFixedString(arguments[0])) + const auto & haystack_type = (argument_order == ArgumentOrder::HaystackNeedle) ? arguments[0] : arguments[1]; + const auto & needle_type = (argument_order == ArgumentOrder::HaystackNeedle) ? 
arguments[1] : arguments[0]; + + if (!isStringOrFixedString(haystack_type)) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); - if (!isString(arguments[1])) + if (!isString(needle_type)) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", @@ -135,8 +164,8 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override { - const ColumnPtr & column_haystack = arguments[0].column; - const ColumnPtr & column_needle = arguments[1].column; + const ColumnPtr & column_haystack = (argument_order == ArgumentOrder::HaystackNeedle) ? arguments[0].column : arguments[1].column; + const ColumnPtr & column_needle = (argument_order == ArgumentOrder::HaystackNeedle) ? arguments[1].column : arguments[0].column; ColumnPtr column_start_pos = nullptr; if (arguments.size() >= 3) diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp index 3ec5393fa6f..b7c7e5847bd 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp @@ -128,7 +128,7 @@ restoreUserDefinedSQLObjects(RestorerFromBackup & restorer, const String & data_ statement_def.data() + statement_def.size(), "in file " + filepath + " from backup " + backup->getNameForLogging(), 0, - context->getSettingsRef().max_parser_depth); + context->getSettingsRef().max_parser_depth, context->getSettingsRef().max_parser_backtracks); break; } } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp index 34946db7d9e..b083c540083 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp @@ -92,7 +92,8 @@ 
ASTPtr UserDefinedSQLObjectsDiskStorage::tryLoadObject(UserDefinedSQLObjectType object_create_query.data() + object_create_query.size(), "", 0, - global_context->getSettingsRef().max_parser_depth); + global_context->getSettingsRef().max_parser_depth, + global_context->getSettingsRef().max_parser_backtracks); return ast; } } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp index c43b223ffeb..4ec34c15efc 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp @@ -314,7 +314,8 @@ ASTPtr UserDefinedSQLObjectsZooKeeperStorage::parseObjectData(const String & obj object_data.data() + object_data.size(), "", 0, - global_context->getSettingsRef().max_parser_depth); + global_context->getSettingsRef().max_parser_depth, + global_context->getSettingsRef().max_parser_backtracks); return ast; } } diff --git a/src/Functions/array/FunctionsMapMiscellaneous.cpp b/src/Functions/array/FunctionsMapMiscellaneous.cpp index 157f2fa8a26..d92bfcf0bc6 100644 --- a/src/Functions/array/FunctionsMapMiscellaneous.cpp +++ b/src/Functions/array/FunctionsMapMiscellaneous.cpp @@ -213,6 +213,7 @@ struct MapToSubcolumnAdapter : public MapAdapterBase( - std::make_shared(element_type)); }); + [element_type](ContextPtr){ return std::make_shared(element_type); }); } } diff --git a/src/Functions/caseWithExpression.cpp b/src/Functions/caseWithExpression.cpp index 9547cd200b2..71fccc8436e 100644 --- a/src/Functions/caseWithExpression.cpp +++ b/src/Functions/caseWithExpression.cpp @@ -113,9 +113,7 @@ REGISTER_FUNCTION(CaseWithExpression) factory.registerFunction(); /// These are obsolete function names. 
- factory.registerFunction("caseWithExpr"); + factory.registerAlias("caseWithExpr", "caseWithExpression"); } } - - diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 970e6fd6f75..44b39811882 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -173,25 +173,22 @@ private: bool keep_nullable; }; -template class FunctionCastOrDefaultTyped final : public IFunction { public: - static constexpr auto name = Name::name; - - static FunctionPtr create(ContextPtr context) - { - return std::make_shared(context); - } - - explicit FunctionCastOrDefaultTyped(ContextPtr context_) - : impl(context_) + explicit FunctionCastOrDefaultTyped(ContextPtr context_, String name_, DataTypePtr type_) + : impl(context_), name(std::move(name_)), type(std::move(type_)), which(type) { } String getName() const override { return name; } private: + FunctionCastOrDefault impl; + String name; + DataTypePtr type; + WhichDataType which; + size_t getNumberOfArguments() const override { return 0; } bool isVariadic() const override { return true; } @@ -209,10 +206,10 @@ private: FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; FunctionArgumentDescriptors optional_args; - if constexpr (IsDataTypeDecimal) + if (isDecimal(type) || isDateTime64(type)) mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); - if (std::is_same_v || std::is_same_v) + if (isDateTimeOrDateTime64(type)) optional_args.push_back({"timezone", static_cast(&isString), isColumnConst, "const String"}); optional_args.push_back({"default_value", nullptr, nullptr, nullptr}); @@ -224,7 +221,7 @@ private: size_t scale = 0; std::string time_zone; - if constexpr (IsDataTypeDecimal) + if (isDecimal(type) || isDateTime64(type)) { const auto & scale_argument = arguments[additional_argument_index]; @@ -241,7 +238,7 @@ private: ++additional_argument_index; } - if constexpr (std::is_same_v || 
std::is_same_v) + if (isDateTimeOrDateTime64(type)) { if (additional_argument_index < arguments.size()) { @@ -251,16 +248,22 @@ private: } } - std::shared_ptr cast_type; + DataTypePtr cast_type; - if constexpr (std::is_same_v) - cast_type = std::make_shared(scale, time_zone); - else if constexpr (IsDataTypeDecimal) - cast_type = std::make_shared(Type::maxPrecision(), scale); - else if constexpr (std::is_same_v || std::is_same_v) - cast_type = std::make_shared(time_zone); + if (which.isDateTime64()) + cast_type = std::make_shared(scale, time_zone); + else if (which.isDateTime()) + cast_type = std::make_shared(time_zone); + else if (which.isDecimal32()) + cast_type = createDecimalMaxPrecision(scale); + else if (which.isDecimal64()) + cast_type = createDecimalMaxPrecision(scale); + else if (which.isDecimal128()) + cast_type = createDecimalMaxPrecision(scale); + else if (which.isDecimal256()) + cast_type = createDecimalMaxPrecision(scale); else - cast_type = std::make_shared(); + cast_type = type; ColumnWithTypeAndName type_argument = { @@ -289,7 +292,8 @@ private: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_size) const override { - size_t additional_arguments_size = IsDataTypeDecimal + (std::is_same_v || std::is_same_v); + /// Scale and time zone + size_t additional_arguments_size = (which.isDecimal() || which.isDateTime64()) + which.isDateTimeOrDateTime64(); ColumnWithTypeAndName second_argument = { @@ -299,7 +303,7 @@ private: }; ColumnsWithTypeAndName arguments_with_cast_type; - arguments_with_cast_type.reserve(arguments.size()); + arguments_with_cast_type.reserve(arguments.size() + 1); arguments_with_cast_type.emplace_back(arguments[0]); arguments_with_cast_type.emplace_back(second_argument); @@ -310,98 +314,79 @@ private: return impl.executeImpl(arguments_with_cast_type, result_type, input_rows_size); } - - FunctionCastOrDefault impl; }; -struct NameToUInt8OrDefault { static constexpr auto 
name = "toUInt8OrDefault"; }; -struct NameToUInt16OrDefault { static constexpr auto name = "toUInt16OrDefault"; }; -struct NameToUInt32OrDefault { static constexpr auto name = "toUInt32OrDefault"; }; -struct NameToUInt64OrDefault { static constexpr auto name = "toUInt64OrDefault"; }; -struct NameToUInt256OrDefault { static constexpr auto name = "toUInt256OrDefault"; }; -struct NameToInt8OrDefault { static constexpr auto name = "toInt8OrDefault"; }; -struct NameToInt16OrDefault { static constexpr auto name = "toInt16OrDefault"; }; -struct NameToInt32OrDefault { static constexpr auto name = "toInt32OrDefault"; }; -struct NameToInt64OrDefault { static constexpr auto name = "toInt64OrDefault"; }; -struct NameToInt128OrDefault { static constexpr auto name = "toInt128OrDefault"; }; -struct NameToInt256OrDefault { static constexpr auto name = "toInt256OrDefault"; }; -struct NameToFloat32OrDefault { static constexpr auto name = "toFloat32OrDefault"; }; -struct NameToFloat64OrDefault { static constexpr auto name = "toFloat64OrDefault"; }; -struct NameToDateOrDefault { static constexpr auto name = "toDateOrDefault"; }; -struct NameToDate32OrDefault { static constexpr auto name = "toDate32OrDefault"; }; -struct NameToDateTimeOrDefault { static constexpr auto name = "toDateTimeOrDefault"; }; -struct NameToDateTime64OrDefault { static constexpr auto name = "toDateTime64OrDefault"; }; -struct NameToDecimal32OrDefault { static constexpr auto name = "toDecimal32OrDefault"; }; -struct NameToDecimal64OrDefault { static constexpr auto name = "toDecimal64OrDefault"; }; -struct NameToDecimal128OrDefault { static constexpr auto name = "toDecimal128OrDefault"; }; -struct NameToDecimal256OrDefault { static constexpr auto name = "toDecimal256OrDefault"; }; -struct NameToUUIDOrDefault { static constexpr auto name = "toUUIDOrDefault"; }; -struct NameToIPv4OrDefault { static constexpr auto name = "toIPv4OrDefault"; }; -struct NameToIPv6OrDefault { static constexpr auto name = 
"toIPv6OrDefault"; }; - -using FunctionToUInt8OrDefault = FunctionCastOrDefaultTyped; -using FunctionToUInt16OrDefault = FunctionCastOrDefaultTyped; -using FunctionToUInt32OrDefault = FunctionCastOrDefaultTyped; -using FunctionToUInt64OrDefault = FunctionCastOrDefaultTyped; -using FunctionToUInt256OrDefault = FunctionCastOrDefaultTyped; - -using FunctionToInt8OrDefault = FunctionCastOrDefaultTyped; -using FunctionToInt16OrDefault = FunctionCastOrDefaultTyped; -using FunctionToInt32OrDefault = FunctionCastOrDefaultTyped; -using FunctionToInt64OrDefault = FunctionCastOrDefaultTyped; -using FunctionToInt128OrDefault = FunctionCastOrDefaultTyped; -using FunctionToInt256OrDefault = FunctionCastOrDefaultTyped; - -using FunctionToFloat32OrDefault = FunctionCastOrDefaultTyped; -using FunctionToFloat64OrDefault = FunctionCastOrDefaultTyped; - -using FunctionToDateOrDefault = FunctionCastOrDefaultTyped; -using FunctionToDate32OrDefault = FunctionCastOrDefaultTyped; -using FunctionToDateTimeOrDefault = FunctionCastOrDefaultTyped; -using FunctionToDateTime64OrDefault = FunctionCastOrDefaultTyped; - -using FunctionToDecimal32OrDefault = FunctionCastOrDefaultTyped, NameToDecimal32OrDefault>; -using FunctionToDecimal64OrDefault = FunctionCastOrDefaultTyped, NameToDecimal64OrDefault>; -using FunctionToDecimal128OrDefault = FunctionCastOrDefaultTyped, NameToDecimal128OrDefault>; -using FunctionToDecimal256OrDefault = FunctionCastOrDefaultTyped, NameToDecimal256OrDefault>; - -using FunctionToUUIDOrDefault = FunctionCastOrDefaultTyped; -using FunctionToIPv4OrDefault = FunctionCastOrDefaultTyped; -using FunctionToIPv6OrDefault = FunctionCastOrDefaultTyped; - REGISTER_FUNCTION(CastOrDefault) { factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction("toUInt8OrDefault", [](ContextPtr context) + { return std::make_shared(context, 
"toUInt8OrDefault", std::make_shared()); }); + factory.registerFunction("toUInt16OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toUInt16OrDefault", std::make_shared()); }); + factory.registerFunction("toUInt32OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toUInt32OrDefault", std::make_shared()); }); + factory.registerFunction("toUInt64OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toUInt64OrDefault", std::make_shared()); }); + factory.registerFunction("toUInt128OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toUInt128OrDefault", std::make_shared()); }, + FunctionDocumentation{ + .description=R"( +Converts a string in the first argument of the function to UInt128 by parsing it. +If it cannot parse the value, returns the default value, which can be provided as the second function argument, and if provided, must be of UInt128 type. +If the default value is not provided in the second argument, it is assumed to be zero. 
+)", + .examples{ + {"Successful conversion", "SELECT toUInt128OrDefault('1', 2::UInt128)", "1"}, + {"Default value", "SELECT toUInt128OrDefault('upyachka', 123456789012345678901234567890::UInt128)", "123456789012345678901234567890"}, + {"Implicit default value", "SELECT toUInt128OrDefault('upyachka')", "0"}}, + .categories{"ConversionFunctions"} + }); + factory.registerFunction("toUInt256OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toUInt256OrDefault", std::make_shared()); }); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction("toInt8OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toInt8OrDefault", std::make_shared()); }); + factory.registerFunction("toInt16OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toInt16OrDefault", std::make_shared()); }); + factory.registerFunction("toInt32OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toInt32OrDefault", std::make_shared()); }); + factory.registerFunction("toInt64OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toInt64OrDefault", std::make_shared()); }); + factory.registerFunction("toInt128OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toInt128OrDefault", std::make_shared()); }); + factory.registerFunction("toInt256OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toInt256OrDefault", std::make_shared()); }); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction("toFloat32OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toFloat32OrDefault", std::make_shared()); }); + factory.registerFunction("toFloat64OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toFloat64OrDefault", std::make_shared()); }); - 
factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction("toDateOrDefault", [](ContextPtr context) + { return std::make_shared(context, "toDateOrDefault", std::make_shared()); }); + factory.registerFunction("toDate32OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toDate32OrDefault", std::make_shared()); }); + factory.registerFunction("toDateTimeOrDefault", [](ContextPtr context) + { return std::make_shared(context, "toDateTimeOrDefault", std::make_shared()); }); + factory.registerFunction("toDateTime64OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toDateTime64OrDefault", std::make_shared(3 /* default scale */)); }); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction("toDecimal32OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toDecimal32OrDefault", createDecimalMaxPrecision(0)); }); + factory.registerFunction("toDecimal64OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toDecimal64OrDefault", createDecimalMaxPrecision(0)); }); + factory.registerFunction("toDecimal128OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toDecimal128OrDefault", createDecimalMaxPrecision(0)); }); + factory.registerFunction("toDecimal256OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toDecimal256OrDefault", createDecimalMaxPrecision(0)); }); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction("toUUIDOrDefault", [](ContextPtr context) + { return std::make_shared(context, "toUUIDOrDefault", std::make_shared()); }); + factory.registerFunction("toIPv4OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toIPv4OrDefault", std::make_shared()); }); + 
factory.registerFunction("toIPv6OrDefault", [](ContextPtr context) + { return std::make_shared(context, "toIPv6OrDefault", std::make_shared()); }); } } diff --git a/src/Functions/concat.cpp b/src/Functions/concat.cpp index d68f5256f6d..c75a806559c 100644 --- a/src/Functions/concat.cpp +++ b/src/Functions/concat.cpp @@ -1,8 +1,8 @@ #include +#include #include #include #include -#include #include #include #include diff --git a/src/Functions/coverage.cpp b/src/Functions/coverage.cpp index a1a43d0cf58..97f807e22b7 100644 --- a/src/Functions/coverage.cpp +++ b/src/Functions/coverage.cpp @@ -93,7 +93,7 @@ public: REGISTER_FUNCTION(Coverage) { - factory.registerFunction("coverageCurrent", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::Current)); }, + factory.registerFunction("coverageCurrent", [](ContextPtr){ return std::make_shared(Kind::Current); }, FunctionDocumentation { .description=R"( @@ -124,7 +124,7 @@ See https://clang.llvm.org/docs/SanitizerCoverage.html for more information. .categories{"Introspection"} }); - factory.registerFunction("coverageCumulative", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::Cumulative)); }, + factory.registerFunction("coverageCumulative", [](ContextPtr){ return std::make_shared(Kind::Cumulative); }, FunctionDocumentation { .description=R"( @@ -140,7 +140,7 @@ See the `coverageCurrent` function for the details. 
.categories{"Introspection"} }); - factory.registerFunction("coverageAll", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::All)); }, + factory.registerFunction("coverageAll", [](ContextPtr){ return std::make_shared(Kind::All); }, FunctionDocumentation { .description=R"( diff --git a/src/Functions/currentProfiles.cpp b/src/Functions/currentProfiles.cpp index 77c8a20ccee..8f14943e011 100644 --- a/src/Functions/currentProfiles.cpp +++ b/src/Functions/currentProfiles.cpp @@ -98,9 +98,9 @@ namespace REGISTER_FUNCTION(Profiles) { - factory.registerFunction("currentProfiles", [](ContextPtr context){ return std::make_unique(std::make_shared(context, Kind::currentProfiles)); }); - factory.registerFunction("enabledProfiles", [](ContextPtr context){ return std::make_unique(std::make_shared(context, Kind::enabledProfiles)); }); - factory.registerFunction("defaultProfiles", [](ContextPtr context){ return std::make_unique(std::make_shared(context, Kind::defaultProfiles)); }); + factory.registerFunction("currentProfiles", [](ContextPtr context){ return std::make_shared(context, Kind::currentProfiles); }); + factory.registerFunction("enabledProfiles", [](ContextPtr context){ return std::make_shared(context, Kind::enabledProfiles); }); + factory.registerFunction("defaultProfiles", [](ContextPtr context){ return std::make_shared(context, Kind::defaultProfiles); }); } } diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 01ef2a733c8..87438365901 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -1832,10 +1832,10 @@ using FunctionFromUnixTimestampInJodaSyntax = FunctionFormatDateTimeImpl(); - factory.registerAlias("DATE_FORMAT", FunctionFormatDateTime::name); + factory.registerAlias("DATE_FORMAT", FunctionFormatDateTime::name, FunctionFactory::CaseInsensitive); factory.registerFunction(); - factory.registerAlias("FROM_UNIXTIME", FunctionFromUnixTimestamp::name); + 
factory.registerAlias("FROM_UNIXTIME", FunctionFromUnixTimestamp::name, FunctionFactory::CaseInsensitive); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 92403d2e88e..d7addcc284e 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -17,6 +17,9 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } +namespace +{ + enum class OutputFormatting { SingleLine, @@ -29,21 +32,16 @@ enum class ErrorHandling Null }; -template class FunctionFormatQuery : public IFunction { public: - static constexpr auto name = Name::name; - static FunctionPtr create(ContextPtr context) - { - const auto & settings = context->getSettings(); - return std::make_shared(settings.max_query_size, settings.max_parser_depth); - } - - FunctionFormatQuery(size_t max_query_size_, size_t max_parser_depth_) - : max_query_size(max_query_size_) - , max_parser_depth(max_parser_depth_) + FunctionFormatQuery(ContextPtr context, String name_, OutputFormatting output_formatting_, ErrorHandling error_handling_) + : name(name_), output_formatting(output_formatting_), error_handling(error_handling_) { + const Settings & settings = context->getSettings(); + max_query_size = settings.max_query_size; + max_parser_depth = settings.max_parser_depth; + max_parser_backtracks = settings.max_parser_backtracks; } String getName() const override { return name; } @@ -59,7 +57,7 @@ public: validateFunctionArgumentTypes(*this, arguments, args); DataTypePtr string_type = std::make_shared(); - if constexpr (error_handling == ErrorHandling::Null) + if (error_handling == ErrorHandling::Null) return std::make_shared(string_type); else return string_type; @@ -70,7 +68,7 @@ public: const ColumnPtr col_query = arguments[0].column; ColumnUInt8::MutablePtr col_null_map; - if constexpr (error_handling == ErrorHandling::Null) + if (error_handling == ErrorHandling::Null) col_null_map = 
ColumnUInt8::create(input_rows_count, 0); if (const ColumnString * col_query_string = checkAndGetColumn(col_query.get())) @@ -78,7 +76,7 @@ public: auto col_res = ColumnString::create(); formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map); - if constexpr (error_handling == ErrorHandling::Null) + if (error_handling == ErrorHandling::Null) return ColumnNullable::create(std::move(col_res), std::move(col_null_map)); else return col_res; @@ -113,11 +111,11 @@ private: try { - ast = parseQuery(parser, begin, end, /*query_description*/ {}, max_query_size, max_parser_depth); + ast = parseQuery(parser, begin, end, /*query_description*/ {}, max_query_size, max_parser_depth, max_parser_backtracks); } catch (...) { - if constexpr (error_handling == ErrorHandling::Null) + if (error_handling == ErrorHandling::Null) { const size_t res_data_new_size = res_data_size + 1; if (res_data_new_size > res_data.size()) @@ -135,7 +133,6 @@ private: } else { - static_assert(error_handling == ErrorHandling::Exception); throw; } } @@ -160,92 +157,91 @@ private: res_data.resize(res_data_size); } - const size_t max_query_size; - const size_t max_parser_depth; + String name; + OutputFormatting output_formatting; + ErrorHandling error_handling; + + size_t max_query_size; + size_t max_parser_depth; + size_t max_parser_backtracks; }; -struct NameFormatQuery -{ - static constexpr auto name = "formatQuery"; -}; - -struct NameFormatQueryOrNull -{ - static constexpr auto name = "formatQueryOrNull"; -}; - -struct NameFormatQuerySingleLine -{ - static constexpr auto name = "formatQuerySingleLine"; -}; - -struct NameFormatQuerySingleLineOrNull -{ - static constexpr auto name = "formatQuerySingleLineOrNull"; -}; +} REGISTER_FUNCTION(formatQuery) { - factory.registerFunction>(FunctionDocumentation{ - .description = "Returns a formatted, possibly multi-line, version of the given SQL query. 
Throws in case of a parsing error.\n[example:multiline]", - .syntax = "formatQuery(query)", - .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, - .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", - .examples{ - {"multiline", - "SELECT formatQuery('select a, b FRom tab WHERE a > 3 and b < 3');", - "SELECT\n" - " a,\n" - " b\n" - "FROM tab\n" - "WHERE (a > 3) AND (b < 3)"}}, - .categories{"Other"}}); + factory.registerFunction( + "formatQuery", + [](ContextPtr context) { return std::make_shared(context, "formatQuery", OutputFormatting::MultiLine, ErrorHandling::Exception); }, + FunctionDocumentation{ + .description = "Returns a formatted, possibly multi-line, version of the given SQL query. Throws in case of a parsing error.\n[example:multiline]", + .syntax = "formatQuery(query)", + .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, + .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", + .examples{ + {"multiline", + "SELECT formatQuery('select a, b FRom tab WHERE a > 3 and b < 3');", + "SELECT\n" + " a,\n" + " b\n" + "FROM tab\n" + "WHERE (a > 3) AND (b < 3)"}}, + .categories{"Other"}}); } REGISTER_FUNCTION(formatQueryOrNull) { - factory.registerFunction>(FunctionDocumentation{ - .description = "Returns a formatted, possibly multi-line, version of the given SQL query. Returns NULL in case of a parsing error.\n[example:multiline]", - .syntax = "formatQueryOrNull(query)", - .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, - .returned_value = "The formatted query. 
[String](../../sql-reference/data-types/string.md).", - .examples{ - {"multiline", - "SELECT formatQuery('select a, b FRom tab WHERE a > 3 and b < 3');", - "SELECT\n" - " a,\n" - " b\n" - "FROM tab\n" - "WHERE (a > 3) AND (b < 3)"}}, - .categories{"Other"}}); + factory.registerFunction( + "formatQueryOrNull", + [](ContextPtr context) { return std::make_shared(context, "formatQueryOrNull", OutputFormatting::MultiLine, ErrorHandling::Null); }, + FunctionDocumentation{ + .description = "Returns a formatted, possibly multi-line, version of the given SQL query. Returns NULL in case of a parsing error.\n[example:multiline]", + .syntax = "formatQueryOrNull(query)", + .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, + .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", + .examples{ + {"multiline", + "SELECT formatQuery('select a, b FRom tab WHERE a > 3 and b < 3');", + "SELECT\n" + " a,\n" + " b\n" + "FROM tab\n" + "WHERE (a > 3) AND (b < 3)"}}, + .categories{"Other"}}); } REGISTER_FUNCTION(formatQuerySingleLine) { - factory.registerFunction>(FunctionDocumentation{ - .description = "Like formatQuery() but the returned formatted string contains no line breaks. Throws in case of a parsing error.\n[example:multiline]", - .syntax = "formatQuerySingleLine(query)", - .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, - .returned_value = "The formatted query. 
[String](../../sql-reference/data-types/string.md).", - .examples{ - {"multiline", - "SELECT formatQuerySingleLine('select a, b FRom tab WHERE a > 3 and b < 3');", - "SELECT a, b FROM tab WHERE (a > 3) AND (b < 3)"}}, - .categories{"Other"}}); + factory.registerFunction( + "formatQuerySingleLine", + [](ContextPtr context) { return std::make_shared(context, "formatQuerySingleLine", OutputFormatting::SingleLine, ErrorHandling::Exception); }, + FunctionDocumentation{ + .description = "Like formatQuery() but the returned formatted string contains no line breaks. Throws in case of a parsing error.\n[example:multiline]", + .syntax = "formatQuerySingleLine(query)", + .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, + .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", + .examples{ + {"multiline", + "SELECT formatQuerySingleLine('select a, b FRom tab WHERE a > 3 and b < 3');", + "SELECT a, b FROM tab WHERE (a > 3) AND (b < 3)"}}, + .categories{"Other"}}); } REGISTER_FUNCTION(formatQuerySingleLineOrNull) { - factory.registerFunction>(FunctionDocumentation{ - .description = "Like formatQuery() but the returned formatted string contains no line breaks. Returns NULL in case of a parsing error.\n[example:multiline]", - .syntax = "formatQuerySingleLineOrNull(query)", - .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, - .returned_value = "The formatted query. 
[String](../../sql-reference/data-types/string.md).", - .examples{ - {"multiline", - "SELECT formatQuerySingleLine('select a, b FRom tab WHERE a > 3 and b < 3');", - "SELECT a, b FROM tab WHERE (a > 3) AND (b < 3)"}}, - .categories{"Other"}}); + factory.registerFunction( + "formatQuerySingleLineOrNull", + [](ContextPtr context) { return std::make_shared(context, "formatQuerySingleLineOrNull", OutputFormatting::SingleLine, ErrorHandling::Null); }, + FunctionDocumentation{ + .description = "Like formatQuery() but the returned formatted string contains no line breaks. Returns NULL in case of a parsing error.\n[example:multiline]", + .syntax = "formatQuerySingleLineOrNull(query)", + .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, + .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", + .examples{ + {"multiline", + "SELECT formatQuerySingleLine('select a, b FRom tab WHERE a > 3 and b < 3');", + "SELECT a, b FROM tab WHERE (a > 3) AND (b < 3)"}}, + .categories{"Other"}}); } } diff --git a/src/Functions/fromUnixTimestamp64Micro.cpp b/src/Functions/fromUnixTimestamp64Micro.cpp index 191e2137a0d..d96e0232335 100644 --- a/src/Functions/fromUnixTimestamp64Micro.cpp +++ b/src/Functions/fromUnixTimestamp64Micro.cpp @@ -7,8 +7,7 @@ namespace DB REGISTER_FUNCTION(FromUnixTimestamp64Micro) { factory.registerFunction("fromUnixTimestamp64Micro", - [](ContextPtr context){ return std::make_unique( - std::make_shared(6, "fromUnixTimestamp64Micro", context)); }); + [](ContextPtr context){ return std::make_shared(6, "fromUnixTimestamp64Micro", context); }); } } diff --git a/src/Functions/fromUnixTimestamp64Milli.cpp b/src/Functions/fromUnixTimestamp64Milli.cpp index c6d4fcd30a2..aa77e8043c1 100644 --- a/src/Functions/fromUnixTimestamp64Milli.cpp +++ b/src/Functions/fromUnixTimestamp64Milli.cpp @@ -7,8 +7,7 @@ namespace DB REGISTER_FUNCTION(FromUnixTimestamp64Milli) { 
factory.registerFunction("fromUnixTimestamp64Milli", - [](ContextPtr context){ return std::make_unique( - std::make_shared(3, "fromUnixTimestamp64Milli", context)); }); + [](ContextPtr context){ return std::make_shared(3, "fromUnixTimestamp64Milli", context); }); } } diff --git a/src/Functions/fromUnixTimestamp64Nano.cpp b/src/Functions/fromUnixTimestamp64Nano.cpp index 2b5a7addbfc..f9d69219933 100644 --- a/src/Functions/fromUnixTimestamp64Nano.cpp +++ b/src/Functions/fromUnixTimestamp64Nano.cpp @@ -7,8 +7,7 @@ namespace DB REGISTER_FUNCTION(FromUnixTimestamp64Nano) { factory.registerFunction("fromUnixTimestamp64Nano", - [](ContextPtr context){ return std::make_unique( - std::make_shared(9, "fromUnixTimestamp64Nano", context)); }); + [](ContextPtr context){ return std::make_shared(9, "fromUnixTimestamp64Nano", context); }); } } diff --git a/src/Functions/getFuzzerData.cpp b/src/Functions/getFuzzerData.cpp index 6d748619926..a6f8dd1de2c 100644 --- a/src/Functions/getFuzzerData.cpp +++ b/src/Functions/getFuzzerData.cpp @@ -1,13 +1,59 @@ #ifdef FUZZING_MODE -#include + +#include +#include +#include +#include + namespace DB { +namespace +{ + +class FunctionGetFuzzerData : public IFunction +{ + inline static String fuzz_data; + +public: + static constexpr auto name = "getFuzzerData"; + + inline static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + inline String getName() const override { return name; } + + inline size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(); + } + + inline bool isDeterministic() const override { return false; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, + const DataTypePtr &, + size_t input_rows_count) const override + { + return 
DataTypeString().createColumnConst(input_rows_count, fuzz_data); + } + + [[maybe_unused]] static void update(const String & fuzz_data_) + { + fuzz_data = fuzz_data_; + } +}; + +} + REGISTER_FUNCTION(GetFuzzerData) { factory.registerFunction(); } } + #endif diff --git a/src/Functions/getFuzzerData.h b/src/Functions/getFuzzerData.h deleted file mode 100644 index 635ca2bdce9..00000000000 --- a/src/Functions/getFuzzerData.h +++ /dev/null @@ -1,50 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace DB -{ -class FunctionGetFuzzerData : public IFunction -{ - inline static String fuzz_data; - -public: - static constexpr auto name = "getFuzzerData"; - - inline static FunctionPtr create(ContextPtr) { return create(); } - - static FunctionPtr create() - { - return std::make_shared(); - } - - inline String getName() const override { return name; } - - inline size_t getNumberOfArguments() const override { return 0; } - - DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override - { - return std::make_shared(); - } - - inline bool isDeterministic() const override { return false; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName &, - const DataTypePtr &, - size_t input_rows_count) const override - { - return DataTypeString().createColumnConst(input_rows_count, fuzz_data); - } - - static void update(const String & fuzz_data_) - { - fuzz_data = fuzz_data_; - } -}; - -} diff --git a/src/Functions/like.cpp b/src/Functions/like.cpp index 5a86e37a92d..de907380265 100644 --- a/src/Functions/like.cpp +++ b/src/Functions/like.cpp @@ -1,5 +1,5 @@ -#include "FunctionFactory.h" #include "like.h" +#include "FunctionFactory.h" namespace DB diff --git a/src/Functions/locate.cpp b/src/Functions/locate.cpp new file mode 100644 index 00000000000..9a70fbb2d72 --- /dev/null +++ b/src/Functions/locate.cpp @@ -0,0 +1,34 
@@ +#include "FunctionsStringSearch.h" +#include "FunctionFactory.h" +#include "PositionImpl.h" + + +namespace DB +{ +namespace +{ + +struct NameLocate +{ + static constexpr auto name = "locate"; +}; + +using FunctionLocate = FunctionsStringSearch, ExecutionErrorPolicy::Throw, HaystackNeedleOrderIsConfigurable::Yes>; + +} + +REGISTER_FUNCTION(Locate) +{ + FunctionDocumentation::Description doc_description = "Like function `position` but with arguments `haystack` and `needle` switched. The behavior of this function depends on the ClickHouse version: In versions < v24.3, `locate` was an alias of function `position` and accepted arguments `(haystack, needle[, start_pos])`. In versions >= 24.3, `locate` is an individual function (for better compatibility with MySQL) and accepts arguments `(needle, haystack[, start_pos])`. The previous behavior can be restored using setting `function_locate_has_mysql_compatible_argument_order = false`."; + FunctionDocumentation::Syntax doc_syntax = "locate(needle, haystack[, start_pos])"; + FunctionDocumentation::Arguments doc_arguments = {{"needle", "Substring to be searched (String)"}, + {"haystack", "String in which the search is performed (String)."}, + {"start_pos", "Position (1-based) in `haystack` at which the search starts (UInt*)."}}; + FunctionDocumentation::ReturnedValue doc_returned_value = "Starting position in bytes and counting from 1, if the substring was found. 
0, if the substring was not found."; + FunctionDocumentation::Examples doc_examples = {{"Example", "SELECT locate('ca', 'abcabc');", "3"}}; + FunctionDocumentation::Categories doc_categories = {"String search"}; + + + factory.registerFunction({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive); +} +} diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 81304f3afbd..49c45d0c0be 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -549,8 +549,8 @@ REGISTER_FUNCTION(MultiIf) factory.registerFunction(); /// These are obsolete function names. - factory.registerFunction("caseWithoutExpr"); - factory.registerFunction("caseWithoutExpression"); + factory.registerAlias("caseWithoutExpr", "multiIf"); + factory.registerAlias("caseWithoutExpression", "multiIf"); } FunctionOverloadResolverPtr createInternalMultiIfOverloadResolver(bool allow_execute_multiif_columnar, bool allow_experimental_variant_type, bool use_variant_as_common_type) @@ -559,5 +559,3 @@ FunctionOverloadResolverPtr createInternalMultiIfOverloadResolver(bool allow_exe } } - - diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 18882177c90..7a0d7c75774 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -1942,7 +1942,7 @@ namespace REGISTER_FUNCTION(ParseDateTime) { factory.registerFunction(); - factory.registerAlias("TO_UNIXTIME", FunctionParseDateTime::name); + factory.registerAlias("TO_UNIXTIME", FunctionParseDateTime::name, FunctionFactory::CaseInsensitive); factory.registerFunction(); factory.registerFunction(); factory.registerAlias("str_to_date", FunctionParseDateTimeOrNull::name, FunctionFactory::CaseInsensitive); diff --git a/src/Functions/position.cpp b/src/Functions/position.cpp index 409a593b44c..29a5db2eb24 100644 --- a/src/Functions/position.cpp +++ b/src/Functions/position.cpp @@ -20,6 +20,5 @@ using 
FunctionPosition = FunctionsStringSearch({}, FunctionFactory::CaseInsensitive); - factory.registerAlias("locate", NamePosition::name, FunctionFactory::CaseInsensitive); } } diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp index f2dd1f1c51d..4a2d502a31a 100644 --- a/src/Functions/snowflake.cpp +++ b/src/Functions/snowflake.cpp @@ -249,28 +249,24 @@ public: REGISTER_FUNCTION(DateTimeToSnowflake) { factory.registerFunction("dateTimeToSnowflake", - [](ContextPtr){ return std::make_unique( - std::make_shared("dateTimeToSnowflake")); }); + [](ContextPtr){ return std::make_shared("dateTimeToSnowflake"); }); } REGISTER_FUNCTION(DateTime64ToSnowflake) { factory.registerFunction("dateTime64ToSnowflake", - [](ContextPtr){ return std::make_unique( - std::make_shared("dateTime64ToSnowflake")); }); + [](ContextPtr){ return std::make_shared("dateTime64ToSnowflake"); }); } REGISTER_FUNCTION(SnowflakeToDateTime) { factory.registerFunction("snowflakeToDateTime", - [](ContextPtr context){ return std::make_unique( - std::make_shared("snowflakeToDateTime", context)); }); + [](ContextPtr context){ return std::make_shared("snowflakeToDateTime", context); }); } REGISTER_FUNCTION(SnowflakeToDateTime64) { factory.registerFunction("snowflakeToDateTime64", - [](ContextPtr context){ return std::make_unique( - std::make_shared("snowflakeToDateTime64", context)); }); + [](ContextPtr context){ return std::make_shared("snowflakeToDateTime64", context); }); } } diff --git a/src/Functions/toFixedString.h b/src/Functions/toFixedString.h index 7bee666c5dd..9c7ffc48004 100644 --- a/src/Functions/toFixedString.h +++ b/src/Functions/toFixedString.h @@ -34,7 +34,6 @@ class FunctionToFixedString : public IFunction public: static constexpr auto name = "toFixedString"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - static FunctionPtr create() { return std::make_shared(); } String getName() const override { @@ -158,4 +157,3 @@ public: }; } - diff --git 
a/src/Functions/toUnixTimestamp64Micro.cpp b/src/Functions/toUnixTimestamp64Micro.cpp index fd35e2a7a73..964ad5a2c18 100644 --- a/src/Functions/toUnixTimestamp64Micro.cpp +++ b/src/Functions/toUnixTimestamp64Micro.cpp @@ -7,8 +7,7 @@ namespace DB REGISTER_FUNCTION(ToUnixTimestamp64Micro) { factory.registerFunction("toUnixTimestamp64Micro", - [](ContextPtr){ return std::make_unique( - std::make_shared(6, "toUnixTimestamp64Micro")); }); + [](ContextPtr){ return std::make_shared(6, "toUnixTimestamp64Micro"); }); } } diff --git a/src/Functions/toUnixTimestamp64Milli.cpp b/src/Functions/toUnixTimestamp64Milli.cpp index e6a680f941a..bc92a6d1fe3 100644 --- a/src/Functions/toUnixTimestamp64Milli.cpp +++ b/src/Functions/toUnixTimestamp64Milli.cpp @@ -7,8 +7,7 @@ namespace DB REGISTER_FUNCTION(ToUnixTimestamp64Milli) { factory.registerFunction("toUnixTimestamp64Milli", - [](ContextPtr){ return std::make_unique( - std::make_shared(3, "toUnixTimestamp64Milli")); }); + [](ContextPtr){ return std::make_shared(3, "toUnixTimestamp64Milli"); }); } } diff --git a/src/Functions/toUnixTimestamp64Nano.cpp b/src/Functions/toUnixTimestamp64Nano.cpp index 257f011603c..8829b00bf56 100644 --- a/src/Functions/toUnixTimestamp64Nano.cpp +++ b/src/Functions/toUnixTimestamp64Nano.cpp @@ -7,8 +7,7 @@ namespace DB REGISTER_FUNCTION(ToUnixTimestamp64Nano) { factory.registerFunction("toUnixTimestamp64Nano", - [](ContextPtr){ return std::make_unique( - std::make_shared(9, "toUnixTimestamp64Nano")); }); + [](ContextPtr){ return std::make_shared(9, "toUnixTimestamp64Nano"); }); } } diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index a55588baeaa..1147d74c146 100644 --- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -206,7 +206,7 @@ namespace if (!filter.empty()) { ParserExpression parser; - ASTPtr expr = 
parseQuery(parser, filter, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr expr = parseQuery(parser, filter, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); query->filters.emplace_back(type, std::move(expr)); } } diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index a9578b5540f..39b63f6b635 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -53,6 +53,7 @@ namespace ProfileEvents extern const Event OverflowThrow; extern const Event OverflowBreak; extern const Event OverflowAny; + extern const Event AggregationOptimizedEqualRangesOfKeys; } namespace CurrentMetrics @@ -1344,6 +1345,7 @@ void NO_INLINE Aggregator::executeImplBatch( if (use_compiled_functions) { std::vector columns_data; + bool can_optimize_equal_keys_ranges = true; for (size_t i = 0; i < aggregate_functions.size(); ++i) { @@ -1352,13 +1354,15 @@ void NO_INLINE Aggregator::executeImplBatch( AggregateFunctionInstruction * inst = aggregate_instructions + i; size_t arguments_size = inst->that->getArgumentTypes().size(); // NOLINT + can_optimize_equal_keys_ranges &= inst->can_optimize_equal_keys_ranges; for (size_t argument_index = 0; argument_index < arguments_size; ++argument_index) columns_data.emplace_back(getColumnData(inst->batch_arguments[argument_index])); } - if (all_keys_are_const || (!no_more_keys && state.hasOnlyOneValueSinceLastReset())) + if (all_keys_are_const || (can_optimize_equal_keys_ranges && state.hasOnlyOneValueSinceLastReset())) { + ProfileEvents::increment(ProfileEvents::AggregationOptimizedEqualRangesOfKeys); auto add_into_aggregate_states_function_single_place = compiled_aggregate_functions_holder->compiled_aggregate_functions.add_into_aggregate_states_function_single_place; add_into_aggregate_states_function_single_place(row_begin, row_end, columns_data.data(), places[key_start]); } @@ -1380,10 +1384,15 @@ void NO_INLINE Aggregator::executeImplBatch( AggregateFunctionInstruction * inst = 
aggregate_instructions + i; - if (all_keys_are_const || (!no_more_keys && state.hasOnlyOneValueSinceLastReset())) + if (all_keys_are_const || (inst->can_optimize_equal_keys_ranges && state.hasOnlyOneValueSinceLastReset())) + { + ProfileEvents::increment(ProfileEvents::AggregationOptimizedEqualRangesOfKeys); addBatchSinglePlace(row_begin, row_end, inst, places[key_start] + inst->state_offset, aggregates_pool); + } else + { addBatch(row_begin, row_end, inst, places.get(), aggregates_pool); + } } } @@ -1573,6 +1582,7 @@ void Aggregator::prepareAggregateInstructions( } aggregate_functions_instructions[i].has_sparse_arguments = has_sparse_arguments; + aggregate_functions_instructions[i].can_optimize_equal_keys_ranges = aggregate_functions[i]->canOptimizeEqualKeysRanges(); aggregate_functions_instructions[i].arguments = aggregate_columns[i].data(); aggregate_functions_instructions[i].state_offset = offsets_of_aggregate_states[i]; diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 6c357623003..eeb2355d370 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1221,6 +1221,7 @@ public: const IColumn ** batch_arguments{}; const UInt64 * offsets{}; bool has_sparse_arguments = false; + bool can_optimize_equal_keys_ranges = true; }; /// Used for optimize_aggregation_in_order: diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 9327f31b6ff..9234d052d97 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -148,19 +148,25 @@ AsynchronousInsertQueue::InsertData::Entry::Entry( { } +void AsynchronousInsertQueue::InsertData::Entry::resetChunk() +{ + if (chunk.empty()) + return; + + // To avoid races on counter of user's MemoryTracker we should free memory at this moment. + // Entries data must be destroyed in context of user who runs async insert. 
+ // Each entry in the list may correspond to a different user, + // so we need to switch current thread's MemoryTracker. + MemoryTrackerSwitcher switcher(user_memory_tracker); + chunk = {}; +} + void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr exception_) { if (finished.exchange(true)) return; - { - // To avoid races on counter of user's MemoryTracker we should free memory at this moment. - // Entries data must be destroyed in context of user who runs async insert. - // Each entry in the list may correspond to a different user, - // so we need to switch current thread's MemoryTracker. - MemoryTrackerSwitcher switcher(user_memory_tracker); - chunk = {}; - } + resetChunk(); if (exception_) { @@ -224,7 +230,7 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue() auto & shard = queue_shards[i]; shard.are_tasks_available.notify_one(); - assert(dump_by_first_update_threads[i].joinable()); + chassert(dump_by_first_update_threads[i].joinable()); dump_by_first_update_threads[i].join(); if (flush_on_shutdown) @@ -510,14 +516,13 @@ void AsynchronousInsertQueue::validateSettings(const Settings & settings, Logger /// Adaptive timeout settings. const auto min_ms = std::chrono::milliseconds(settings.async_insert_busy_timeout_min_ms); - if (min_ms > max_ms) - if (log) - LOG_WARNING( - log, - "Setting 'async_insert_busy_timeout_min_ms'={} is greater than 'async_insert_busy_timeout_max_ms'={}. Ignoring " - "'async_insert_busy_timeout_min_ms'", - min_ms.count(), - max_ms.count()); + if (min_ms > max_ms && log) + LOG_WARNING( + log, + "Setting 'async_insert_busy_timeout_min_ms'={} is greater than 'async_insert_busy_timeout_max_ms'={}. 
Ignoring " + "'async_insert_busy_timeout_min_ms'", + min_ms.count(), + max_ms.count()); if (settings.async_insert_busy_timeout_increase_rate <= 0) throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting 'async_insert_busy_timeout_increase_rate' must be greater than zero"); @@ -953,14 +958,18 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( "Expected entry with data kind Parsed. Got: {}", entry->chunk.getDataKind()); auto buffer = std::make_unique(*bytes); + size_t num_bytes = bytes->size(); size_t num_rows = executor.execute(*buffer); + total_rows += num_rows; chunk_info->offsets.push_back(total_rows); chunk_info->tokens.push_back(entry->async_dedup_token); add_to_async_insert_log(entry, query_for_logging, current_exception, num_rows, num_bytes, data->timeout_ms); + current_exception.clear(); + entry->resetChunk(); } Chunk chunk(executor.getResultColumns(), total_rows); @@ -1011,6 +1020,8 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries( const auto & query_for_logging = get_query_by_format(entry->format); add_to_async_insert_log(entry, query_for_logging, "", block->rows(), block->bytes(), data->timeout_ms); + + entry->resetChunk(); } Chunk chunk(std::move(result_columns), total_rows); diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index 5076701d0b0..9bf4c799660 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -117,6 +117,17 @@ private: return DataKind::Parsed; } + bool empty() const + { + return std::visit([](const T & arg) + { + if constexpr (std::is_same_v) + return arg.rows() == 0; + else + return arg.empty(); + }, *this); + } + const String * asString() const { return std::get_if(this); } const Block * asBlock() const { return std::get_if(this); } }; @@ -140,7 +151,9 @@ private: const String & format_, MemoryTracker * user_memory_tracker_); + void resetChunk(); void finish(std::exception_ptr exception_ = nullptr); + 
std::future getFuture() { return promise.get_future(); } bool isFinished() const { return finished; } diff --git a/src/Interpreters/AsynchronousMetricLog.cpp b/src/Interpreters/AsynchronousMetricLog.cpp index f905f72e7a7..dc67bd91550 100644 --- a/src/Interpreters/AsynchronousMetricLog.cpp +++ b/src/Interpreters/AsynchronousMetricLog.cpp @@ -8,7 +8,6 @@ #include #include #include -#include namespace DB @@ -22,31 +21,31 @@ ColumnsDescription AsynchronousMetricLogElement::getColumnsDescription() { "hostname", std::make_shared(std::make_shared()), - parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), "Hostname of the server executing the query." }, { "event_date", std::make_shared(), - parseQuery(codec_parser, "(Delta(2), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + parseQuery(codec_parser, "(Delta(2), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), "Event date." }, { "event_time", std::make_shared(), - parseQuery(codec_parser, "(Delta(4), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + parseQuery(codec_parser, "(Delta(4), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), "Event time." }, { "metric", std::make_shared(std::make_shared()), - parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), "Metric name." }, { "value", std::make_shared(), - parseQuery(codec_parser, "(ZSTD(3))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + parseQuery(codec_parser, "(ZSTD(3))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), "Metric value." 
} }; diff --git a/src/Interpreters/Cache/EvictionCandidates.cpp b/src/Interpreters/Cache/EvictionCandidates.cpp index 7dceab4f95f..f1ae2baa347 100644 --- a/src/Interpreters/Cache/EvictionCandidates.cpp +++ b/src/Interpreters/Cache/EvictionCandidates.cpp @@ -32,7 +32,7 @@ void EvictionCandidates::add(LockedKey & locked_key, const FileSegmentMetadataPt ++candidates_size; } -void EvictionCandidates::evict(FileCacheQueryLimit::QueryContext * query_context, const CacheGuard::Lock & lock) +void EvictionCandidates::evict(FileCacheQueryLimit::QueryContext * query_context, const CachePriorityGuard::Lock & lock) { if (candidates.empty()) return; diff --git a/src/Interpreters/Cache/EvictionCandidates.h b/src/Interpreters/Cache/EvictionCandidates.h index 0557962d97f..e817d33d5fe 100644 --- a/src/Interpreters/Cache/EvictionCandidates.h +++ b/src/Interpreters/Cache/EvictionCandidates.h @@ -11,7 +11,7 @@ public: void add(LockedKey & locked_key, const FileSegmentMetadataPtr & candidate); - void evict(FileCacheQueryLimit::QueryContext * query_context, const CacheGuard::Lock &); + void evict(FileCacheQueryLimit::QueryContext * query_context, const CachePriorityGuard::Lock &); size_t size() const { return candidates_size; } diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index ea40ffcfa3c..65b6a3a172d 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -183,13 +183,13 @@ void FileCache::initialize() is_initialized = true; } -CacheGuard::Lock FileCache::lockCache() const +CachePriorityGuard::Lock FileCache::lockCache() const { ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheLockCacheMicroseconds); return cache_guard.lock(); } -CacheGuard::Lock FileCache::tryLockCache(std::optional acquire_timeout) const +CachePriorityGuard::Lock FileCache::tryLockCache(std::optional acquire_timeout) const { return acquire_timeout.has_value() ? 
cache_guard.tryLockFor(acquire_timeout.value()) : cache_guard.tryLock(); } @@ -706,7 +706,7 @@ KeyMetadata::iterator FileCache::addFileSegment( size_t size, FileSegment::State state, const CreateFileSegmentSettings & create_settings, - const CacheGuard::Lock * lock) + const CachePriorityGuard::Lock * lock) { /// Create a file_segment_metadata and put it in `files` map by [key][offset]. diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 007c4fd9483..8ea5f4dab40 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -173,8 +173,8 @@ public: void deactivateBackgroundOperations(); - CacheGuard::Lock lockCache() const; - CacheGuard::Lock tryLockCache(std::optional acquire_timeout = std::nullopt) const; + CachePriorityGuard::Lock lockCache() const; + CachePriorityGuard::Lock tryLockCache(std::optional acquire_timeout = std::nullopt) const; std::vector sync(); @@ -208,7 +208,7 @@ private: CacheMetadata metadata; FileCachePriorityPtr main_priority; - mutable CacheGuard cache_guard; + mutable CachePriorityGuard cache_guard; struct HitsCountStash { @@ -280,7 +280,7 @@ private: size_t size, FileSegment::State state, const CreateFileSegmentSettings & create_settings, - const CacheGuard::Lock *); + const CachePriorityGuard::Lock *); }; } diff --git a/src/Interpreters/Cache/Guards.h b/src/Interpreters/Cache/Guards.h index 0ac7cb80483..6193ee38755 100644 --- a/src/Interpreters/Cache/Guards.h +++ b/src/Interpreters/Cache/Guards.h @@ -10,17 +10,17 @@ namespace DB * 2. KeyGuard::Lock (hold till the end of the method) * * FileCache::tryReserve - * 1. CacheGuard::Lock + * 1. CachePriorityGuard::Lock * 2. KeyGuard::Lock (taken without metadata lock) * 3. any number of KeyGuard::Lock's for files which are going to be evicted (taken via metadata lock) * * FileCache::removeIfExists - * 1. CacheGuard::Lock + * 1. CachePriorityGuard::Lock * 2. KeyGuard::Lock (taken via metadata lock) * 3. 
FileSegmentGuard::Lock * * FileCache::removeAllReleasable - * 1. CacheGuard::Lock + * 1. CachePriorityGuard::Lock * 2. any number of KeyGuard::Lock's locks (takken via metadata lock), but at a moment of time only one key lock can be hold * 3. FileSegmentGuard::Lock * @@ -34,23 +34,23 @@ namespace DB * 2. FileSegmentGuard::Lock * * FileSegment::complete - * 1. CacheGuard::Lock + * 1. CachePriorityGuard::Lock * 2. KeyGuard::Lock (taken without metadata lock) * 3. FileSegmentGuard::Lock * * Rules: - * 1. Priority of locking: CacheGuard::Lock > CacheMetadataGuard::Lock > KeyGuard::Lock > FileSegmentGuard::Lock - * 2. If we take more than one key lock at a moment of time, we need to take CacheGuard::Lock (example: tryReserve()) + * 1. Priority of locking: CachePriorityGuard::Lock > CacheMetadataGuard::Lock > KeyGuard::Lock > FileSegmentGuard::Lock + * 2. If we take more than one key lock at a moment of time, we need to take CachePriorityGuard::Lock (example: tryReserve()) * * - * _CacheGuard_ + * _CachePriorityGuard_ * 1. FileCache::tryReserve * 2. FileCache::removeIfExists(key) * 3. FileCache::removeAllReleasable * 4. FileSegment::complete * * _KeyGuard_ _CacheMetadataGuard_ - * 1. all from CacheGuard 1. getOrSet/get/set + * 1. all from CachePriorityGuard 1. getOrSet/get/set * 2. getOrSet/get/Set * * *This table does not include locks taken for introspection and system tables. @@ -59,11 +59,11 @@ namespace DB /** * Cache priority queue guard. 
*/ -struct CacheGuard : private boost::noncopyable +struct CachePriorityGuard : private boost::noncopyable { using Mutex = std::timed_mutex; - /// struct is used (not keyword `using`) to make CacheGuard::Lock non-interchangable with other guards locks - /// so, we wouldn't be able to pass CacheGuard::Lock to a function which accepts KeyGuard::Lock, for example + /// struct is used (not keyword `using`) to make CachePriorityGuard::Lock non-interchangeable with other guards' locks + /// so, we wouldn't be able to pass CachePriorityGuard::Lock to a function which accepts KeyGuard::Lock, for example struct Lock : public std::unique_lock { using Base = std::unique_lock; diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index bc036166940..58011780323 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -43,11 +43,11 @@ public: virtual EntryPtr getEntry() const = 0; - virtual size_t increasePriority(const CacheGuard::Lock &) = 0; + virtual size_t increasePriority(const CachePriorityGuard::Lock &) = 0; virtual void updateSize(int64_t size) = 0; - virtual void remove(const CacheGuard::Lock &) = 0; + virtual void remove(const CachePriorityGuard::Lock &) = 0; virtual void invalidate() = 0; @@ -57,13 +57,13 @@ public: virtual ~IFileCachePriority() = default; - size_t getElementsLimit(const CacheGuard::Lock &) const { return max_elements; } + size_t getElementsLimit(const CachePriorityGuard::Lock &) const { return max_elements; } - size_t getSizeLimit(const CacheGuard::Lock &) const { return max_size; } + size_t getSizeLimit(const CachePriorityGuard::Lock &) const { return max_size; } - virtual size_t getSize(const CacheGuard::Lock &) const = 0; + virtual size_t getSize(const CachePriorityGuard::Lock &) const = 0; - virtual size_t getElementsCount(const CacheGuard::Lock &) const = 0; + virtual size_t getElementsCount(const CachePriorityGuard::Lock &) const = 0; /// Throws 
exception if there is not enough size to fit it. virtual IteratorPtr add( /// NOLINT @@ -71,7 +71,7 @@ public: size_t offset, size_t size, const UserInfo & user, - const CacheGuard::Lock &, + const CachePriorityGuard::Lock &, bool best_effort = false) = 0; /// `reservee` is the entry for which are reserving now. @@ -79,11 +79,11 @@ public: /// for the corresponding file segment. virtual bool canFit( /// NOLINT size_t size, - const CacheGuard::Lock &, + const CachePriorityGuard::Lock &, IteratorPtr reservee = nullptr, bool best_effort = false) const = 0; - virtual void shuffle(const CacheGuard::Lock &) = 0; + virtual void shuffle(const CachePriorityGuard::Lock &) = 0; struct IPriorityDump { @@ -91,9 +91,9 @@ public: }; using PriorityDumpPtr = std::shared_ptr; - virtual PriorityDumpPtr dump(const CacheGuard::Lock &) = 0; + virtual PriorityDumpPtr dump(const CachePriorityGuard::Lock &) = 0; - using FinalizeEvictionFunc = std::function; + using FinalizeEvictionFunc = std::function; virtual bool collectCandidatesForEviction( size_t size, FileCacheReserveStat & stat, @@ -101,9 +101,9 @@ public: IFileCachePriority::IteratorPtr reservee, FinalizeEvictionFunc & finalize_eviction_func, const UserID & user_id, - const CacheGuard::Lock &) = 0; + const CachePriorityGuard::Lock &) = 0; - virtual bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) = 0; + virtual bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CachePriorityGuard::Lock &) = 0; protected: IFileCachePriority(size_t max_size_, size_t max_elements_); diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index bce03b60024..08e65b577ca 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -43,13 +43,13 @@ IFileCachePriority::IteratorPtr LRUFileCachePriority::add( /// NOLINT size_t offset, size_t size, const 
UserInfo &, - const CacheGuard::Lock & lock, + const CachePriorityGuard::Lock & lock, bool) { return std::make_shared(add(std::make_shared(key_metadata->key, offset, size, key_metadata), lock)); } -LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(EntryPtr entry, const CacheGuard::Lock & lock) +LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(EntryPtr entry, const CachePriorityGuard::Lock & lock) { if (entry->size == 0) { @@ -93,7 +93,7 @@ LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(EntryPtr entry, cons return LRUIterator(this, iterator); } -LRUFileCachePriority::LRUQueue::iterator LRUFileCachePriority::remove(LRUQueue::iterator it, const CacheGuard::Lock &) +LRUFileCachePriority::LRUQueue::iterator LRUFileCachePriority::remove(LRUQueue::iterator it, const CachePriorityGuard::Lock &) { /// If size is 0, entry is invalidated, current_elements_num was already updated. const auto & entry = **it; @@ -150,7 +150,7 @@ bool LRUFileCachePriority::LRUIterator::operator ==(const LRUIterator & other) c return cache_priority == other.cache_priority && iterator == other.iterator; } -void LRUFileCachePriority::iterate(IterateFunc && func, const CacheGuard::Lock & lock) +void LRUFileCachePriority::iterate(IterateFunc && func, const CachePriorityGuard::Lock & lock) { for (auto it = queue.begin(); it != queue.end();) { @@ -201,7 +201,7 @@ void LRUFileCachePriority::iterate(IterateFunc && func, const CacheGuard::Lock & bool LRUFileCachePriority::canFit( /// NOLINT size_t size, - const CacheGuard::Lock & lock, + const CachePriorityGuard::Lock & lock, IteratorPtr, bool) const { @@ -212,7 +212,7 @@ bool LRUFileCachePriority::canFit( size_t size, size_t released_size_assumption, size_t released_elements_assumption, - const CacheGuard::Lock &) const + const CachePriorityGuard::Lock &) const { return (max_size == 0 || (state->current_size + size - released_size_assumption <= max_size)) && (max_elements == 0 || state->current_elements_num + 1 - 
released_elements_assumption <= max_elements); @@ -225,7 +225,7 @@ bool LRUFileCachePriority::collectCandidatesForEviction( IFileCachePriority::IteratorPtr, FinalizeEvictionFunc &, const UserID &, - const CacheGuard::Lock & lock) + const CachePriorityGuard::Lock & lock) { if (canFit(size, lock)) return true; @@ -264,7 +264,7 @@ bool LRUFileCachePriority::collectCandidatesForEviction( return can_fit(); } -LRUFileCachePriority::LRUIterator LRUFileCachePriority::move(LRUIterator & it, LRUFileCachePriority & other, const CacheGuard::Lock &) +LRUFileCachePriority::LRUIterator LRUFileCachePriority::move(LRUIterator & it, LRUFileCachePriority & other, const CachePriorityGuard::Lock &) { const auto & entry = *it.getEntry(); if (entry.size == 0) @@ -297,7 +297,7 @@ LRUFileCachePriority::LRUIterator LRUFileCachePriority::move(LRUIterator & it, L return LRUIterator(this, it.iterator); } -IFileCachePriority::PriorityDumpPtr LRUFileCachePriority::dump(const CacheGuard::Lock & lock) +IFileCachePriority::PriorityDumpPtr LRUFileCachePriority::dump(const CachePriorityGuard::Lock & lock) { std::vector res; iterate([&](LockedKey &, const FileSegmentMetadataPtr & segment_metadata) @@ -309,7 +309,7 @@ IFileCachePriority::PriorityDumpPtr LRUFileCachePriority::dump(const CacheGuard: } bool LRUFileCachePriority::modifySizeLimits( - size_t max_size_, size_t max_elements_, double /* size_ratio_ */, const CacheGuard::Lock & lock) + size_t max_size_, size_t max_elements_, double /* size_ratio_ */, const CachePriorityGuard::Lock & lock) { if (max_size == max_size_ && max_elements == max_elements_) return false; /// Nothing to change. 
@@ -353,7 +353,7 @@ bool LRUFileCachePriority::modifySizeLimits( return true; } -void LRUFileCachePriority::LRUIterator::remove(const CacheGuard::Lock & lock) +void LRUFileCachePriority::LRUIterator::remove(const CachePriorityGuard::Lock & lock) { assertValid(); cache_priority->remove(iterator, lock); @@ -389,7 +389,7 @@ void LRUFileCachePriority::LRUIterator::updateSize(int64_t size) entry->size += size; } -size_t LRUFileCachePriority::LRUIterator::increasePriority(const CacheGuard::Lock &) +size_t LRUFileCachePriority::LRUIterator::increasePriority(const CachePriorityGuard::Lock &) { assertValid(); cache_priority->queue.splice(cache_priority->queue.end(), cache_priority->queue, iterator); @@ -402,7 +402,7 @@ void LRUFileCachePriority::LRUIterator::assertValid() const throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to use invalid iterator"); } -void LRUFileCachePriority::shuffle(const CacheGuard::Lock &) +void LRUFileCachePriority::shuffle(const CachePriorityGuard::Lock &) { std::vector its; its.reserve(queue.size()); diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index a74a4b8b621..dcd4ee0a24c 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -24,13 +24,13 @@ protected: public: LRUFileCachePriority(size_t max_size_, size_t max_elements_, StatePtr state_ = nullptr); - size_t getSize(const CacheGuard::Lock &) const override { return state->current_size; } + size_t getSize(const CachePriorityGuard::Lock &) const override { return state->current_size; } - size_t getElementsCount(const CacheGuard::Lock &) const override { return state->current_elements_num; } + size_t getElementsCount(const CachePriorityGuard::Lock &) const override { return state->current_elements_num; } bool canFit( /// NOLINT size_t size, - const CacheGuard::Lock &, + const CachePriorityGuard::Lock &, IteratorPtr reservee = nullptr, bool best_effort = false) const 
override; @@ -39,7 +39,7 @@ public: size_t offset, size_t size, const UserInfo & user, - const CacheGuard::Lock &, + const CachePriorityGuard::Lock &, bool best_effort = false) override; bool collectCandidatesForEviction( @@ -49,9 +49,9 @@ public: IFileCachePriority::IteratorPtr reservee, FinalizeEvictionFunc & finalize_eviction_func, const UserID & user_id, - const CacheGuard::Lock &) override; + const CachePriorityGuard::Lock &) override; - void shuffle(const CacheGuard::Lock &) override; + void shuffle(const CachePriorityGuard::Lock &) override; struct LRUPriorityDump : public IPriorityDump { @@ -59,11 +59,11 @@ public: explicit LRUPriorityDump(const std::vector & infos_) : infos(infos_) {} void merge(const LRUPriorityDump & other) { infos.insert(infos.end(), other.infos.begin(), other.infos.end()); } }; - PriorityDumpPtr dump(const CacheGuard::Lock &) override; + PriorityDumpPtr dump(const CachePriorityGuard::Lock &) override; - void pop(const CacheGuard::Lock & lock) { remove(queue.begin(), lock); } + void pop(const CachePriorityGuard::Lock & lock) { remove(queue.begin(), lock); } - bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) override; + bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CachePriorityGuard::Lock &) override; private: class LRUIterator; @@ -77,9 +77,9 @@ private: void updateElementsCount(int64_t num); void updateSize(int64_t size); - bool canFit(size_t size, size_t released_size_assumption, size_t released_elements_assumption, const CacheGuard::Lock &) const; + bool canFit(size_t size, size_t released_size_assumption, size_t released_elements_assumption, const CachePriorityGuard::Lock &) const; - LRUQueue::iterator remove(LRUQueue::iterator it, const CacheGuard::Lock &); + LRUQueue::iterator remove(LRUQueue::iterator it, const CachePriorityGuard::Lock &); enum class IterationResult { @@ -88,10 +88,10 @@ private: REMOVE_AND_CONTINUE, }; using 
IterateFunc = std::function; - void iterate(IterateFunc && func, const CacheGuard::Lock &); + void iterate(IterateFunc && func, const CachePriorityGuard::Lock &); - LRUIterator move(LRUIterator & it, LRUFileCachePriority & other, const CacheGuard::Lock &); - LRUIterator add(EntryPtr entry, const CacheGuard::Lock &); + LRUIterator move(LRUIterator & it, LRUFileCachePriority & other, const CachePriorityGuard::Lock &); + LRUIterator add(EntryPtr entry, const CachePriorityGuard::Lock &); }; class LRUFileCachePriority::LRUIterator : public IFileCachePriority::Iterator @@ -108,9 +108,9 @@ public: EntryPtr getEntry() const override { return *iterator; } - size_t increasePriority(const CacheGuard::Lock &) override; + size_t increasePriority(const CachePriorityGuard::Lock &) override; - void remove(const CacheGuard::Lock &) override; + void remove(const CachePriorityGuard::Lock &) override; void invalidate() override; diff --git a/src/Interpreters/Cache/QueryLimit.cpp b/src/Interpreters/Cache/QueryLimit.cpp index ba4f5017772..9421005dc92 100644 --- a/src/Interpreters/Cache/QueryLimit.cpp +++ b/src/Interpreters/Cache/QueryLimit.cpp @@ -16,7 +16,7 @@ static bool isQueryInitialized() && !CurrentThread::getQueryId().empty(); } -FileCacheQueryLimit::QueryContextPtr FileCacheQueryLimit::tryGetQueryContext(const CacheGuard::Lock &) +FileCacheQueryLimit::QueryContextPtr FileCacheQueryLimit::tryGetQueryContext(const CachePriorityGuard::Lock &) { if (!isQueryInitialized()) return nullptr; @@ -25,7 +25,7 @@ FileCacheQueryLimit::QueryContextPtr FileCacheQueryLimit::tryGetQueryContext(con return (query_iter == query_map.end()) ? 
nullptr : query_iter->second; } -void FileCacheQueryLimit::removeQueryContext(const std::string & query_id, const CacheGuard::Lock &) +void FileCacheQueryLimit::removeQueryContext(const std::string & query_id, const CachePriorityGuard::Lock &) { auto query_iter = query_map.find(query_id); if (query_iter == query_map.end()) @@ -41,7 +41,7 @@ void FileCacheQueryLimit::removeQueryContext(const std::string & query_id, const FileCacheQueryLimit::QueryContextPtr FileCacheQueryLimit::getOrSetQueryContext( const std::string & query_id, const ReadSettings & settings, - const CacheGuard::Lock &) + const CachePriorityGuard::Lock &) { if (query_id.empty()) return nullptr; @@ -70,7 +70,7 @@ void FileCacheQueryLimit::QueryContext::add( size_t offset, size_t size, const FileCache::UserInfo & user, - const CacheGuard::Lock & lock) + const CachePriorityGuard::Lock & lock) { auto it = getPriority().add(key_metadata, offset, size, user, lock); auto [_, inserted] = records.emplace(FileCacheKeyAndOffset{key_metadata->key, offset}, it); @@ -87,7 +87,7 @@ void FileCacheQueryLimit::QueryContext::add( void FileCacheQueryLimit::QueryContext::remove( const Key & key, size_t offset, - const CacheGuard::Lock & lock) + const CachePriorityGuard::Lock & lock) { auto record = records.find({key, offset}); if (record == records.end()) @@ -100,7 +100,7 @@ void FileCacheQueryLimit::QueryContext::remove( IFileCachePriority::IteratorPtr FileCacheQueryLimit::QueryContext::tryGet( const Key & key, size_t offset, - const CacheGuard::Lock &) + const CachePriorityGuard::Lock &) { auto it = records.find({key, offset}); if (it == records.end()) diff --git a/src/Interpreters/Cache/QueryLimit.h b/src/Interpreters/Cache/QueryLimit.h index 419126601f0..7553eff82ba 100644 --- a/src/Interpreters/Cache/QueryLimit.h +++ b/src/Interpreters/Cache/QueryLimit.h @@ -13,14 +13,14 @@ public: class QueryContext; using QueryContextPtr = std::shared_ptr; - QueryContextPtr tryGetQueryContext(const CacheGuard::Lock & lock); + 
QueryContextPtr tryGetQueryContext(const CachePriorityGuard::Lock & lock); QueryContextPtr getOrSetQueryContext( const std::string & query_id, const ReadSettings & settings, - const CacheGuard::Lock &); + const CachePriorityGuard::Lock &); - void removeQueryContext(const std::string & query_id, const CacheGuard::Lock &); + void removeQueryContext(const std::string & query_id, const CachePriorityGuard::Lock &); class QueryContext { @@ -38,19 +38,19 @@ public: Priority::IteratorPtr tryGet( const Key & key, size_t offset, - const CacheGuard::Lock &); + const CachePriorityGuard::Lock &); void add( KeyMetadataPtr key_metadata, size_t offset, size_t size, const FileCacheUserInfo & user, - const CacheGuard::Lock &); + const CachePriorityGuard::Lock &); void remove( const Key & key, size_t offset, - const CacheGuard::Lock &); + const CachePriorityGuard::Lock &); private: using Records = std::unordered_map; diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index 43f1c1012ba..1767cb94be7 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -34,19 +34,19 @@ SLRUFileCachePriority::SLRUFileCachePriority( probationary_queue.max_size, protected_queue.max_elements); } -size_t SLRUFileCachePriority::getSize(const CacheGuard::Lock & lock) const +size_t SLRUFileCachePriority::getSize(const CachePriorityGuard::Lock & lock) const { return protected_queue.getSize(lock) + probationary_queue.getSize(lock); } -size_t SLRUFileCachePriority::getElementsCount(const CacheGuard::Lock & lock) const +size_t SLRUFileCachePriority::getElementsCount(const CachePriorityGuard::Lock & lock) const { return protected_queue.getElementsCount(lock) + probationary_queue.getElementsCount(lock); } bool SLRUFileCachePriority::canFit( /// NOLINT size_t size, - const CacheGuard::Lock & lock, + const CachePriorityGuard::Lock & lock, IteratorPtr reservee, bool best_effort) const { @@ 
-70,7 +70,7 @@ IFileCachePriority::IteratorPtr SLRUFileCachePriority::add( /// NOLINT size_t offset, size_t size, const UserInfo &, - const CacheGuard::Lock & lock, + const CachePriorityGuard::Lock & lock, bool is_startup) { if (is_startup) @@ -103,7 +103,7 @@ bool SLRUFileCachePriority::collectCandidatesForEviction( IFileCachePriority::IteratorPtr reservee, FinalizeEvictionFunc & finalize_eviction_func, const UserID & user_id, - const CacheGuard::Lock & lock) + const CachePriorityGuard::Lock & lock) { /// If `it` is nullptr, then it is the first space reservation attempt /// for a corresponding file segment, so it will be directly put into probationary queue. @@ -143,7 +143,7 @@ bool SLRUFileCachePriority::collectCandidatesForEviction( && !probationary_queue.collectCandidatesForEviction(size_to_downgrade, stat, res, reservee, noop, user_id, lock)) return false; - finalize_eviction_func = [=, this](const CacheGuard::Lock & lk) mutable + finalize_eviction_func = [=, this](const CachePriorityGuard::Lock & lk) mutable { for (const auto & [key, key_candidates] : *downgrade_candidates) { @@ -159,7 +159,7 @@ bool SLRUFileCachePriority::collectCandidatesForEviction( return true; } -void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const CacheGuard::Lock & lock) +void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const CachePriorityGuard::Lock & lock) { /// If entry is already in protected queue, /// we only need to increase its priority within the protected queue. 
@@ -242,7 +242,7 @@ void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const Cach iterator.is_protected = true; } -IFileCachePriority::PriorityDumpPtr SLRUFileCachePriority::dump(const CacheGuard::Lock & lock) +IFileCachePriority::PriorityDumpPtr SLRUFileCachePriority::dump(const CachePriorityGuard::Lock & lock) { auto res = dynamic_pointer_cast(probationary_queue.dump(lock)); auto part_res = dynamic_pointer_cast(protected_queue.dump(lock)); @@ -250,14 +250,14 @@ IFileCachePriority::PriorityDumpPtr SLRUFileCachePriority::dump(const CacheGuard return res; } -void SLRUFileCachePriority::shuffle(const CacheGuard::Lock & lock) +void SLRUFileCachePriority::shuffle(const CachePriorityGuard::Lock & lock) { protected_queue.shuffle(lock); probationary_queue.shuffle(lock); } bool SLRUFileCachePriority::modifySizeLimits( - size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock & lock) + size_t max_size_, size_t max_elements_, double size_ratio_, const CachePriorityGuard::Lock & lock) { if (max_size == max_size_ && max_elements == max_elements_ && size_ratio == size_ratio_) return false; /// Nothing to change. 
@@ -287,7 +287,7 @@ SLRUFileCachePriority::EntryPtr SLRUFileCachePriority::SLRUIterator::getEntry() return entry; } -size_t SLRUFileCachePriority::SLRUIterator::increasePriority(const CacheGuard::Lock & lock) +size_t SLRUFileCachePriority::SLRUIterator::increasePriority(const CachePriorityGuard::Lock & lock) { assertValid(); cache_priority->increasePriority(*this, lock); @@ -306,7 +306,7 @@ void SLRUFileCachePriority::SLRUIterator::invalidate() lru_iterator.invalidate(); } -void SLRUFileCachePriority::SLRUIterator::remove(const CacheGuard::Lock & lock) +void SLRUFileCachePriority::SLRUIterator::remove(const CachePriorityGuard::Lock & lock) { assertValid(); lru_iterator.remove(lock); diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h index d97fa80a6c7..d81ce1bc480 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.h +++ b/src/Interpreters/Cache/SLRUFileCachePriority.h @@ -21,13 +21,13 @@ public: LRUFileCachePriority::StatePtr probationary_state_ = nullptr, LRUFileCachePriority::StatePtr protected_state_ = nullptr); - size_t getSize(const CacheGuard::Lock & lock) const override; + size_t getSize(const CachePriorityGuard::Lock & lock) const override; - size_t getElementsCount(const CacheGuard::Lock &) const override; + size_t getElementsCount(const CachePriorityGuard::Lock &) const override; bool canFit( /// NOLINT size_t size, - const CacheGuard::Lock &, + const CachePriorityGuard::Lock &, IteratorPtr reservee = nullptr, bool best_effort = false) const override; @@ -36,7 +36,7 @@ public: size_t offset, size_t size, const UserInfo & user, - const CacheGuard::Lock &, + const CachePriorityGuard::Lock &, bool is_startup = false) override; bool collectCandidatesForEviction( @@ -46,13 +46,13 @@ public: IFileCachePriority::IteratorPtr reservee, FinalizeEvictionFunc & finalize_eviction_func, const UserID & user_id, - const CacheGuard::Lock &) override; + const CachePriorityGuard::Lock &) override; - void 
shuffle(const CacheGuard::Lock &) override; + void shuffle(const CachePriorityGuard::Lock &) override; - PriorityDumpPtr dump(const CacheGuard::Lock &) override; + PriorityDumpPtr dump(const CachePriorityGuard::Lock &) override; - bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) override; + bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CachePriorityGuard::Lock &) override; private: double size_ratio; @@ -60,7 +60,7 @@ private: LRUFileCachePriority probationary_queue; LoggerPtr log = getLogger("SLRUFileCachePriority"); - void increasePriority(SLRUIterator & iterator, const CacheGuard::Lock & lock); + void increasePriority(SLRUIterator & iterator, const CachePriorityGuard::Lock & lock); }; class SLRUFileCachePriority::SLRUIterator : public IFileCachePriority::Iterator @@ -74,9 +74,9 @@ public: EntryPtr getEntry() const override; - size_t increasePriority(const CacheGuard::Lock &) override; + size_t increasePriority(const CachePriorityGuard::Lock &) override; - void remove(const CacheGuard::Lock &) override; + void remove(const CachePriorityGuard::Lock &) override; void invalidate() override; diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index fe2baea6b4e..e10f3ecfbc9 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -154,7 +154,8 @@ void DDLLogEntry::parse(const String & data) rb >> "settings: " >> settings_str >> "\n"; ParserSetQuery parser{true}; constexpr UInt64 max_depth = 16; - ASTPtr settings_ast = parseQuery(parser, settings_str, Context::getGlobalContextInstance()->getSettingsRef().max_query_size, max_depth); + constexpr UInt64 max_backtracks = DBMS_DEFAULT_MAX_PARSER_BACKTRACKS; + ASTPtr settings_ast = parseQuery(parser, settings_str, Context::getGlobalContextInstance()->getSettingsRef().max_query_size, max_depth, max_backtracks); settings.emplace(std::move(settings_ast->as()->changes)); } } @@ -197,7 
+198,7 @@ void DDLTaskBase::parseQueryFromEntry(ContextPtr context) ParserQuery parser_query(end, settings.allow_settings_after_format_in_insert); String description; - query = parseQuery(parser_query, begin, end, description, 0, settings.max_parser_depth); + query = parseQuery(parser_query, begin, end, description, 0, settings.max_parser_depth, settings.max_parser_backtracks); } void DDLTaskBase::formatRewrittenQuery(ContextPtr context) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index a9fd5c852ba..9d9f418934f 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1143,7 +1143,7 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) TableMarkedAsDropped dropped_table; { std::lock_guard lock(tables_marked_dropped_mutex); - time_t latest_drop_time = std::numeric_limits::min(); + auto latest_drop_time = std::numeric_limits::min(); auto it_dropped_table = tables_marked_dropped.end(); for (auto it = tables_marked_dropped.begin(); it != tables_marked_dropped.end(); ++it) { @@ -1168,7 +1168,7 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) } if (it_dropped_table == tables_marked_dropped.end()) throw Exception(ErrorCodes::UNKNOWN_TABLE, - "The drop task of table {} is in progress, has been dropped or the database engine doesn't support it", + "Table {} is being dropped, has been dropped, or the database engine does not support UNDROP", table_id.getNameForLogs()); latest_metadata_dropped_path = it_dropped_table->metadata_path; String table_metadata_path = getPathForMetadata(it_dropped_table->table_id); diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp index 60110916760..fed29b410db 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp @@ -96,7 +96,7 @@ static ASTPtr parseAdditionalPostFilter(const Context & 
context) ParserExpression parser; return parseQuery( parser, filter.data(), filter.data() + filter.size(), - "additional filter", settings.max_query_size, settings.max_parser_depth); + "additional filter", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); } static ActionsDAGPtr makeAdditionalPostFilter(ASTPtr & ast, ContextPtr context, const Block & header) diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index b768593da98..7acaf95becc 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -71,11 +72,15 @@ BlockIO InterpreterAlterQuery::execute() BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) { + ASTSelectWithUnionQuery * modify_query = nullptr; + for (auto & child : alter.command_list->children) { auto * command_ast = child->as(); if (command_ast->sql_security) InterpreterCreateQuery::processSQLSecurityOption(getContext(), command_ast->sql_security->as()); + else if (command_ast->type == ASTAlterCommand::MODIFY_QUERY) + modify_query = command_ast->select->as(); } BlockIO res; @@ -123,6 +128,12 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only"); auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); + if (modify_query) + { + // Expand CTE before filling default database + ApplyWithSubqueryVisitor().visit(*modify_query); + } + /// Add default database to table identifiers that we can encounter in e.g. default expressions, mutation expression, etc. 
AddDefaultDatabaseVisitor visitor(getContext(), table_id.getDatabaseName()); ASTPtr command_list_ptr = alter.command_list->ptr(); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index edd7452c130..2a08e8458a4 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -381,7 +381,7 @@ ASTPtr InterpreterCreateQuery::formatColumns(const NamesAndTypesList & columns) String type_name = column.type->getName(); const char * pos = type_name.data(); const char * end = pos + type_name.size(); - column_declaration->type = parseQuery(type_parser, pos, end, "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + column_declaration->type = parseQuery(type_parser, pos, end, "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); columns_list->children.emplace_back(column_declaration); } @@ -401,7 +401,7 @@ ASTPtr InterpreterCreateQuery::formatColumns(const NamesAndTypesList & columns, String type_name = alias_column.type->getName(); const char * type_pos = type_name.data(); const char * type_end = type_pos + type_name.size(); - column_declaration->type = parseQuery(type_parser, type_pos, type_end, "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + column_declaration->type = parseQuery(type_parser, type_pos, type_end, "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); column_declaration->default_specifier = "ALIAS"; @@ -409,7 +409,7 @@ ASTPtr InterpreterCreateQuery::formatColumns(const NamesAndTypesList & columns, const char * alias_pos = alias.data(); const char * alias_end = alias_pos + alias.size(); ParserExpression expression_parser; - column_declaration->default_expression = parseQuery(expression_parser, alias_pos, alias_end, "expression", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + column_declaration->default_expression = parseQuery(expression_parser, alias_pos, alias_end, "expression", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, 
DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); column_declaration->children.push_back(column_declaration->default_expression); columns_list->children.emplace_back(column_declaration); @@ -433,7 +433,7 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) String type_name = column.type->getName(); const char * type_name_pos = type_name.data(); const char * type_name_end = type_name_pos + type_name.size(); - column_declaration->type = parseQuery(type_parser, type_name_pos, type_name_end, "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + column_declaration->type = parseQuery(type_parser, type_name_pos, type_name_end, "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); if (column.default_desc.expression) { @@ -1852,10 +1852,12 @@ void InterpreterCreateQuery::addColumnsDescriptionToCreateQueryIfNecessary(ASTCr auto ast_storage = std::make_shared(); unsigned max_parser_depth = static_cast(getContext()->getSettingsRef().max_parser_depth); + unsigned max_parser_backtracks = static_cast(getContext()->getSettingsRef().max_parser_backtracks); auto query_from_storage = DB::getCreateQueryFromStorage(storage, ast_storage, false, max_parser_depth, + max_parser_backtracks, true); auto & create_query_from_storage = query_from_storage->as(); diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 97ae9649ae8..8fb0dabb5b5 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -97,7 +97,8 @@ BlockIO InterpreterDeleteQuery::execute() alter_query.data() + alter_query.size(), "ALTER query", 0, - DBMS_DEFAULT_MAX_PARSER_DEPTH); + DBMS_DEFAULT_MAX_PARSER_DEPTH, + DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); auto context = Context::createCopy(getContext()); context->setSetting("mutations_sync", 2); /// Lightweight delete is always synchronous diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp 
index 711100b5de1..72aa4cc63e3 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -417,7 +417,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, uuids_to_wait.push_back(table_to_wait); } } - // only if operation is DETACH + // only if operation is DETACH if ((!drop || !truncate) && query.sync) { /// Avoid "some tables are still in use" when sync mode is enabled diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 86196270ed1..26dae6a1df3 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -281,7 +281,7 @@ BlockIO InterpreterKillQueryQuery::execute() const auto with_round_bracket = alter_command.front() == '('; ParserAlterCommand parser{with_round_bracket}; auto command_ast - = parseQuery(parser, alter_command, 0, getContext()->getSettingsRef().max_parser_depth); + = parseQuery(parser, alter_command, 0, getContext()->getSettingsRef().max_parser_depth, getContext()->getSettingsRef().max_parser_backtracks); required_access_rights = InterpreterAlterQuery::getRequiredAccessForCommand( command_ast->as(), table_id.database_name, table_id.table_name); if (!access->isGranted(required_access_rights)) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index a314492c5b0..7c87dadfce6 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -96,6 +96,7 @@ #include #include #include +#include namespace ProfileEvents @@ -160,7 +161,7 @@ FilterDAGInfoPtr generateFilterActions( { ParserExpression expr_parser; /// We should add back quotes around column name as it can contain dots. 
- expr_list->children.push_back(parseQuery(expr_parser, backQuoteIfNeed(column_str), 0, context->getSettingsRef().max_parser_depth)); + expr_list->children.push_back(parseQuery(expr_parser, backQuoteIfNeed(column_str), 0, context->getSettingsRef().max_parser_depth, context->getSettingsRef().max_parser_backtracks)); } select_ast->setExpression(ASTSelectQuery::Expression::TABLES, std::make_shared()); @@ -331,7 +332,7 @@ ASTPtr parseAdditionalFilterConditionForTable( const auto & settings = context.getSettingsRef(); return parseQuery( parser, filter.data(), filter.data() + filter.size(), - "additional filter", settings.max_query_size, settings.max_parser_depth); + "additional filter", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); } } @@ -2553,10 +2554,13 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc /// If necessary, we request more sources than the number of threads - to distribute the work evenly over the threads. if (max_streams > 1 && !is_sync_remote) { - if (auto streams_with_ratio = max_streams * settings.max_streams_to_max_threads_ratio; streams_with_ratio < SIZE_MAX) + if (auto streams_with_ratio = max_streams * settings.max_streams_to_max_threads_ratio; canConvertTo(streams_with_ratio)) max_streams = static_cast(streams_with_ratio); else - throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Exceeded limit for `max_streams` with `max_streams_to_max_threads_ratio`. Make sure that `max_streams * max_streams_to_max_threads_ratio` not exceeds {}, current value: {}", SIZE_MAX, streams_with_ratio); + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "Exceeded limit for `max_streams` with `max_streams_to_max_threads_ratio`. 
" + "Make sure that `max_streams * max_streams_to_max_threads_ratio` is in some reasonable boundaries, current value: {}", + streams_with_ratio); } auto & prewhere_info = analysis_result.prewhere_info; diff --git a/src/Interpreters/InterpreterUndropQuery.cpp b/src/Interpreters/InterpreterUndropQuery.cpp index 8401c47df6b..f628a656947 100644 --- a/src/Interpreters/InterpreterUndropQuery.cpp +++ b/src/Interpreters/InterpreterUndropQuery.cpp @@ -17,14 +17,16 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; } -InterpreterUndropQuery::InterpreterUndropQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) : WithMutableContext(context_), query_ptr(query_ptr_) +InterpreterUndropQuery::InterpreterUndropQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) + : WithMutableContext(context_) + , query_ptr(query_ptr_) { } - BlockIO InterpreterUndropQuery::execute() { getContext()->checkAccess(AccessType::UNDROP_TABLE); + auto & undrop = query_ptr->as(); if (!undrop.cluster.empty() && !maybeRemoveOnCluster(query_ptr, getContext())) { diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 6251a9604e1..5c4ae528fc1 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -43,7 +43,7 @@ ASTPtr makeSubqueryTemplate(const String & table_alias) String query_template = "(select * from _t)"; if (!table_alias.empty()) query_template += " as " + table_alias; - ASTPtr subquery_template = parseQuery(parser, query_template, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr subquery_template = parseQuery(parser, query_template, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); if (!subquery_template) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parse subquery template"); return subquery_template; diff --git a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp 
b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp index 9f6e9b930fd..6d6077a0295 100644 --- a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp +++ b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp @@ -19,7 +19,7 @@ using namespace DB; static inline ASTPtr tryRewrittenCreateQuery(const String & query, ContextPtr context) { ParserExternalDDLQuery external_ddl_parser; - ASTPtr ast = parseQuery(external_ddl_parser, "EXTERNAL DDL FROM MySQL(test_database, test_database) " + query, 0, 0); + ASTPtr ast = parseQuery(external_ddl_parser, "EXTERNAL DDL FROM MySQL(test_database, test_database) " + query, 0, 0, 0); return MySQLInterpreter::InterpreterCreateImpl::getRewrittenQueries( *ast->as()->external_ddl->as(), diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index a74b5c67726..e4cbbb8f5f7 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -216,7 +216,7 @@ std::shared_ptr createSystemLog( /// Validate engine definition syntax to prevent some configuration errors. ParserStorageWithComment storage_parser; auto storage_ast = parseQuery(storage_parser, log_settings.engine.data(), log_settings.engine.data() + log_settings.engine.size(), - "Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + "Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); auto & storage_with_comment = storage_ast->as(); /// Add comment to AST. So it will be saved when the table will be renamed. 
@@ -647,7 +647,7 @@ ASTPtr SystemLog::getCreateTableQuery() ASTPtr storage_with_comment_ast = parseQuery( storage_parser, storage_def.data(), storage_def.data() + storage_def.size(), - "Storage to create table for " + LogElement::name(), 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + "Storage to create table for " + LogElement::name(), 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); StorageWithComment & storage_with_comment = storage_with_comment_ast->as(); diff --git a/src/Interpreters/examples/CMakeLists.txt b/src/Interpreters/examples/CMakeLists.txt index 11c219ff64e..8bb7f9eeb98 100644 --- a/src/Interpreters/examples/CMakeLists.txt +++ b/src/Interpreters/examples/CMakeLists.txt @@ -1,35 +1,35 @@ clickhouse_add_executable (hash_map hash_map.cpp) -target_link_libraries (hash_map PRIVATE dbms ch_contrib::sparsehash) +target_link_libraries (hash_map PRIVATE dbms clickhouse_functions ch_contrib::sparsehash) clickhouse_add_executable (hash_map_lookup hash_map_lookup.cpp) -target_link_libraries (hash_map_lookup PRIVATE dbms) +target_link_libraries (hash_map_lookup PRIVATE clickhouse_common_io clickhouse_compression) clickhouse_add_executable (hash_map3 hash_map3.cpp) -target_link_libraries (hash_map3 PRIVATE dbms ch_contrib::farmhash ch_contrib::metrohash) +target_link_libraries (hash_map3 PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::farmhash ch_contrib::metrohash) clickhouse_add_executable (hash_map_string hash_map_string.cpp) -target_link_libraries (hash_map_string PRIVATE dbms ch_contrib::sparsehash) +target_link_libraries (hash_map_string PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::sparsehash) clickhouse_add_executable (hash_map_string_2 hash_map_string_2.cpp) -target_link_libraries (hash_map_string_2 PRIVATE dbms) +target_link_libraries (hash_map_string_2 PRIVATE clickhouse_common_io clickhouse_compression) clickhouse_add_executable (hash_map_string_3 hash_map_string_3.cpp) -target_link_libraries 
(hash_map_string_3 PRIVATE dbms ch_contrib::farmhash ch_contrib::metrohash) +target_link_libraries (hash_map_string_3 PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::farmhash ch_contrib::metrohash) clickhouse_add_executable (hash_map_string_small hash_map_string_small.cpp) -target_link_libraries (hash_map_string_small PRIVATE dbms ch_contrib::sparsehash) +target_link_libraries (hash_map_string_small PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::sparsehash) clickhouse_add_executable (string_hash_map string_hash_map.cpp) -target_link_libraries (string_hash_map PRIVATE dbms ch_contrib::sparsehash) +target_link_libraries (string_hash_map PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::sparsehash) clickhouse_add_executable (string_hash_map_aggregation string_hash_map.cpp) -target_link_libraries (string_hash_map_aggregation PRIVATE dbms) +target_link_libraries (string_hash_map_aggregation PRIVATE clickhouse_common_io clickhouse_compression) clickhouse_add_executable (string_hash_set string_hash_set.cpp) -target_link_libraries (string_hash_set PRIVATE dbms) +target_link_libraries (string_hash_set PRIVATE clickhouse_common_io clickhouse_compression) clickhouse_add_executable (two_level_hash_map two_level_hash_map.cpp) -target_link_libraries (two_level_hash_map PRIVATE dbms ch_contrib::sparsehash) +target_link_libraries (two_level_hash_map PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::sparsehash) clickhouse_add_executable (jit_example jit_example.cpp) target_link_libraries (jit_example PRIVATE dbms) diff --git a/src/Interpreters/examples/hash_map.cpp b/src/Interpreters/examples/hash_map.cpp index b55f174678e..0a91d00809f 100644 --- a/src/Interpreters/examples/hash_map.cpp +++ b/src/Interpreters/examples/hash_map.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/examples/hash_map_lookup.cpp b/src/Interpreters/examples/hash_map_lookup.cpp index 
fd6b231cf73..829a234c537 100644 --- a/src/Interpreters/examples/hash_map_lookup.cpp +++ b/src/Interpreters/examples/hash_map_lookup.cpp @@ -8,10 +8,8 @@ #define DBMS_HASH_MAP_DEBUG_RESIZES #include -#include #include #include -#include #include #include diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 88021038ebb..7dd46534fdf 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -746,18 +746,18 @@ static std::tuple executeQueryImpl( { ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); /// TODO: parser should fail early when max_query_size limit is reached. - ast = parseKQLQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + ast = parseKQLQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); } else if (settings.dialect == Dialect::prql && !internal) { - ParserPRQLQuery parser(max_query_size, settings.max_parser_depth); - ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + ParserPRQLQuery parser(max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); } else { ParserQuery parser(end, settings.allow_settings_after_format_in_insert); /// TODO: parser should fail early when max_query_size limit is reached. 
- ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); #ifndef NDEBUG /// Verify that AST formatting is consistent: @@ -774,7 +774,7 @@ static std::tuple executeQueryImpl( ast2 = parseQuery(parser, formatted1.data(), formatted1.data() + formatted1.size(), - "", new_max_query_size, settings.max_parser_depth); + "", new_max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); } catch (const Exception & e) { diff --git a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp index edff202d547..a02ce66e6b5 100644 --- a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp +++ b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp @@ -25,6 +25,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) auto initialize = [&]() mutable { + if (context) + return true; + shared_context = Context::createShared(); context = Context::createGlobal(shared_context.get()); context->makeGlobalContext(); diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp index 1295a4d5a75..d78b6ab0c4d 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp @@ -122,7 +122,7 @@ ASTPtr parseCustomKeyForTable(const String & custom_key, const Context & context const auto & settings = context.getSettingsRef(); return parseQuery( parser, custom_key.data(), custom_key.data() + custom_key.size(), - "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); + "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); } } diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 2723eb37350..226472175b3 100644 --- 
a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -55,9 +55,11 @@ static void executeCreateQuery( bool create, bool has_force_restore_data_flag) { + const Settings & settings = context->getSettingsRef(); ParserCreateQuery parser; ASTPtr ast = parseQuery( - parser, query.data(), query.data() + query.size(), "in file " + file_name, 0, context->getSettingsRef().max_parser_depth); + parser, query.data(), query.data() + query.size(), "in file " + file_name, + 0, settings.max_parser_depth, settings.max_parser_backtracks); auto & ast_create_query = ast->as(); ast_create_query.setDatabase(database); diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index 78b72022a9a..30a41c090d5 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -115,7 +115,7 @@ ColumnsDescription parseColumnsListFromString(const std::string & structure, con ParserColumnDeclarationList parser(true, true); const Settings & settings = context->getSettingsRef(); - ASTPtr columns_list_raw = parseQuery(parser, structure, "columns declaration list", settings.max_query_size, settings.max_parser_depth); + ASTPtr columns_list_raw = parseQuery(parser, structure, "columns declaration list", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); auto * columns_list = dynamic_cast(columns_list_raw.get()); if (!columns_list) @@ -136,7 +136,7 @@ bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescrip const char * start = structure.data(); const char * end = structure.data() + structure.size(); ASTPtr columns_list_raw = tryParseQuery( - parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth); + parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth, 
settings.max_parser_backtracks, true); if (!columns_list_raw) return false; diff --git a/src/Interpreters/tests/gtest_comparison_graph.cpp b/src/Interpreters/tests/gtest_comparison_graph.cpp index 96a78241c8e..ac24a8de368 100644 --- a/src/Interpreters/tests/gtest_comparison_graph.cpp +++ b/src/Interpreters/tests/gtest_comparison_graph.cpp @@ -12,7 +12,7 @@ using namespace DB; static ComparisonGraph getGraph(const String & query) { ParserExpressionList parser(false); - ASTPtr ast = parseQuery(parser, query, 0, 0); + ASTPtr ast = parseQuery(parser, query, 0, 0, 0); return ComparisonGraph(ast->children); } diff --git a/src/Interpreters/tests/gtest_cycle_aliases.cpp b/src/Interpreters/tests/gtest_cycle_aliases.cpp index 2bdeac90f8f..5ff3fbe1c2d 100644 --- a/src/Interpreters/tests/gtest_cycle_aliases.cpp +++ b/src/Interpreters/tests/gtest_cycle_aliases.cpp @@ -14,10 +14,10 @@ TEST(QueryNormalizer, SimpleLoopAlias) { String query = "a as a"; ParserExpressionList parser(false); - ASTPtr ast = parseQuery(parser, query, 0, 0); + ASTPtr ast = parseQuery(parser, query, 0, 0, 0); Aliases aliases; - aliases["a"] = parseQuery(parser, "a as a", 0, 0)->children[0]; + aliases["a"] = parseQuery(parser, "a as a", 0, 0, 0)->children[0]; Settings settings; QueryNormalizer::Data normalizer_data(aliases, {}, false, settings, false); @@ -28,11 +28,11 @@ TEST(QueryNormalizer, SimpleCycleAlias) { String query = "a as b, b as a"; ParserExpressionList parser(false); - ASTPtr ast = parseQuery(parser, query, 0, 0); + ASTPtr ast = parseQuery(parser, query, 0, 0, 0); Aliases aliases; - aliases["a"] = parseQuery(parser, "b as a", 0, 0)->children[0]; - aliases["b"] = parseQuery(parser, "a as b", 0, 0)->children[0]; + aliases["a"] = parseQuery(parser, "b as a", 0, 0, 0)->children[0]; + aliases["b"] = parseQuery(parser, "a as b", 0, 0, 0)->children[0]; Settings settings; QueryNormalizer::Data normalizer_data(aliases, {}, false, settings, true); diff --git 
a/src/Interpreters/tests/gtest_table_overrides.cpp b/src/Interpreters/tests/gtest_table_overrides.cpp index 779bc7a53a4..09aa2e1f37f 100644 --- a/src/Interpreters/tests/gtest_table_overrides.cpp +++ b/src/Interpreters/tests/gtest_table_overrides.cpp @@ -34,11 +34,11 @@ TEST_P(TableOverrideTest, applyOverrides) const auto & [database_query, table_query, expected_query] = GetParam(); ParserCreateQuery parser; ASTPtr database_ast; - ASSERT_NO_THROW(database_ast = parseQuery(parser, database_query, 0, 0)); + ASSERT_NO_THROW(database_ast = parseQuery(parser, database_query, 0, 0, 0)); auto * database = database_ast->as(); ASSERT_NE(nullptr, database); ASTPtr table_ast; - ASSERT_NO_THROW(table_ast = parseQuery(parser, table_query, 0, 0)); + ASSERT_NO_THROW(table_ast = parseQuery(parser, table_query, 0, 0, 0)); auto * table = table_ast->as(); ASSERT_NE(nullptr, table); auto table_name = table->table->as()->name(); diff --git a/src/Parsers/ASTRenameQuery.h b/src/Parsers/ASTRenameQuery.h index beaf93c4761..d51c382f374 100644 --- a/src/Parsers/ASTRenameQuery.h +++ b/src/Parsers/ASTRenameQuery.h @@ -94,7 +94,24 @@ public: ASTPtr clone() const override { auto res = std::make_shared(*this); - res->cloneChildren(); + res->children.clear(); + + auto clone_child = [&res](ASTPtr & node) + { + if (node) + { + node = node->clone(); + res->children.push_back(node); + } + }; + + for (auto & elem : res->elements) + { + clone_child(elem.from.database); + clone_child(elem.from.table); + clone_child(elem.to.database); + clone_child(elem.to.table); + } cloneOutputOptions(*res); return res; } @@ -108,9 +125,15 @@ public: for (Element & elem : query.elements) { if (!elem.from.database) + { elem.from.database = std::make_shared(params.default_database); + query.children.push_back(elem.from.database); + } if (!elem.to.database) + { elem.to.database = std::make_shared(params.default_database); + query.children.push_back(elem.to.database); + } } return query_ptr; diff --git 
a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 6d267a7d215..1e7d0158878 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1918,7 +1918,7 @@ public: && string_literal->as().value.tryGet(literal)) { Tokens tokens(literal.data(), literal.data() + literal.size()); - IParser::Pos token_pos(tokens, 0); + IParser::Pos token_pos(tokens, pos.max_depth, pos.max_backtracks); Expected token_expected; ASTPtr expr; diff --git a/src/Parsers/IParser.cpp b/src/Parsers/IParser.cpp new file mode 100644 index 00000000000..41981a4bb8a --- /dev/null +++ b/src/Parsers/IParser.cpp @@ -0,0 +1,35 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TOO_SLOW_PARSING; +} + +IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) +{ + depth = rhs.depth; + max_depth = rhs.max_depth; + + if (rhs.backtracks > backtracks) + backtracks = rhs.backtracks; + + max_backtracks = rhs.max_backtracks; + + if (rhs < *this) + { + ++backtracks; + if (max_backtracks && backtracks > max_backtracks) + throw Exception(ErrorCodes::TOO_SLOW_PARSING, "Maximum amount of backtracking ({}) exceeded in the parser. 
" + "Consider rising max_parser_backtracks parameter.", max_backtracks); + } + + TokenIterator::operator=(rhs); + + return *this; +} + +} diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 198ec0346ff..291f8ee7d44 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -62,11 +62,18 @@ public: uint32_t depth = 0; uint32_t max_depth = 0; - Pos(Tokens & tokens_, uint32_t max_depth_) : TokenIterator(tokens_), max_depth(max_depth_) + uint32_t backtracks = 0; + uint32_t max_backtracks = 0; + + Pos(Tokens & tokens_, uint32_t max_depth_, uint32_t max_backtracks_) + : TokenIterator(tokens_), max_depth(max_depth_), max_backtracks(max_backtracks_) { } - Pos(TokenIterator token_iterator_, uint32_t max_depth_) : TokenIterator(token_iterator_), max_depth(max_depth_) { } + Pos(TokenIterator token_iterator_, uint32_t max_depth_, uint32_t max_backtracks_) + : TokenIterator(token_iterator_), max_depth(max_depth_), max_backtracks(max_backtracks_) + { + } ALWAYS_INLINE void increaseDepth() { @@ -97,6 +104,10 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error in parser: incorrect calculation of parse depth"); --depth; } + + Pos(const Pos & rhs) = default; + + Pos & operator=(const Pos & rhs); }; /** Get the text of this parser parses. 
*/ diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 152c29e5941..1d77007a37c 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -279,13 +279,13 @@ String IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos) } String IParserKQLFunction::kqlCallToExpression( - const std::string_view function_name, const std::initializer_list params, const uint32_t max_depth) + const std::string_view function_name, const std::initializer_list params, uint32_t max_depth, uint32_t max_backtracks) { - return kqlCallToExpression(function_name, std::span(params), max_depth); + return kqlCallToExpression(function_name, std::span(params), max_depth, max_backtracks); } String IParserKQLFunction::kqlCallToExpression( - const std::string_view function_name, const std::span params, const uint32_t max_depth) + const std::string_view function_name, const std::span params, uint32_t max_depth, uint32_t max_backtracks) { const auto params_str = std::accumulate( std::cbegin(params), @@ -302,7 +302,7 @@ String IParserKQLFunction::kqlCallToExpression( const auto kql_call = std::format("{}({})", function_name, params_str); DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); - DB::IParser::Pos tokens_pos(call_tokens, max_depth); + DB::IParser::Pos tokens_pos(call_tokens, max_depth, max_backtracks); return DB::IParserKQLFunction::getExpression(tokens_pos); } diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index 147436551f9..f5069e80745 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -77,8 +77,8 @@ public: static std::optional getOptionalArgument(const String & function_name, DB::IParser::Pos & pos, ArgumentState argument_state = ArgumentState::Parsed); 
static String - kqlCallToExpression(std::string_view function_name, std::initializer_list params, uint32_t max_depth); - static String kqlCallToExpression(std::string_view function_name, std::span params, uint32_t max_depth); + kqlCallToExpression(std::string_view function_name, std::initializer_list params, uint32_t max_depth, uint32_t max_backtracks); + static String kqlCallToExpression(std::string_view function_name, std::span params, uint32_t max_depth, uint32_t max_backtracks); static String escapeSingleQuotes(const String & input); protected: diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp index b0eec16f56f..87841e295ba 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -99,7 +99,7 @@ bool ToTimeSpan::convertImpl(String & out, IParser::Pos & pos) ++pos; try { - auto result = kqlCallToExpression("time", {arg}, pos.max_depth); + auto result = kqlCallToExpression("time", {arg}, pos.max_depth, pos.max_backtracks); out = std::format("{}", result); } catch (...) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp index 924ac9f6490..e90be363e4b 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -99,7 +99,7 @@ bool ArrayRotateRight::convertImpl(String & out, IParser::Pos & pos) const auto array = getArgument(function_name, pos, ArgumentState::Raw); const auto count = getArgument(function_name, pos, ArgumentState::Raw); - out = kqlCallToExpression("array_rotate_left", {array, "-1 * " + count}, pos.max_depth); + out = kqlCallToExpression("array_rotate_left", {array, "-1 * " + count}, pos.max_depth, pos.max_backtracks); return true; } @@ -140,7 +140,7 @@ bool ArrayShiftRight::convertImpl(String & out, IParser::Pos & pos) "array_shift_left", fill ? 
std::initializer_list{array, negated_count, *fill} : std::initializer_list{array, negated_count}, - pos.max_depth); + pos.max_depth, pos.max_backtracks); return true; } @@ -233,8 +233,8 @@ bool JaccardIndex::convertImpl(String & out, IParser::Pos & pos) const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); out = std::format( "divide(length({0}), length({1}))", - kqlCallToExpression("set_intersect", {lhs, rhs}, pos.max_depth), - kqlCallToExpression("set_union", {lhs, rhs}, pos.max_depth)); + kqlCallToExpression("set_intersect", {lhs, rhs}, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("set_union", {lhs, rhs}, pos.max_depth, pos.max_backtracks)); return true; } @@ -292,7 +292,7 @@ bool SetDifference::convertImpl(String & out, IParser::Pos & pos) while (auto next_array = getOptionalArgument(function_name, pos, ArgumentState::Raw)) arrays.push_back(*next_array); - return kqlCallToExpression("set_union", std::vector(arrays.cbegin(), arrays.cend()), pos.max_depth); + return kqlCallToExpression("set_union", std::vector(arrays.cbegin(), arrays.cend()), pos.max_depth, pos.max_backtracks); }); out = std::format("arrayFilter(x -> not has({1}, x), arrayDistinct({0}))", lhs, rhs); diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 6f853b16fbc..06566dc54ec 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -34,10 +34,10 @@ bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) "sign(IPv4StringToNumOrNull(toString((tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip_{5}), " "toUInt8(min2({4}, min2(assumeNotNull(lhs_mask_{5}), assumeNotNull(rhs_mask_{5})))) as mask_{5}), 1))))" " - IPv4StringToNumOrNull(toString((tupleElement(IPv4CIDRToRange(assumeNotNull(rhs_ip_{5}), mask_{5}), 1))))))", - kqlCallToExpression("parse_ipv4", {lhs}, pos.max_depth), - kqlCallToExpression("ipv4_netmask_suffix", {lhs}, 
pos.max_depth), - kqlCallToExpression("parse_ipv4", {rhs}, pos.max_depth), - kqlCallToExpression("ipv4_netmask_suffix", {rhs}, pos.max_depth), + kqlCallToExpression("parse_ipv4", {lhs}, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("ipv4_netmask_suffix", {lhs}, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("parse_ipv4", {rhs}, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("ipv4_netmask_suffix", {rhs}, pos.max_depth, pos.max_backtracks), mask ? *mask : "32", generateUniqueIdentifier()); return true; @@ -56,8 +56,8 @@ bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) "or isNull({1} as range_start_ip_{3}) or isNull({2} as range_mask_{3}), null, " "bitXor(range_start_ip_{3}, bitAnd(ip_{3}, bitNot(toUInt32(intExp2(toInt32(32 - range_mask_{3})) - 1)))) = 0) ", ip_address, - kqlCallToExpression("parse_ipv4", {ip_range}, pos.max_depth), - kqlCallToExpression("ipv4_netmask_suffix", {ip_range}, pos.max_depth), + kqlCallToExpression("parse_ipv4", {ip_range}, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("ipv4_netmask_suffix", {ip_range}, pos.max_depth, pos.max_backtracks), generateUniqueIdentifier()); return true; } @@ -71,7 +71,7 @@ bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) const auto lhs = getArgument(function_name, pos, ArgumentState::Raw); const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Raw); - out = std::format("equals({}, 0)", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask ? *mask : "32"}, pos.max_depth)); + out = std::format("equals({}, 0)", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask ? 
*mask : "32"}, pos.max_depth, pos.max_backtracks)); return true; } @@ -196,7 +196,7 @@ bool Ipv6IsMatch::convertImpl(String & out, IParser::Pos & pos) const auto lhs = getArgument(function_name, pos, ArgumentState::Raw); const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Raw); - out = std::format("equals({}, 0)", kqlCallToExpression("ipv6_compare", {lhs, rhs, mask ? *mask : "128"}, pos.max_depth)); + out = std::format("equals({}, 0)", kqlCallToExpression("ipv6_compare", {lhs, rhs, mask ? *mask : "128"}, pos.max_depth, pos.max_backtracks)); return true; } @@ -228,9 +228,9 @@ bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) const auto unique_identifier = generateUniqueIdentifier(); out = std::format( "if(empty({0} as ipv4_{3}), {1}, {2})", - kqlCallToExpression("format_ipv4", {"trim_start('::', " + ip_address + ")", mask + " - 96"}, pos.max_depth), - kqlCallToExpression("parse_ipv6", {"strcat(tostring(parse_ipv6(" + ip_address + ")), '/', tostring(" + mask + "))"}, pos.max_depth), - kqlCallToExpression("parse_ipv6", {"ipv4_" + unique_identifier}, pos.max_depth), + kqlCallToExpression("format_ipv4", {"trim_start('::', " + ip_address + ")", mask + " - 96"}, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("parse_ipv6", {"strcat(tostring(parse_ipv6(" + ip_address + ")), '/', tostring(" + mask + "))"}, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("parse_ipv6", {"ipv4_" + unique_identifier}, pos.max_depth, pos.max_backtracks), unique_identifier); return true; } @@ -247,9 +247,9 @@ bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) "ifNull(if(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32_{3}) and toTypeName({0}) = 'String' or ({1}) < 0 " "or isNull(ifNull(param_as_uint32_{3}, {2}) as ip_as_number_{3}), null, " "IPv4NumToString(bitAnd(ip_as_number_{3}, bitNot(toUInt32(intExp2(toInt32(32 - ({1}))) - 1))))), '')", - 
ParserKQLBase::getExprFromToken(ip_address, pos.max_depth), + ParserKQLBase::getExprFromToken(ip_address, pos.max_depth, pos.max_backtracks), mask ? *mask : "32", - kqlCallToExpression("parse_ipv4", {"tostring(" + ip_address + ")"}, pos.max_depth), + kqlCallToExpression("parse_ipv4", {"tostring(" + ip_address + ")"}, pos.max_depth, pos.max_backtracks), generateUniqueIdentifier()); return true; } @@ -266,10 +266,10 @@ bool FormatIpv4Mask::convertImpl(String & out, IParser::Pos & pos) out = std::format( "if(empty({1} as formatted_ip_{2}) or position(toTypeName({0}), 'Int') = 0 or not {0} between 0 and 32, '', " "concat(formatted_ip_{2}, '/', toString(toInt64(min2({0}, ifNull({3} as suffix_{2}, 32))))))", - ParserKQLBase::getExprFromToken(calculated_mask, pos.max_depth), - kqlCallToExpression("format_ipv4", {ip_address, calculated_mask}, pos.max_depth), + ParserKQLBase::getExprFromToken(calculated_mask, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("format_ipv4", {ip_address, calculated_mask}, pos.max_depth, pos.max_backtracks), generateUniqueIdentifier(), - kqlCallToExpression("ipv4_netmask_suffix", {"tostring(" + ip_address + ")"}, pos.max_depth)); + kqlCallToExpression("ipv4_netmask_suffix", {"tostring(" + ip_address + ")"}, pos.max_depth, pos.max_backtracks)); return true; } } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 82cfa68b180..18c986c2191 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -442,7 +442,7 @@ bool ParseJSON::convertImpl(String & out, IParser::Pos & pos) { --pos; auto arg = getArgument(fn_name, pos); - auto result = kqlCallToExpression("dynamic", {arg}, pos.max_depth); + auto result = kqlCallToExpression("dynamic", {arg}, pos.max_depth, pos.max_backtracks); out = std::format("{}", result); } else @@ -729,7 +729,7 @@ bool Trim::convertImpl(String & out, IParser::Pos 
& pos) const auto regex = getArgument(fn_name, pos, ArgumentState::Raw); const auto source = getArgument(fn_name, pos, ArgumentState::Raw); - out = kqlCallToExpression("trim_start", {regex, std::format("trim_end({0}, {1})", regex, source)}, pos.max_depth); + out = kqlCallToExpression("trim_start", {regex, std::format("trim_end({0}, {1})", regex, source)}, pos.max_depth, pos.max_backtracks); return true; } diff --git a/src/Parsers/Kusto/ParserKQLDistinct.cpp b/src/Parsers/Kusto/ParserKQLDistinct.cpp index 2de4d2c28e7..3ec823a61b5 100644 --- a/src/Parsers/Kusto/ParserKQLDistinct.cpp +++ b/src/Parsers/Kusto/ParserKQLDistinct.cpp @@ -12,7 +12,7 @@ bool ParserKQLDistinct::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) expr = getExprFromToken(pos); Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLExtend.cpp b/src/Parsers/Kusto/ParserKQLExtend.cpp index b37618f69fd..41ce296bd25 100644 --- a/src/Parsers/Kusto/ParserKQLExtend.cpp +++ b/src/Parsers/Kusto/ParserKQLExtend.cpp @@ -23,7 +23,7 @@ bool ParserKQLExtend ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) String except_str; String new_extend_str; Tokens ntokens(extend_expr.c_str(), extend_expr.c_str() + extend_expr.size()); - IParser::Pos npos(ntokens, pos.max_depth); + IParser::Pos npos(ntokens, pos.max_depth, pos.max_backtracks); String alias; @@ -77,7 +77,7 @@ bool ParserKQLExtend ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) String expr = std::format("SELECT * {}, {} from prev", except_str, new_extend_str); Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserSelectQuery().parse(new_pos, 
select_query, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index 74d8610ecd4..b060ce8d2c7 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -14,7 +14,7 @@ bool ParserKQLFilter::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr where_expression; Tokens token_filter(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos pos_filter(token_filter, pos.max_depth); + IParser::Pos pos_filter(token_filter, pos.max_depth, pos.max_backtracks); if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, where_expression, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 910f0e8e1a3..0eb460757b1 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -14,7 +14,7 @@ bool ParserKQLLimit::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto expr = getExprFromToken(pos); Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserExpressionWithOptionalAlias(false).parse(new_pos, limit_length, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLMVExpand.cpp b/src/Parsers/Kusto/ParserKQLMVExpand.cpp index 7d242dffaf7..d174e9b5911 100644 --- a/src/Parsers/Kusto/ParserKQLMVExpand.cpp +++ b/src/Parsers/Kusto/ParserKQLMVExpand.cpp @@ -69,7 +69,7 @@ bool ParserKQLMVExpand::parseColumnArrayExprs(ColumnArrayExprs & column_array_ex auto add_columns = [&] { - column_array_expr = getExprFromToken(String(expr_begin_pos->begin, expr_end_pos->end), pos.max_depth); + column_array_expr = getExprFromToken(String(expr_begin_pos->begin, expr_end_pos->end), pos.max_depth, pos.max_backtracks); if (alias.empty()) { @@ -189,7 +189,7 @@ bool ParserKQLMVExpand::parserMVExpand(KQLMVExpand & kql_mv_expand, Pos & pos, E 
return true; } -bool ParserKQLMVExpand::genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_node, int32_t max_depth) +bool ParserKQLMVExpand::genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks) { String expand_str; String cast_type_column_remove, cast_type_column_rename; @@ -253,7 +253,7 @@ bool ParserKQLMVExpand::genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_no if (cast_type_column_remove.empty()) { query = std::format("Select {} {} From {} {}", columns, extra_columns, input, expand_str); - if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth, max_backtracks)) return false; if (!setSubQuerySource(sub_query_node, select_node, false, false)) return false; @@ -262,14 +262,14 @@ bool ParserKQLMVExpand::genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_no else { query = std::format("(Select {} {} From {} {})", columns, extra_columns, input, expand_str); - if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth, max_backtracks)) return false; if (!setSubQuerySource(sub_query_node, select_node, true, false)) return false; select_node = std::move(sub_query_node); auto rename_query = std::format("(Select * {}, {} From {})", cast_type_column_remove, cast_type_column_rename, "query"); - if (!parseSQLQueryByString(std::make_unique(), rename_query, sub_query_node, max_depth)) + if (!parseSQLQueryByString(std::make_unique(), rename_query, sub_query_node, max_depth, max_backtracks)) return false; if (!setSubQuerySource(sub_query_node, select_node, true, true)) return false; @@ -277,7 +277,7 @@ bool ParserKQLMVExpand::genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_no select_node = std::move(sub_query_node); query = std::format("Select * {}, {} from {}", cast_type_column_restore, 
cast_type_column_restore_name, "rename_query"); - if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth, max_backtracks)) return false; sub_query_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(select_node)); select_node = std::move(sub_query_node); @@ -294,12 +294,12 @@ bool ParserKQLMVExpand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) KQLMVExpand kql_mv_expand; if (!parserMVExpand(kql_mv_expand, pos, expected)) return false; - if (!genQuery(kql_mv_expand, node, pos.max_depth)) + if (!genQuery(kql_mv_expand, node, pos.max_depth, pos.max_backtracks)) return false; const String setting_str = "enable_unaligned_array_join = 1"; Tokens token_settings(setting_str.c_str(), setting_str.c_str() + setting_str.size()); - IParser::Pos pos_settings(token_settings, pos.max_depth); + IParser::Pos pos_settings(token_settings, pos.max_depth, pos.max_backtracks); if (!ParserSetQuery(true).parse(pos_settings, setting, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLMVExpand.h b/src/Parsers/Kusto/ParserKQLMVExpand.h index 61f206bb00d..068aee53f58 100644 --- a/src/Parsers/Kusto/ParserKQLMVExpand.h +++ b/src/Parsers/Kusto/ParserKQLMVExpand.h @@ -33,7 +33,7 @@ protected: static bool parseColumnArrayExprs(ColumnArrayExprs & column_array_exprs, Pos & pos, Expected & expected); static bool parserMVExpand(KQLMVExpand & kql_mv_expand, Pos & pos, Expected & expected); - static bool genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_node, int32_t max_depth); + static bool genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks); const char * getName() const override { return "KQL mv-expand"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp index 
e89423e2fc9..4759efc0025 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -142,7 +142,7 @@ bool ParserKQLMakeSeries ::parseFromToStepClause(FromToStepClause & from_to_step || ParserKQLDateTypeTimespan().parseConstKQLTimespan(from_to_step.step_str)) { from_to_step.is_timespan = true; - from_to_step.step = std::stod(getExprFromToken(from_to_step.step_str, pos.max_depth)); + from_to_step.step = std::stod(getExprFromToken(from_to_step.step_str, pos.max_depth, pos.max_backtracks)); } else from_to_step.step = std::stod(from_to_step.step_str); @@ -150,7 +150,7 @@ bool ParserKQLMakeSeries ::parseFromToStepClause(FromToStepClause & from_to_step return true; } -bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & select_node, const uint32_t & max_depth) +bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks) { const uint64_t era_diff = 62135596800; // this magic number is the differicen is second form 0001-01-01 (Azure start time ) and 1970-01-01 (CH start time) @@ -166,15 +166,15 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & auto step = from_to_step.step; if (!kql_make_series.from_to_step.from_str.empty()) - start_str = getExprFromToken(kql_make_series.from_to_step.from_str, max_depth); + start_str = getExprFromToken(kql_make_series.from_to_step.from_str, max_depth, max_backtracks); if (!kql_make_series.from_to_step.to_str.empty()) - end_str = getExprFromToken(from_to_step.to_str, max_depth); + end_str = getExprFromToken(from_to_step.to_str, max_depth, max_backtracks); auto date_type_cast = [&](String & src) { Tokens tokens(src.c_str(), src.c_str() + src.size()); - IParser::Pos pos(tokens, max_depth); + IParser::Pos pos(tokens, max_depth, max_backtracks); String res; while (isValidKQLPos(pos)) { @@ -201,7 +201,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & 
kql_make_series, ASTPtr & { std::vector group_expression_tokens; Tokens tokens(group_expression.c_str(), group_expression.c_str() + group_expression.size()); - IParser::Pos pos(tokens, max_depth); + IParser::Pos pos(tokens, max_depth, max_backtracks); while (isValidKQLPos(pos)) { if (String(pos->begin, pos->end) == "AS") @@ -296,7 +296,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & ASTPtr sub_query_node; - if (!ParserSimpleCHSubquery(select_node).parseByString(sub_sub_query, sub_query_node, max_depth)) + if (!ParserSimpleCHSubquery(select_node).parseByString(sub_sub_query, sub_query_node, max_depth, max_backtracks)) return false; select_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(sub_query_node)); @@ -351,7 +351,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & else main_query = std::format("{},{}", group_expression_alias, final_axis_agg_alias_list); - if (!ParserSimpleCHSubquery(select_node).parseByString(sub_query, sub_query_node, max_depth)) + if (!ParserSimpleCHSubquery(select_node).parseByString(sub_query, sub_query_node, max_depth, max_backtracks)) return false; select_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(sub_query_node)); @@ -411,10 +411,10 @@ bool ParserKQLMakeSeries ::parseImpl(Pos & pos, ASTPtr & node, Expected & expect subquery_columns += ", " + column_str; } - makeSeries(kql_make_series, node, pos.max_depth); + makeSeries(kql_make_series, node, pos.max_depth, pos.max_backtracks); Tokens token_main_query(kql_make_series.main_query.c_str(), kql_make_series.main_query.c_str() + kql_make_series.main_query.size()); - IParser::Pos pos_main_query(token_main_query, pos.max_depth); + IParser::Pos pos_main_query(token_main_query, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(true).parse(pos_main_query, select_expression_list, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.h 
b/src/Parsers/Kusto/ParserKQLMakeSeries.h index ef7cc4976f6..6a32e76eff3 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.h +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.h @@ -42,7 +42,7 @@ protected: String main_query; }; - static bool makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & select_node, const uint32_t & max_depth); + static bool makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks); static bool parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos); static bool parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos); diff --git a/src/Parsers/Kusto/ParserKQLPrint.cpp b/src/Parsers/Kusto/ParserKQLPrint.cpp index bd9980ea96d..37483439f14 100644 --- a/src/Parsers/Kusto/ParserKQLPrint.cpp +++ b/src/Parsers/Kusto/ParserKQLPrint.cpp @@ -10,7 +10,7 @@ bool ParserKQLPrint::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) const String expr = getExprFromToken(pos); Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index fdc458b7707..eab9ee082c5 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -12,7 +12,7 @@ bool ParserKQLProject ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) expr = getExprFromToken(pos); Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index a54a2b0eda9..6fd9c95ec6f 
100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -33,20 +33,20 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -bool ParserKQLBase::parseByString(const String expr, ASTPtr & node, const uint32_t max_depth) +bool ParserKQLBase::parseByString(String expr, ASTPtr & node, uint32_t max_depth, uint32_t max_backtracks) { Expected expected; Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos pos(tokens, max_depth); + IParser::Pos pos(tokens, max_depth, max_backtracks); return parse(pos, node, expected); } -bool ParserKQLBase::parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, int32_t max_depth) +bool ParserKQLBase::parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks) { Expected expected; Tokens token_subquery(query.c_str(), query.c_str() + query.size()); - IParser::Pos pos_subquery(token_subquery, max_depth); + IParser::Pos pos_subquery(token_subquery, max_depth, max_backtracks); if (!parser->parse(pos_subquery, select_node, expected)) return false; return true; @@ -121,10 +121,10 @@ bool ParserKQLBase::setSubQuerySource(ASTPtr & select_query, ASTPtr & source, bo return true; } -String ParserKQLBase::getExprFromToken(const String & text, const uint32_t max_depth) +String ParserKQLBase::getExprFromToken(const String & text, uint32_t max_depth, uint32_t max_backtracks) { Tokens tokens(text.c_str(), text.c_str() + text.size()); - IParser::Pos pos(tokens, max_depth); + IParser::Pos pos(tokens, max_depth, max_backtracks); return getExprFromToken(pos); } @@ -523,7 +523,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); - IParser::Pos pos_subquery(token_subquery, pos.max_depth); + IParser::Pos 
pos_subquery(token_subquery, pos.max_depth, pos.max_backtracks); if (!ParserKQLSubquery().parse(pos_subquery, tables, expected)) return false; @@ -544,7 +544,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (oprator) { Tokens token_clause(op_calsue.c_str(), op_calsue.c_str() + op_calsue.size()); - IParser::Pos pos_clause(token_clause, pos.max_depth); + IParser::Pos pos_clause(token_clause, pos.max_depth, pos.max_backtracks); if (!oprator->parse(pos_clause, node, expected)) return false; } @@ -577,7 +577,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto expr = String("*"); Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!std::make_unique()->parse(new_pos, node, expected)) return false; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index a594f43ceec..e003ee3ee8b 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -9,11 +9,11 @@ class ParserKQLBase : public IParserBase { public: static String getExprFromToken(Pos & pos); - static String getExprFromToken(const String & text, uint32_t max_depth); + static String getExprFromToken(const String & text, uint32_t max_depth, uint32_t max_backtracks); static String getExprFromPipe(Pos & pos); static bool setSubQuerySource(ASTPtr & select_query, ASTPtr & source, bool dest_is_subquery, bool src_is_subquery); - static bool parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, int32_t max_depth); - bool parseByString(String expr, ASTPtr & node, uint32_t max_depth); + static bool parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks); + bool parseByString(String expr, ASTPtr & node, uint32_t max_depth, uint32_t max_backtracks); }; class ParserKQLQuery : 
public IParserBase diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 7e5ac2b17e7..852ba50698d 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -19,7 +19,7 @@ bool ParserKQLSort::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto expr = getExprFromToken(pos); Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); auto pos_backup = new_pos; if (!order_list.parse(pos_backup, order_expression_list, expected)) diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 668696fa9dc..fbf2110e664 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -95,7 +95,7 @@ bool ParserKQLTableFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expe } ++pos; Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); - IParser::Pos pos_kql(token_kql, pos.max_depth); + IParser::Pos pos_kql(token_kql, pos.max_depth, pos.max_backtracks); if (kql_p.parse(pos_kql, select, expected)) { diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index a45717930bb..47d706d0b4b 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -192,10 +192,10 @@ bool ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected expr_columns = expr_columns + "," + expr_aggregation; } - String converted_columns = getExprFromToken(expr_columns, pos.max_depth); + String converted_columns = getExprFromToken(expr_columns, pos.max_depth, pos.max_backtracks); Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); - IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); + IParser::Pos 
pos_converted_columns(token_converted_columns, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected)) return false; @@ -204,10 +204,10 @@ bool ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (groupby) { - String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth); + String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth, pos.max_backtracks); Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); - IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); + IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) return false; diff --git a/src/Parsers/Kusto/parseKQLQuery.cpp b/src/Parsers/Kusto/parseKQLQuery.cpp index bcc04ef7001..34a009873f8 100644 --- a/src/Parsers/Kusto/parseKQLQuery.cpp +++ b/src/Parsers/Kusto/parseKQLQuery.cpp @@ -322,12 +322,13 @@ ASTPtr tryParseKQLQuery( bool allow_multi_statements, size_t max_query_size, size_t max_parser_depth, + size_t max_parser_backtracks, bool skip_insignificant) { const char * query_begin = _out_query_end; Tokens tokens(query_begin, all_queries_end, max_query_size, skip_insignificant); /// NOTE: consider use UInt32 for max_parser_depth setting. 
- IParser::Pos token_iterator(tokens, static_cast(max_parser_depth)); + IParser::Pos token_iterator(tokens, static_cast(max_parser_depth), static_cast(max_parser_backtracks)); if (token_iterator->isEnd() || token_iterator->type == TokenType::Semicolon) @@ -441,10 +442,11 @@ ASTPtr parseKQLQueryAndMovePosition( const std::string & query_description, bool allow_multi_statements, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t max_parser_backtracks) { std::string error_message; - ASTPtr res = tryParseKQLQuery(parser, pos, end, error_message, false, query_description, allow_multi_statements, max_query_size, max_parser_depth); + ASTPtr res = tryParseKQLQuery(parser, pos, end, error_message, false, query_description, allow_multi_statements, max_query_size, max_parser_depth, max_parser_backtracks); if (res) return res; @@ -458,9 +460,10 @@ ASTPtr parseKQLQuery( const char * end, const std::string & query_description, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t max_parser_backtracks) { - return parseKQLQueryAndMovePosition(parser, begin, end, query_description, false, max_query_size, max_parser_depth); + return parseKQLQueryAndMovePosition(parser, begin, end, query_description, false, max_query_size, max_parser_depth, max_parser_backtracks); } ASTPtr parseKQLQuery( @@ -468,18 +471,20 @@ ASTPtr parseKQLQuery( const std::string & query, const std::string & query_description, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t max_parser_backtracks) { - return parseKQLQuery(parser, query.data(), query.data() + query.size(), query_description, max_query_size, max_parser_depth); + return parseKQLQuery(parser, query.data(), query.data() + query.size(), query_description, max_query_size, max_parser_depth, max_parser_backtracks); } ASTPtr parseKQLQuery( IParser & parser, const std::string & query, size_t max_query_size, - size_t max_parser_depth) + size_t 
max_parser_depth, + size_t max_parser_backtracks) { - return parseKQLQuery(parser, query.data(), query.data() + query.size(), parser.getName(), max_query_size, max_parser_depth); + return parseKQLQuery(parser, query.data(), query.data() + query.size(), parser.getName(), max_query_size, max_parser_depth, max_parser_backtracks); } } diff --git a/src/Parsers/Kusto/parseKQLQuery.h b/src/Parsers/Kusto/parseKQLQuery.h index fca017e70fe..9e52ba56307 100644 --- a/src/Parsers/Kusto/parseKQLQuery.h +++ b/src/Parsers/Kusto/parseKQLQuery.h @@ -3,6 +3,7 @@ #include #include #include + namespace DB { @@ -10,10 +11,6 @@ namespace DB * Used in syntax error message. */ -} -namespace DB -{ - class IParser; /// Parse query or set 'out_error_message'. @@ -24,11 +21,11 @@ ASTPtr tryParseKQLQuery( std::string & out_error_message, bool hilite, const std::string & description, - bool allow_multi_statements, /// If false, check for non-space characters after semicolon and set error message if any. - size_t max_query_size, /// If (end - pos) > max_query_size and query is longer than max_query_size then throws "Max query size exceeded". - /// Disabled if zero. Is used in order to check query size if buffer can contains data for INSERT query. + bool allow_multi_statements, + size_t max_query_size, size_t max_parser_depth, - bool skip_insignificant = true); /// If true, lexer will skip all insignificant tokens (e.g. whitespaces) + size_t max_parser_backtracks, + bool skip_insignificant = true); /// Parse query or throw an exception with error message. 
@@ -39,7 +36,8 @@ ASTPtr parseKQLQueryAndMovePosition( const std::string & description, bool allow_multi_statements, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); ASTPtr parseKQLQuery( IParser & parser, @@ -47,18 +45,22 @@ ASTPtr parseKQLQuery( const char * end, const std::string & description, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); ASTPtr parseKQLQuery( IParser & parser, const std::string & query, const std::string & query_description, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); ASTPtr parseKQLQuery( IParser & parser, const std::string & query, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); + } diff --git a/src/Parsers/MySQL/tests/gtest_alter_command_parser.cpp b/src/Parsers/MySQL/tests/gtest_alter_command_parser.cpp index d406cdbd3b9..4db96646e16 100644 --- a/src/Parsers/MySQL/tests/gtest_alter_command_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_alter_command_parser.cpp @@ -11,7 +11,7 @@ using namespace DB::MySQLParser; static inline ASTPtr tryParserQuery(IParser & parser, const String & query) { - return parseQuery(parser, query.data(), query.data() + query.size(), "", 0, 0); + return parseQuery(parser, query.data(), query.data() + query.size(), "", 0, 0, 0); } TEST(ParserAlterCommand, AddAlterCommand) diff --git a/src/Parsers/MySQL/tests/gtest_alter_parser.cpp b/src/Parsers/MySQL/tests/gtest_alter_parser.cpp index 4ebbe332710..2b12d7bdcf1 100644 --- a/src/Parsers/MySQL/tests/gtest_alter_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_alter_parser.cpp @@ -9,7 +9,7 @@ using namespace DB::MySQLParser; static inline ASTPtr tryParserQuery(IParser & parser, const String & query) { - return parseQuery(parser, query.data(), query.data() + query.size(), "", 0, 0); + return parseQuery(parser, query.data(), 
query.data() + query.size(), "", 0, 0, 0); } TEST(ParserAlterQuery, AlterQuery) diff --git a/src/Parsers/MySQL/tests/gtest_column_parser.cpp b/src/Parsers/MySQL/tests/gtest_column_parser.cpp index b1c7c778bea..21c37e4ee2e 100644 --- a/src/Parsers/MySQL/tests/gtest_column_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_column_parser.cpp @@ -17,7 +17,7 @@ TEST(ParserColumn, AllNonGeneratedColumnOption) String input = "col_01 VARCHAR(100) NOT NULL DEFAULT NULL AUTO_INCREMENT UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf8 " "COLUMN_FORMAT FIXED STORAGE MEMORY REFERENCES tbl_name (col_01) CHECK 1"; - ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0, 0); EXPECT_EQ(ast->as()->name, "col_01"); EXPECT_EQ(ast->as()->data_type->as()->name, "VARCHAR"); EXPECT_EQ(ast->as()->data_type->as()->arguments->children[0]->as()->value.safeGet(), 100); @@ -42,7 +42,7 @@ TEST(ParserColumn, AllGeneratedColumnOption) String input = "col_01 VARCHAR(100) NULL UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf8 " "REFERENCES tbl_name (col_01) CHECK 1 GENERATED ALWAYS AS (1) STORED"; - ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0, 0); EXPECT_EQ(ast->as()->name, "col_01"); EXPECT_EQ(ast->as()->data_type->as()->name, "VARCHAR"); EXPECT_EQ(ast->as()->data_type->as()->arguments->children[0]->as()->value.safeGet(), 100); diff --git a/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp b/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp index 9c9124c9f58..a06f2ade24a 100644 --- a/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp @@ -14,7 +14,7 @@ TEST(ParserConstraint, CheckConstraint) ParserDeclareConstraint p_constraint; String constraint_01 = "CONSTRAINT 
symbol_name CHECK col_01 = 1"; - ASTPtr ast_constraint_01 = parseQuery(p_constraint, constraint_01.data(), constraint_01.data() + constraint_01.size(), "", 0, 0); + ASTPtr ast_constraint_01 = parseQuery(p_constraint, constraint_01.data(), constraint_01.data() + constraint_01.size(), "", 0, 0, 0); EXPECT_EQ(ast_constraint_01->as()->constraint_name, "symbol_name"); auto * check_expression_01 = ast_constraint_01->as()->check_expression->as(); EXPECT_EQ(check_expression_01->name, "equals"); @@ -22,7 +22,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(check_expression_01->arguments->children[1]->as()->value.safeGet(), 1); String constraint_02 = "CONSTRAINT CHECK col_01 = 1"; - ASTPtr ast_constraint_02 = parseQuery(p_constraint, constraint_02.data(), constraint_02.data() + constraint_02.size(), "", 0, 0); + ASTPtr ast_constraint_02 = parseQuery(p_constraint, constraint_02.data(), constraint_02.data() + constraint_02.size(), "", 0, 0, 0); EXPECT_EQ(ast_constraint_02->as()->constraint_name, ""); auto * check_expression_02 = ast_constraint_02->as()->check_expression->as(); EXPECT_EQ(check_expression_02->name, "equals"); @@ -30,7 +30,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(check_expression_02->arguments->children[1]->as()->value.safeGet(), 1); String constraint_03 = "CHECK col_01 = 1"; - ASTPtr ast_constraint_03 = parseQuery(p_constraint, constraint_03.data(), constraint_03.data() + constraint_03.size(), "", 0, 0); + ASTPtr ast_constraint_03 = parseQuery(p_constraint, constraint_03.data(), constraint_03.data() + constraint_03.size(), "", 0, 0, 0); EXPECT_EQ(ast_constraint_03->as()->constraint_name, ""); auto * check_expression_03 = ast_constraint_03->as()->check_expression->as(); EXPECT_EQ(check_expression_03->name, "equals"); @@ -38,7 +38,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(check_expression_03->arguments->children[1]->as()->value.safeGet(), 1); String constraint_04 = "CONSTRAINT CHECK col_01 = 1 ENFORCED"; - ASTPtr ast_constraint_04 
= parseQuery(p_constraint, constraint_04.data(), constraint_04.data() + constraint_04.size(), "", 0, 0); + ASTPtr ast_constraint_04 = parseQuery(p_constraint, constraint_04.data(), constraint_04.data() + constraint_04.size(), "", 0, 0, 0); EXPECT_TRUE(ast_constraint_04->as()->enforced); EXPECT_EQ(ast_constraint_04->as()->constraint_name, ""); auto * check_expression_04 = ast_constraint_04->as()->check_expression->as(); @@ -47,7 +47,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(check_expression_04->arguments->children[1]->as()->value.safeGet(), 1); String constraint_05 = "CONSTRAINT CHECK col_01 = 1 NOT ENFORCED"; - ASTPtr ast_constraint_05 = parseQuery(p_constraint, constraint_05.data(), constraint_05.data() + constraint_05.size(), "", 0, 0); + ASTPtr ast_constraint_05 = parseQuery(p_constraint, constraint_05.data(), constraint_05.data() + constraint_05.size(), "", 0, 0, 0); EXPECT_FALSE(ast_constraint_05->as()->enforced); EXPECT_EQ(ast_constraint_05->as()->constraint_name, ""); auto * check_expression_05 = ast_constraint_05->as()->check_expression->as(); diff --git a/src/Parsers/MySQL/tests/gtest_create_parser.cpp b/src/Parsers/MySQL/tests/gtest_create_parser.cpp index 2f65eb6e592..8512b88ffc1 100644 --- a/src/Parsers/MySQL/tests/gtest_create_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_create_parser.cpp @@ -15,16 +15,16 @@ TEST(CreateTableParser, LikeCreate) { ParserCreateQuery p_create_query; String like_create_01 = "CREATE TABLE IF NOT EXISTS table_name LIKE table_name_01"; - parseQuery(p_create_query, like_create_01.data(), like_create_01.data() + like_create_01.size(), "", 0, 0); + parseQuery(p_create_query, like_create_01.data(), like_create_01.data() + like_create_01.size(), "", 0, 0, 0); String like_create_02 = "CREATE TABLE IF NOT EXISTS table_name (LIKE table_name_01)"; - parseQuery(p_create_query, like_create_02.data(), like_create_02.data() + like_create_02.size(), "", 0, 0); + parseQuery(p_create_query, like_create_02.data(), 
like_create_02.data() + like_create_02.size(), "", 0, 0, 0); } TEST(CreateTableParser, SimpleCreate) { ParserCreateQuery p_create_query; String input = "CREATE TABLE IF NOT EXISTS table_name(col_01 VARCHAR(100), INDEX (col_01), CHECK 1) ENGINE INNODB PARTITION BY HASH(col_01)"; - ASTPtr ast = parseQuery(p_create_query, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_create_query, input.data(), input.data() + input.size(), "", 0, 0, 0); EXPECT_TRUE(ast->as()->if_not_exists); EXPECT_EQ(ast->as()->columns_list->as()->columns->children.size(), 1); EXPECT_EQ(ast->as()->columns_list->as()->indices->children.size(), 1); @@ -37,7 +37,7 @@ TEST(CreateTableParser, SS) { ParserCreateQuery p_create_query; String input = "CREATE TABLE `test_table_1` (`a` int DEFAULT NULL, `b` int DEFAULT NULL) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci"; - ASTPtr ast = parseQuery(p_create_query, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_create_query, input.data(), input.data() + input.size(), "", 0, 0, 0); WriteBufferFromOStream buf(std::cerr, 4096); ast->dumpTree(buf); buf.finalize(); diff --git a/src/Parsers/MySQL/tests/gtest_index_parser.cpp b/src/Parsers/MySQL/tests/gtest_index_parser.cpp index a8be6787b2c..187bac3e090 100644 --- a/src/Parsers/MySQL/tests/gtest_index_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_index_parser.cpp @@ -14,7 +14,7 @@ TEST(ParserIndex, AllIndexOptions) String input = "INDEX (col_01, col_02(100), col_03 DESC) KEY_BLOCK_SIZE 3 USING HASH WITH PARSER parser_name COMMENT 'index comment' VISIBLE"; ParserDeclareIndex p_index; - ASTPtr ast = parseQuery(p_index, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_index, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclareIndex * declare_index = ast->as(); EXPECT_EQ(declare_index->index_columns->children[0]->as()->name(), "col_01"); @@ -33,7 +33,7 @@ TEST(ParserIndex, 
OptionalIndexOptions) String input = "INDEX (col_01, col_02(100), col_03 DESC) USING HASH INVISIBLE KEY_BLOCK_SIZE 3"; ParserDeclareIndex p_index; - ASTPtr ast = parseQuery(p_index, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_index, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclareIndex * declare_index = ast->as(); EXPECT_EQ(declare_index->index_columns->children[0]->as()->name(), "col_01"); @@ -50,28 +50,28 @@ TEST(ParserIndex, OrdinaryIndex) { ParserDeclareIndex p_index; String non_unique_index_01 = "KEY index_name USING HASH (col_01) INVISIBLE"; - parseQuery(p_index, non_unique_index_01.data(), non_unique_index_01.data() + non_unique_index_01.size(), "", 0, 0); + parseQuery(p_index, non_unique_index_01.data(), non_unique_index_01.data() + non_unique_index_01.size(), "", 0, 0, 0); String non_unique_index_02 = "INDEX index_name USING HASH (col_01) INVISIBLE"; - parseQuery(p_index, non_unique_index_02.data(), non_unique_index_02.data() + non_unique_index_02.size(), "", 0, 0); + parseQuery(p_index, non_unique_index_02.data(), non_unique_index_02.data() + non_unique_index_02.size(), "", 0, 0, 0); String fulltext_index_01 = "FULLTEXT index_name (col_01) INVISIBLE"; - parseQuery(p_index, fulltext_index_01.data(), fulltext_index_01.data() + fulltext_index_01.size(), "", 0, 0); + parseQuery(p_index, fulltext_index_01.data(), fulltext_index_01.data() + fulltext_index_01.size(), "", 0, 0, 0); String fulltext_index_02 = "FULLTEXT INDEX index_name (col_01) INVISIBLE"; - parseQuery(p_index, fulltext_index_02.data(), fulltext_index_02.data() + fulltext_index_02.size(), "", 0, 0); + parseQuery(p_index, fulltext_index_02.data(), fulltext_index_02.data() + fulltext_index_02.size(), "", 0, 0, 0); String fulltext_index_03 = "FULLTEXT KEY index_name (col_01) INVISIBLE"; - parseQuery(p_index, fulltext_index_03.data(), fulltext_index_03.data() + fulltext_index_03.size(), "", 0, 0); + parseQuery(p_index, fulltext_index_03.data(), 
fulltext_index_03.data() + fulltext_index_03.size(), "", 0, 0, 0); String spatial_index_01 = "SPATIAL index_name (col_01) INVISIBLE"; - parseQuery(p_index, spatial_index_01.data(), spatial_index_01.data() + spatial_index_01.size(), "", 0, 0); + parseQuery(p_index, spatial_index_01.data(), spatial_index_01.data() + spatial_index_01.size(), "", 0, 0, 0); String spatial_index_02 = "SPATIAL INDEX index_name (col_01) INVISIBLE"; - parseQuery(p_index, spatial_index_02.data(), spatial_index_02.data() + spatial_index_02.size(), "", 0, 0); + parseQuery(p_index, spatial_index_02.data(), spatial_index_02.data() + spatial_index_02.size(), "", 0, 0, 0); String spatial_index_03 = "SPATIAL KEY index_name (col_01) INVISIBLE"; - parseQuery(p_index, spatial_index_03.data(), spatial_index_03.data() + spatial_index_03.size(), "", 0, 0); + parseQuery(p_index, spatial_index_03.data(), spatial_index_03.data() + spatial_index_03.size(), "", 0, 0, 0); } TEST(ParserIndex, ConstraintIndex) @@ -79,47 +79,47 @@ TEST(ParserIndex, ConstraintIndex) ParserDeclareIndex p_index; String primary_key_01 = "PRIMARY KEY (col_01) INVISIBLE"; - parseQuery(p_index, primary_key_01.data(), primary_key_01.data() + primary_key_01.size(), "", 0, 0); + parseQuery(p_index, primary_key_01.data(), primary_key_01.data() + primary_key_01.size(), "", 0, 0, 0); String primary_key_02 = "PRIMARY KEY USING BTREE (col_01) INVISIBLE"; - parseQuery(p_index, primary_key_02.data(), primary_key_02.data() + primary_key_02.size(), "", 0, 0); + parseQuery(p_index, primary_key_02.data(), primary_key_02.data() + primary_key_02.size(), "", 0, 0, 0); String primary_key_03 = "CONSTRAINT PRIMARY KEY USING BTREE (col_01) INVISIBLE"; - parseQuery(p_index, primary_key_03.data(), primary_key_03.data() + primary_key_03.size(), "", 0, 0); + parseQuery(p_index, primary_key_03.data(), primary_key_03.data() + primary_key_03.size(), "", 0, 0, 0); String primary_key_04 = "CONSTRAINT index_name PRIMARY KEY USING BTREE (col_01) INVISIBLE"; - 
parseQuery(p_index, primary_key_04.data(), primary_key_04.data() + primary_key_04.size(), "", 0, 0); + parseQuery(p_index, primary_key_04.data(), primary_key_04.data() + primary_key_04.size(), "", 0, 0, 0); String unique_key_01 = "UNIQUE (col_01) INVISIBLE"; - parseQuery(p_index, unique_key_01.data(), unique_key_01.data() + unique_key_01.size(), "", 0, 0); + parseQuery(p_index, unique_key_01.data(), unique_key_01.data() + unique_key_01.size(), "", 0, 0, 0); String unique_key_02 = "UNIQUE INDEX (col_01) INVISIBLE"; - parseQuery(p_index, unique_key_02.data(), unique_key_02.data() + unique_key_02.size(), "", 0, 0); + parseQuery(p_index, unique_key_02.data(), unique_key_02.data() + unique_key_02.size(), "", 0, 0, 0); String unique_key_03 = "UNIQUE KEY (col_01) INVISIBLE"; - parseQuery(p_index, unique_key_03.data(), unique_key_03.data() + unique_key_03.size(), "", 0, 0); + parseQuery(p_index, unique_key_03.data(), unique_key_03.data() + unique_key_03.size(), "", 0, 0, 0); String unique_key_04 = "UNIQUE KEY index_name (col_01) INVISIBLE"; - parseQuery(p_index, unique_key_04.data(), unique_key_04.data() + unique_key_04.size(), "", 0, 0); + parseQuery(p_index, unique_key_04.data(), unique_key_04.data() + unique_key_04.size(), "", 0, 0, 0); String unique_key_05 = "UNIQUE KEY index_name USING HASH (col_01) INVISIBLE"; - parseQuery(p_index, unique_key_05.data(), unique_key_05.data() + unique_key_05.size(), "", 0, 0); + parseQuery(p_index, unique_key_05.data(), unique_key_05.data() + unique_key_05.size(), "", 0, 0, 0); String unique_key_06 = "CONSTRAINT UNIQUE KEY index_name USING HASH (col_01) INVISIBLE"; - parseQuery(p_index, unique_key_06.data(), unique_key_06.data() + unique_key_06.size(), "", 0, 0); + parseQuery(p_index, unique_key_06.data(), unique_key_06.data() + unique_key_06.size(), "", 0, 0, 0); String unique_key_07 = "CONSTRAINT index_name UNIQUE KEY index_name_1 USING HASH (col_01) INVISIBLE"; - parseQuery(p_index, unique_key_07.data(), unique_key_07.data() + 
unique_key_07.size(), "", 0, 0); + parseQuery(p_index, unique_key_07.data(), unique_key_07.data() + unique_key_07.size(), "", 0, 0, 0); String foreign_key_01 = "FOREIGN KEY (col_01) REFERENCES tbl_name (col_01)"; - parseQuery(p_index, foreign_key_01.data(), foreign_key_01.data() + foreign_key_01.size(), "", 0, 0); + parseQuery(p_index, foreign_key_01.data(), foreign_key_01.data() + foreign_key_01.size(), "", 0, 0, 0); String foreign_key_02 = "FOREIGN KEY index_name (col_01) REFERENCES tbl_name (col_01)"; - parseQuery(p_index, foreign_key_02.data(), foreign_key_02.data() + foreign_key_02.size(), "", 0, 0); + parseQuery(p_index, foreign_key_02.data(), foreign_key_02.data() + foreign_key_02.size(), "", 0, 0, 0); String foreign_key_03 = "CONSTRAINT FOREIGN KEY index_name (col_01) REFERENCES tbl_name (col_01)"; - parseQuery(p_index, foreign_key_03.data(), foreign_key_03.data() + foreign_key_03.size(), "", 0, 0); + parseQuery(p_index, foreign_key_03.data(), foreign_key_03.data() + foreign_key_03.size(), "", 0, 0, 0); String foreign_key_04 = "CONSTRAINT index_name FOREIGN KEY index_name_01 (col_01) REFERENCES tbl_name (col_01)"; - parseQuery(p_index, foreign_key_04.data(), foreign_key_04.data() + foreign_key_04.size(), "", 0, 0); + parseQuery(p_index, foreign_key_04.data(), foreign_key_04.data() + foreign_key_04.size(), "", 0, 0, 0); } diff --git a/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp b/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp index 01b757e5891..6ec8d73530e 100644 --- a/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp @@ -14,14 +14,14 @@ TEST(ParserPartitionOptions, HashPatitionOptions) String hash_partition = "PARTITION BY HASH(col_01)"; ParserDeclarePartitionOptions p_partition_options; - ASTPtr ast_01 = parseQuery(p_partition_options, hash_partition.data(), hash_partition.data() + hash_partition.size(), "", 0, 0); + ASTPtr ast_01 = 
parseQuery(p_partition_options, hash_partition.data(), hash_partition.data() + hash_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "hash"); EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String linear_hash_partition = "PARTITION BY LINEAR HASH(col_01)"; - ASTPtr ast_02 = parseQuery(p_partition_options, linear_hash_partition.data(), linear_hash_partition.data() + linear_hash_partition.size(), "", 0, 0); + ASTPtr ast_02 = parseQuery(p_partition_options, linear_hash_partition.data(), linear_hash_partition.data() + linear_hash_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "linear_hash"); @@ -33,14 +33,14 @@ TEST(ParserPartitionOptions, KeyPatitionOptions) String key_partition = "PARTITION BY KEY(col_01)"; ParserDeclarePartitionOptions p_partition_options; - ASTPtr ast_01 = parseQuery(p_partition_options, key_partition.data(), key_partition.data() + key_partition.size(), "", 0, 0); + ASTPtr ast_01 = parseQuery(p_partition_options, key_partition.data(), key_partition.data() + key_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "key"); EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String linear_key_partition = "PARTITION BY LINEAR KEY(col_01, col_02)"; - ASTPtr ast_02 = parseQuery(p_partition_options, linear_key_partition.data(), linear_key_partition.data() + linear_key_partition.size(), "", 0, 0); + ASTPtr ast_02 = parseQuery(p_partition_options, linear_key_partition.data(), linear_key_partition.data() + linear_key_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); 
EXPECT_EQ(declare_partition_options_02->partition_type, "linear_key"); @@ -49,7 +49,7 @@ TEST(ParserPartitionOptions, KeyPatitionOptions) EXPECT_EQ(columns_list->children[1]->as()->name(), "col_02"); String key_partition_with_algorithm = "PARTITION BY KEY ALGORITHM=1 (col_01)"; - ASTPtr ast_03 = parseQuery(p_partition_options, key_partition_with_algorithm.data(), key_partition_with_algorithm.data() + key_partition_with_algorithm.size(), "", 0, 0); + ASTPtr ast_03 = parseQuery(p_partition_options, key_partition_with_algorithm.data(), key_partition_with_algorithm.data() + key_partition_with_algorithm.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_03 = ast_03->as(); EXPECT_EQ(declare_partition_options_03->partition_type, "key_1"); @@ -61,14 +61,14 @@ TEST(ParserPartitionOptions, RangePatitionOptions) String range_partition = "PARTITION BY RANGE(col_01)"; ParserDeclarePartitionOptions p_partition_options; - ASTPtr ast_01 = parseQuery(p_partition_options, range_partition.data(), range_partition.data() + range_partition.size(), "", 0, 0); + ASTPtr ast_01 = parseQuery(p_partition_options, range_partition.data(), range_partition.data() + range_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "range"); EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String range_columns_partition = "PARTITION BY RANGE COLUMNS(col_01, col_02)"; - ASTPtr ast_02 = parseQuery(p_partition_options, range_columns_partition.data(), range_columns_partition.data() + range_columns_partition.size(), "", 0, 0); + ASTPtr ast_02 = parseQuery(p_partition_options, range_columns_partition.data(), range_columns_partition.data() + range_columns_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "range"); @@ -82,14 +82,14 
@@ TEST(ParserPartitionOptions, ListPatitionOptions) String range_partition = "PARTITION BY LIST(col_01)"; ParserDeclarePartitionOptions p_partition_options; - ASTPtr ast_01 = parseQuery(p_partition_options, range_partition.data(), range_partition.data() + range_partition.size(), "", 0, 0); + ASTPtr ast_01 = parseQuery(p_partition_options, range_partition.data(), range_partition.data() + range_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "list"); EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String range_columns_partition = "PARTITION BY LIST COLUMNS(col_01, col_02)"; - ASTPtr ast_02 = parseQuery(p_partition_options, range_columns_partition.data(), range_columns_partition.data() + range_columns_partition.size(), "", 0, 0); + ASTPtr ast_02 = parseQuery(p_partition_options, range_columns_partition.data(), range_columns_partition.data() + range_columns_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "list"); @@ -103,7 +103,7 @@ TEST(ParserPartitionOptions, PatitionNumberOptions) String numbers_partition = "PARTITION BY KEY(col_01) PARTITIONS 2"; ParserDeclarePartitionOptions p_partition_options; - ASTPtr ast = parseQuery(p_partition_options, numbers_partition.data(), numbers_partition.data() + numbers_partition.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_partition_options, numbers_partition.data(), numbers_partition.data() + numbers_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options = ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); @@ -116,7 +116,7 @@ TEST(ParserPartitionOptions, PatitionWithSubpartitionOptions) String partition_with_subpartition = "PARTITION BY KEY(col_01) PARTITIONS 3 SUBPARTITION BY HASH(col_02) SUBPARTITIONS 4"; 
ParserDeclarePartitionOptions p_partition_options; - ASTPtr ast = parseQuery(p_partition_options, partition_with_subpartition.data(), partition_with_subpartition.data() + partition_with_subpartition.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_partition_options, partition_with_subpartition.data(), partition_with_subpartition.data() + partition_with_subpartition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options = ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); @@ -134,7 +134,7 @@ TEST(ParserPartitionOptions, PatitionOptionsWithDeclarePartition) ParserDeclarePartitionOptions p_partition_options; ASTPtr ast = parseQuery(p_partition_options, partition_options_with_declare.data(), - partition_options_with_declare.data() + partition_options_with_declare.size(), "", 0, 0); + partition_options_with_declare.data() + partition_options_with_declare.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options = ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); @@ -153,7 +153,7 @@ TEST(ParserPartitionOptions, PatitionOptionsWithDeclarePartitions) ParserDeclarePartitionOptions p_partition_options; ASTPtr ast = parseQuery(p_partition_options, partition_options_with_declare.data(), - partition_options_with_declare.data() + partition_options_with_declare.size(), "", 0, 0); + partition_options_with_declare.data() + partition_options_with_declare.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options = ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); diff --git a/src/Parsers/MySQL/tests/gtest_partition_parser.cpp b/src/Parsers/MySQL/tests/gtest_partition_parser.cpp index 458c7acd553..07c7c03dbb7 100644 --- a/src/Parsers/MySQL/tests/gtest_partition_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_partition_parser.cpp @@ -17,7 +17,7 @@ TEST(ParserPartition, AllPatitionOptions) " TABLESPACE table_space_name"; ParserDeclarePartition p_partition; - ASTPtr 
ast = parseQuery(p_partition, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_partition, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition = ast->as(); EXPECT_EQ(declare_partition->partition_name, "partition_name"); @@ -35,7 +35,7 @@ TEST(ParserPartition, OptionalPatitionOptions) { String input = "PARTITION partition_name STORAGE engine = engine_name max_rows 1000 min_rows 0 tablespace table_space_name"; ParserDeclarePartition p_partition; - ASTPtr ast = parseQuery(p_partition, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_partition, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition = ast->as(); EXPECT_EQ(declare_partition->partition_name, "partition_name"); @@ -50,7 +50,7 @@ TEST(ParserPartition, PatitionOptionsWithLessThan) { ParserDeclarePartition p_partition; String partition_01 = "PARTITION partition_01 VALUES LESS THAN (1991) STORAGE engine = engine_name"; - ASTPtr ast_partition_01 = parseQuery(p_partition, partition_01.data(), partition_01.data() + partition_01.size(), "", 0, 0); + ASTPtr ast_partition_01 = parseQuery(p_partition, partition_01.data(), partition_01.data() + partition_01.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_01 = ast_partition_01->as(); EXPECT_EQ(declare_partition_01->partition_name, "partition_01"); @@ -59,7 +59,7 @@ TEST(ParserPartition, PatitionOptionsWithLessThan) EXPECT_EQ(declare_options_01->changes["engine"]->as()->name(), "engine_name"); String partition_02 = "PARTITION partition_02 VALUES LESS THAN MAXVALUE STORAGE engine = engine_name"; - ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0); + ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_02 = 
ast_partition_02->as(); EXPECT_EQ(declare_partition_02->partition_name, "partition_02"); @@ -68,7 +68,7 @@ TEST(ParserPartition, PatitionOptionsWithLessThan) EXPECT_EQ(declare_options_02->changes["engine"]->as()->name(), "engine_name"); String partition_03 = "PARTITION partition_03 VALUES LESS THAN (50, MAXVALUE) STORAGE engine = engine_name"; - ASTPtr ast_partition_03 = parseQuery(p_partition, partition_03.data(), partition_03.data() + partition_03.size(), "", 0, 0); + ASTPtr ast_partition_03 = parseQuery(p_partition, partition_03.data(), partition_03.data() + partition_03.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_03 = ast_partition_03->as(); EXPECT_EQ(declare_partition_03->partition_name, "partition_03"); @@ -79,7 +79,7 @@ TEST(ParserPartition, PatitionOptionsWithLessThan) EXPECT_EQ(declare_options_03->changes["engine"]->as()->name(), "engine_name"); String partition_04 = "PARTITION partition_04 VALUES LESS THAN (MAXVALUE, MAXVALUE) STORAGE engine = engine_name"; - ASTPtr ast_partition_04 = parseQuery(p_partition, partition_04.data(), partition_04.data() + partition_04.size(), "", 0, 0); + ASTPtr ast_partition_04 = parseQuery(p_partition, partition_04.data(), partition_04.data() + partition_04.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_04 = ast_partition_04->as(); EXPECT_EQ(declare_partition_04->partition_name, "partition_04"); @@ -94,7 +94,7 @@ TEST(ParserPartition, PatitionOptionsWithInExpression) { ParserDeclarePartition p_partition; String partition_01 = "PARTITION partition_01 VALUES IN (NULL, 1991, MAXVALUE) STORAGE engine = engine_name"; - ASTPtr ast_partition_01 = parseQuery(p_partition, partition_01.data(), partition_01.data() + partition_01.size(), "", 0, 0); + ASTPtr ast_partition_01 = parseQuery(p_partition, partition_01.data(), partition_01.data() + partition_01.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_01 = ast_partition_01->as(); EXPECT_EQ(declare_partition_01->partition_name, 
"partition_01"); @@ -106,7 +106,7 @@ TEST(ParserPartition, PatitionOptionsWithInExpression) EXPECT_EQ(declare_options_01->changes["engine"]->as()->name(), "engine_name"); String partition_02 = "PARTITION partition_02 VALUES IN ((NULL, 1991), (1991, NULL), (MAXVALUE, MAXVALUE)) STORAGE engine = engine_name"; - ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0); + ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_02 = ast_partition_02->as(); EXPECT_EQ(declare_partition_02->partition_name, "partition_02"); @@ -132,18 +132,17 @@ TEST(ParserPartition, PatitionOptionsWithSubpartitions) { ParserDeclarePartition p_partition; String partition_01 = "PARTITION partition_01 VALUES IN (NULL, 1991, MAXVALUE) STORAGE engine = engine_name (SUBPARTITION s_p01)"; - ASTPtr ast_partition_01 = parseQuery(p_partition, partition_01.data(), partition_01.data() + partition_01.size(), "", 0, 0); + ASTPtr ast_partition_01 = parseQuery(p_partition, partition_01.data(), partition_01.data() + partition_01.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_01 = ast_partition_01->as(); EXPECT_EQ(declare_partition_01->partition_name, "partition_01"); EXPECT_TRUE(declare_partition_01->subpartitions->as()->children[0]->as()); String partition_02 = "PARTITION partition_02 VALUES IN (NULL, 1991, MAXVALUE) STORAGE engine = engine_name (SUBPARTITION s_p01, SUBPARTITION s_p02)"; - ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0); + ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_02 = ast_partition_02->as(); EXPECT_EQ(declare_partition_02->partition_name, "partition_02"); 
EXPECT_TRUE(declare_partition_02->subpartitions->as()->children[0]->as()); EXPECT_TRUE(declare_partition_02->subpartitions->as()->children[1]->as()); } - diff --git a/src/Parsers/MySQL/tests/gtest_reference_parser.cpp b/src/Parsers/MySQL/tests/gtest_reference_parser.cpp index 7447f16fc7c..d5b3c9b596d 100644 --- a/src/Parsers/MySQL/tests/gtest_reference_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_reference_parser.cpp @@ -12,12 +12,12 @@ TEST(ParserReference, SimpleReference) ParserDeclareReference p_reference; String reference_01 = "REFERENCES table_name (ref_col_01)"; - ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0); + ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_01->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name(), "ref_col_01"); String reference_02 = "REFERENCES table_name (ref_col_01, ref_col_02)"; - ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0); + ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_02->as()->reference_table_name, "table_name"); ASTPtr arguments = ast_reference_02->as()->reference_expression->as()->arguments; EXPECT_EQ(arguments->children[0]->as()->name(), "ref_col_01"); @@ -28,19 +28,19 @@ TEST(ParserReference, ReferenceDifferenceKind) { ParserDeclareReference p_reference; String reference_01 = "REFERENCES table_name (ref_col_01) MATCH FULL"; - ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0); + ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0, 0); 
EXPECT_EQ(ast_reference_01->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_01->as()->kind, ASTDeclareReference::MATCH_FULL); String reference_02 = "REFERENCES table_name (ref_col_01) MATCH PARTIAL"; - ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0); + ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_02->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_02->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_02->as()->kind, ASTDeclareReference::MATCH_PARTIAL); String reference_03 = "REFERENCES table_name (ref_col_01) MATCH SIMPLE"; - ASTPtr ast_reference_03 = parseQuery(p_reference, reference_03.data(), reference_03.data() + reference_03.size(), "", 0, 0); + ASTPtr ast_reference_03 = parseQuery(p_reference, reference_03.data(), reference_03.data() + reference_03.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_03->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_03->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_03->as()->kind, ASTDeclareReference::MATCH_SIMPLE); @@ -50,7 +50,7 @@ TEST(ParserReference, ReferenceDifferenceOption) { ParserDeclareReference p_reference; String reference_01 = "REFERENCES table_name (ref_col_01) MATCH FULL ON DELETE RESTRICT ON UPDATE RESTRICT"; - ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0); + ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_01->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name(), "ref_col_01"); 
EXPECT_EQ(ast_reference_01->as()->kind, ASTDeclareReference::MATCH_FULL); @@ -58,7 +58,7 @@ TEST(ParserReference, ReferenceDifferenceOption) EXPECT_EQ(ast_reference_01->as()->on_update_option, ASTDeclareReference::RESTRICT); String reference_02 = "REFERENCES table_name (ref_col_01) MATCH FULL ON DELETE CASCADE ON UPDATE CASCADE"; - ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0); + ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_02->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_02->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_02->as()->kind, ASTDeclareReference::MATCH_FULL); @@ -66,7 +66,7 @@ TEST(ParserReference, ReferenceDifferenceOption) EXPECT_EQ(ast_reference_02->as()->on_update_option, ASTDeclareReference::CASCADE); String reference_03 = "REFERENCES table_name (ref_col_01) MATCH FULL ON DELETE SET NULL ON UPDATE SET NULL"; - ASTPtr ast_reference_03 = parseQuery(p_reference, reference_03.data(), reference_03.data() + reference_03.size(), "", 0, 0); + ASTPtr ast_reference_03 = parseQuery(p_reference, reference_03.data(), reference_03.data() + reference_03.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_03->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_03->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_03->as()->kind, ASTDeclareReference::MATCH_FULL); @@ -74,7 +74,7 @@ TEST(ParserReference, ReferenceDifferenceOption) EXPECT_EQ(ast_reference_03->as()->on_update_option, ASTDeclareReference::SET_NULL); String reference_04 = "REFERENCES table_name (ref_col_01) MATCH FULL ON UPDATE NO ACTION ON DELETE NO ACTION"; - ASTPtr ast_reference_04 = parseQuery(p_reference, reference_04.data(), reference_04.data() + reference_04.size(), "", 0, 0); + ASTPtr ast_reference_04 = 
parseQuery(p_reference, reference_04.data(), reference_04.data() + reference_04.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_04->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_04->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_04->as()->kind, ASTDeclareReference::MATCH_FULL); @@ -82,11 +82,10 @@ TEST(ParserReference, ReferenceDifferenceOption) EXPECT_EQ(ast_reference_04->as()->on_update_option, ASTDeclareReference::NO_ACTION); String reference_05 = "REFERENCES table_name (ref_col_01) MATCH FULL ON UPDATE SET DEFAULT ON DELETE SET DEFAULT"; - ASTPtr ast_reference_05 = parseQuery(p_reference, reference_05.data(), reference_05.data() + reference_05.size(), "", 0, 0); + ASTPtr ast_reference_05 = parseQuery(p_reference, reference_05.data(), reference_05.data() + reference_05.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_05->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_05->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_05->as()->kind, ASTDeclareReference::MATCH_FULL); EXPECT_EQ(ast_reference_05->as()->on_delete_option, ASTDeclareReference::SET_DEFAULT); EXPECT_EQ(ast_reference_05->as()->on_update_option, ASTDeclareReference::SET_DEFAULT); } - diff --git a/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp b/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp index b375f73c55c..1876cd1d028 100644 --- a/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp @@ -14,7 +14,7 @@ TEST(ParserSubpartition, AllSubpatitionOptions) " DATA DIRECTORY 'data_directory' INDEX DIRECTORY 'index_directory' max_rows 1000 MIN_ROWs 0" " TABLESPACE table_space_name"; MySQLParser::ParserDeclareSubPartition p_subpartition; - ASTPtr ast = parseQuery(p_subpartition, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_subpartition, input.data(), input.data() + input.size(), "", 0, 0, 0); 
ASTDeclareSubPartition * declare_subpartition = ast->as(); EXPECT_EQ(declare_subpartition->logical_name, "subpartition_name"); @@ -32,7 +32,7 @@ TEST(ParserSubpartition, OptionalSubpatitionOptions) { String input = "SUBPARTITION subpartition_name STORAGE engine = engine_name max_rows 1000 min_rows 0 tablespace table_space_name"; MySQLParser::ParserDeclareSubPartition p_subpartition; - ASTPtr ast = parseQuery(p_subpartition, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_subpartition, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclareSubPartition * declare_subpartition = ast->as(); EXPECT_EQ(declare_subpartition->logical_name, "subpartition_name"); @@ -42,4 +42,3 @@ TEST(ParserSubpartition, OptionalSubpatitionOptions) EXPECT_EQ(declare_options->changes["max_rows"]->as()->value.safeGet(), 1000); EXPECT_EQ(declare_options->changes["tablespace"]->as()->name(), "table_space_name"); } - diff --git a/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp b/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp index 42b9279c96d..a84da7cb9d5 100644 --- a/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp @@ -18,7 +18,7 @@ TEST(ParserTableOptions, AllSubpatitionOptions) " STATS_PERSISTENT DEFAULT STATS_SAMPLE_PAGES 3 TABLESPACE tablespace_name STORAGE MEMORY UNION (table_01, table_02)"; ParserDeclareTableOptions p_table_options; - ASTPtr ast = parseQuery(p_table_options, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_table_options, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclareOptions * declare_options = ast->as(); EXPECT_EQ(declare_options->changes["auto_increment"]->as()->value.safeGet(), 1); @@ -56,7 +56,7 @@ TEST(ParserTableOptions, OptionalTableOptions) { String input = "STATS_AUTO_RECALC DEFAULT AUTO_INCREMENt = 1 "; ParserDeclareTableOptions p_table_options; - ASTPtr ast = 
parseQuery(p_table_options, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_table_options, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclareOptions * declare_options = ast->as(); EXPECT_EQ(declare_options->changes["auto_increment"]->as()->value.safeGet(), 1); diff --git a/src/Parsers/PRQL/ParserPRQLQuery.cpp b/src/Parsers/PRQL/ParserPRQLQuery.cpp index b3733b727dc..fb1796714cb 100644 --- a/src/Parsers/PRQL/ParserPRQLQuery.cpp +++ b/src/Parsers/PRQL/ParserPRQLQuery.cpp @@ -69,7 +69,9 @@ bool ParserPRQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "", false, max_query_size, - max_parser_depth); + max_parser_depth, + max_parser_backtracks, + true); if (!node) throw Exception( diff --git a/src/Parsers/PRQL/ParserPRQLQuery.h b/src/Parsers/PRQL/ParserPRQLQuery.h index 4fc450df6b6..88bf97f69d1 100644 --- a/src/Parsers/PRQL/ParserPRQLQuery.h +++ b/src/Parsers/PRQL/ParserPRQLQuery.h @@ -13,9 +13,10 @@ private: // These fields are not used when PRQL is disabled at build time. 
[[maybe_unused]] size_t max_query_size; [[maybe_unused]] size_t max_parser_depth; + [[maybe_unused]] size_t max_parser_backtracks; public: - ParserPRQLQuery(size_t max_query_size_, size_t max_parser_depth_) : max_query_size{max_query_size_}, max_parser_depth{max_parser_depth_} + ParserPRQLQuery(size_t max_query_size_, size_t max_parser_depth_, size_t max_parser_backtracks_) : max_query_size(max_query_size_), max_parser_depth(max_parser_depth_), max_parser_backtracks(max_parser_backtracks_) { } diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index b1cc7622e00..4bc95e67afb 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -865,7 +865,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected else if (s_modify_sql_security.ignore(pos, expected)) { /// This is a hack so we can reuse parser from create and don't have to write `MODIFY SQL SECURITY SQL SECURITY INVOKER` - pos -= 2; + --pos; + --pos; if (!sql_security_p.parse(pos, command_sql_security, expected)) return false; command->type = ASTAlterCommand::MODIFY_SQL_SECURITY; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 440a8bc1dc7..30bce57f9d9 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -286,7 +286,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E { const String type_int("INT"); Tokens tokens(type_int.data(), type_int.data() + type_int.size()); - Pos tmp_pos(tokens, 0); + Pos tmp_pos(tokens, pos.max_depth, pos.max_backtracks); Expected tmp_expected; ParserDataType().parse(tmp_pos, type, tmp_expected); } diff --git a/src/Parsers/QueryParameterVisitor.cpp b/src/Parsers/QueryParameterVisitor.cpp index b8679cc3b96..9afd9a8615c 100644 --- a/src/Parsers/QueryParameterVisitor.cpp +++ b/src/Parsers/QueryParameterVisitor.cpp @@ -43,7 +43,7 @@ NameSet analyzeReceiveQueryParams(const std::string & query) const char * query_end = 
query.data() + query.size(); ParserQuery parser(query_end); - ASTPtr extract_query_ast = parseQuery(parser, query_begin, query_end, "analyzeReceiveQueryParams", 0, 0); + ASTPtr extract_query_ast = parseQuery(parser, query_begin, query_end, "analyzeReceiveQueryParams", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); QueryParameterVisitor(query_params).visit(extract_query_ast); NameSet query_param_names; diff --git a/src/Parsers/TokenIterator.h b/src/Parsers/TokenIterator.h index 8cb59aa12e2..192f2f55e6a 100644 --- a/src/Parsers/TokenIterator.h +++ b/src/Parsers/TokenIterator.h @@ -62,18 +62,6 @@ public: return *this; } - ALWAYS_INLINE TokenIterator & operator-=(int value) - { - index -= value; - return *this; - } - - ALWAYS_INLINE TokenIterator & operator+=(int value) - { - index += value; - return *this; - } - ALWAYS_INLINE bool operator<(const TokenIterator & rhs) const { return index < rhs.index; } ALWAYS_INLINE bool operator<=(const TokenIterator & rhs) const { return index <= rhs.index; } ALWAYS_INLINE bool operator==(const TokenIterator & rhs) const { return index == rhs.index; } diff --git a/src/Parsers/examples/create_parser.cpp b/src/Parsers/examples/create_parser.cpp index c241b353b4f..b628c79435c 100644 --- a/src/Parsers/examples/create_parser.cpp +++ b/src/Parsers/examples/create_parser.cpp @@ -13,7 +13,7 @@ int main(int, char **) std::string input = "CREATE TABLE hits (URL String, UserAgentMinor2 FixedString(2), EventTime DateTime) ENGINE = Log"; ParserCreateQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); WriteBufferFromOStream out(std::cerr, 4096); formatAST(*ast, out); diff --git a/src/Parsers/examples/select_parser.cpp b/src/Parsers/examples/select_parser.cpp index 15295170c6b..3ed358121f6 100644 --- a/src/Parsers/examples/select_parser.cpp +++ b/src/Parsers/examples/select_parser.cpp 
@@ -23,7 +23,7 @@ try " FORMAT TabSeparated"; ParserQueryWithOutput parser(input.data() + input.size()); - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); std::cout << "Success." << std::endl; WriteBufferFromOStream out(std::cerr, 4096); diff --git a/src/Parsers/fuzzers/select_parser_fuzzer.cpp b/src/Parsers/fuzzers/select_parser_fuzzer.cpp index ae490ed4e56..aed83853c33 100644 --- a/src/Parsers/fuzzers/select_parser_fuzzer.cpp +++ b/src/Parsers/fuzzers/select_parser_fuzzer.cpp @@ -15,7 +15,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) DB::ParserQueryWithOutput parser(input.data() + input.size()); const UInt64 max_parser_depth = 1000; - DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, max_parser_depth); + const UInt64 max_parser_backtracks = 1000000; + DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, max_parser_depth, max_parser_backtracks); const UInt64 max_ast_depth = 1000; ast->checkDepth(max_ast_depth); diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 8f9977c0b8d..7aad0b010a5 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -234,12 +234,13 @@ ASTPtr tryParseQuery( bool allow_multi_statements, size_t max_query_size, size_t max_parser_depth, + size_t max_parser_backtracks, bool skip_insignificant) { const char * query_begin = _out_query_end; Tokens tokens(query_begin, all_queries_end, max_query_size, skip_insignificant); /// NOTE: consider use UInt32 for max_parser_depth setting. 
- IParser::Pos token_iterator(tokens, static_cast(max_parser_depth)); + IParser::Pos token_iterator(tokens, static_cast(max_parser_depth), static_cast(max_parser_backtracks)); if (token_iterator->isEnd() || token_iterator->type == TokenType::Semicolon) @@ -356,10 +357,13 @@ ASTPtr parseQueryAndMovePosition( const std::string & query_description, bool allow_multi_statements, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t max_parser_backtracks) { std::string error_message; - ASTPtr res = tryParseQuery(parser, pos, end, error_message, false, query_description, allow_multi_statements, max_query_size, max_parser_depth); + ASTPtr res = tryParseQuery( + parser, pos, end, error_message, false, query_description, allow_multi_statements, + max_query_size, max_parser_depth, max_parser_backtracks, true); if (res) return res; @@ -374,9 +378,10 @@ ASTPtr parseQuery( const char * end, const std::string & query_description, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t max_parser_backtracks) { - return parseQueryAndMovePosition(parser, begin, end, query_description, false, max_query_size, max_parser_depth); + return parseQueryAndMovePosition(parser, begin, end, query_description, false, max_query_size, max_parser_depth, max_parser_backtracks); } @@ -385,9 +390,10 @@ ASTPtr parseQuery( const std::string & query, const std::string & query_description, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t max_parser_backtracks) { - return parseQuery(parser, query.data(), query.data() + query.size(), query_description, max_query_size, max_parser_depth); + return parseQuery(parser, query.data(), query.data() + query.size(), query_description, max_query_size, max_parser_depth, max_parser_backtracks); } @@ -395,9 +401,10 @@ ASTPtr parseQuery( IParser & parser, const std::string & query, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t 
max_parser_backtracks) { - return parseQuery(parser, query.data(), query.data() + query.size(), parser.getName(), max_query_size, max_parser_depth); + return parseQuery(parser, query.data(), query.data() + query.size(), parser.getName(), max_query_size, max_parser_depth, max_parser_backtracks); } @@ -406,6 +413,7 @@ std::pair splitMultipartQuery( std::vector & queries_list, size_t max_query_size, size_t max_parser_depth, + size_t max_parser_backtracks, bool allow_settings_after_format_in_insert) { ASTPtr ast; @@ -422,7 +430,7 @@ std::pair splitMultipartQuery( { begin = pos; - ast = parseQueryAndMovePosition(parser, pos, end, "", true, max_query_size, max_parser_depth); + ast = parseQueryAndMovePosition(parser, pos, end, "", true, max_query_size, max_parser_depth, max_parser_backtracks); auto * insert = ast->as(); diff --git a/src/Parsers/parseQuery.h b/src/Parsers/parseQuery.h index a087f145d2c..93c1a465267 100644 --- a/src/Parsers/parseQuery.h +++ b/src/Parsers/parseQuery.h @@ -19,7 +19,8 @@ ASTPtr tryParseQuery( size_t max_query_size, /// If (end - pos) > max_query_size and query is longer than max_query_size then throws "Max query size exceeded". /// Disabled if zero. Is used in order to check query size if buffer can contains data for INSERT query. size_t max_parser_depth, - bool skip_insignificant = true); /// If true, lexer will skip all insignificant tokens (e.g. whitespaces) + size_t max_parser_backtracks, + bool skip_insignificant); /// If true, lexer will skip all insignificant tokens (e.g. whitespaces) /// Parse query or throw an exception with error message. 
@@ -30,7 +31,8 @@ ASTPtr parseQueryAndMovePosition( const std::string & description, bool allow_multi_statements, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); ASTPtr parseQuery( IParser & parser, @@ -38,20 +40,23 @@ ASTPtr parseQuery( const char * end, const std::string & description, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); ASTPtr parseQuery( IParser & parser, const std::string & query, const std::string & query_description, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); ASTPtr parseQuery( IParser & parser, const std::string & query, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); /** Split queries separated by ; on to list of single queries @@ -63,6 +68,7 @@ std::pair splitMultipartQuery( std::vector & queries_list, size_t max_query_size, size_t max_parser_depth, + size_t max_parser_backtracks, bool allow_settings_after_format_in_insert); } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 19947cd38cc..f0abc68f966 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -1,7 +1,4 @@ -#include -#include #include -#include #include #include #include @@ -10,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -54,12 +50,12 @@ TEST_P(ParserTest, parseQuery) { if (std::string(expected_ast).starts_with("throws")) { - EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0, 0), DB::Exception); } else { ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0, 0)); if 
(std::string("CREATE USER or ALTER USER query") != parser->getName() && std::string("ATTACH access entity query") != parser->getName()) { @@ -106,7 +102,7 @@ TEST_P(ParserTest, parseQuery) } else { - ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0, 0), DB::Exception); } } @@ -649,12 +645,13 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserKQLTest, static constexpr size_t kDummyMaxQuerySize = 256 * 1024; static constexpr size_t kDummyMaxParserDepth = 256; +static constexpr size_t kDummyMaxParserBacktracks = 1000000; INSTANTIATE_TEST_SUITE_P( ParserPRQL, ParserTest, ::testing::Combine( - ::testing::Values(std::make_shared(kDummyMaxQuerySize, kDummyMaxParserDepth)), + ::testing::Values(std::make_shared(kDummyMaxQuerySize, kDummyMaxParserDepth, kDummyMaxParserBacktracks)), ::testing::ValuesIn(std::initializer_list{ { "from albums\ngroup {author_id} (\n aggregate {first_published = min published}\n)\njoin a=author side:left (==author_id)\njoin p=purchases side:right (==author_id)\ngroup {a.id, p.purchase_id} (\n aggregate {avg_sell = min first_published}\n)", diff --git a/src/Parsers/tests/gtest_common.cpp b/src/Parsers/tests/gtest_common.cpp index 52d3ceb47e2..8ff9400d8a2 100644 --- a/src/Parsers/tests/gtest_common.cpp +++ b/src/Parsers/tests/gtest_common.cpp @@ -28,7 +28,7 @@ TEST_P(ParserRegexTest, parseQuery) ASSERT_TRUE(expected_ast); DB::ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0, 0)); DB::WriteBufferFromOwnString buf; formatAST(*ast->clone(), buf, false, false); EXPECT_THAT(buf.str(), ::testing::MatchesRegex(expected_ast)); @@ -45,12 +45,12 @@ TEST_P(ParserKQLTest, parseKQLQuery) { if (std::string(expected_ast).starts_with("throws")) { - EXPECT_THROW(parseKQLQuery(*parser, input_text.begin(), 
input_text.end(), 0, 0), DB::Exception); + EXPECT_THROW(parseKQLQuery(*parser, input_text.begin(), input_text.end(), 0, 0, 0), DB::Exception); } else { DB::ASTPtr ast; - ASSERT_NO_THROW(ast = parseKQLQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + ASSERT_NO_THROW(ast = parseKQLQuery(*parser, input_text.begin(), input_text.end(), 0, 0, 0)); if (std::string("CREATE USER or ALTER USER query") != parser->getName() && std::string("ATTACH access entity query") != parser->getName()) { @@ -78,6 +78,6 @@ TEST_P(ParserKQLTest, parseKQLQuery) } else { - ASSERT_THROW(parseKQLQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + ASSERT_THROW(parseKQLQuery(*parser, input_text.begin(), input_text.end(), 0, 0, 0), DB::Exception); } } diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp b/src/Parsers/tests/gtest_dictionary_parser.cpp index c0a975f7a38..a1ba46125a7 100644 --- a/src/Parsers/tests/gtest_dictionary_parser.cpp +++ b/src/Parsers/tests/gtest_dictionary_parser.cpp @@ -40,7 +40,7 @@ TEST(ParserDictionaryDDL, SimpleDictionary) " RANGE(MIN second_column MAX third_column)"; ParserCreateDictionaryQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTCreateQuery * create = ast->as(); EXPECT_EQ(create->getTable(), "dict1"); EXPECT_EQ(create->getDatabase(), "test"); @@ -136,7 +136,7 @@ TEST(ParserDictionaryDDL, AttributesWithMultipleProperties) " SOURCE(CLICKHOUSE(HOST 'localhost'))"; ParserCreateDictionaryQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTCreateQuery * create = ast->as(); EXPECT_EQ(create->getTable(), "dict2"); EXPECT_EQ(create->getDatabase(), ""); @@ -183,7 +183,7 @@ TEST(ParserDictionaryDDL, CustomAttributePropertiesOrder) " 
LIFETIME(300)"; ParserCreateDictionaryQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTCreateQuery * create = ast->as(); /// test attributes @@ -238,7 +238,7 @@ TEST(ParserDictionaryDDL, NestedSource) " RANGE(MIN second_column MAX third_column)"; ParserCreateDictionaryQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTCreateQuery * create = ast->as(); EXPECT_EQ(create->getTable(), "dict4"); EXPECT_EQ(create->getDatabase(), ""); @@ -286,7 +286,7 @@ TEST(ParserDictionaryDDL, Formatting) " RANGE(MIN second_column MAX third_column)"; ParserCreateDictionaryQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTCreateQuery * create = ast->as(); auto str = serializeAST(*create); EXPECT_EQ(str, "CREATE DICTIONARY test.dict5 (`key_column1` UInt64 DEFAULT 1 HIERARCHICAL INJECTIVE, `key_column2` String DEFAULT '', `second_column` UInt8 EXPRESSION intDiv(50, rand() % 1000), `third_column` UInt8) PRIMARY KEY key_column1, key_column2 SOURCE(MYSQL(HOST 'localhost' PORT 9000 USER 'default' REPLICA (HOST '127.0.0.1' PRIORITY 1) PASSWORD '')) LIFETIME(MIN 1 MAX 10) LAYOUT(CACHE(SIZE_IN_CELLS 50)) RANGE(MIN second_column MAX third_column)"); @@ -297,7 +297,7 @@ TEST(ParserDictionaryDDL, ParseDropQuery) String input1 = "DROP DICTIONARY test.dict1"; ParserDropQuery parser; - ASTPtr ast1 = parseQuery(parser, input1.data(), input1.data() + input1.size(), "", 0, 0); + ASTPtr ast1 = parseQuery(parser, input1.data(), input1.data() + input1.size(), "", 0, 0, 0); ASTDropQuery * drop1 = ast1->as(); EXPECT_TRUE(drop1->is_dictionary); @@ -308,7 +308,7 @@ 
TEST(ParserDictionaryDDL, ParseDropQuery) String input2 = "DROP DICTIONARY IF EXISTS dict2"; - ASTPtr ast2 = parseQuery(parser, input2.data(), input2.data() + input2.size(), "", 0, 0); + ASTPtr ast2 = parseQuery(parser, input2.data(), input2.data() + input2.size(), "", 0, 0, 0); ASTDropQuery * drop2 = ast2->as(); EXPECT_TRUE(drop2->is_dictionary); @@ -323,7 +323,7 @@ TEST(ParserDictionaryDDL, ParsePropertiesQueries) String input1 = "SHOW CREATE DICTIONARY test.dict1"; ParserTablePropertiesQuery parser; - ASTPtr ast1 = parseQuery(parser, input1.data(), input1.data() + input1.size(), "", 0, 0); + ASTPtr ast1 = parseQuery(parser, input1.data(), input1.data() + input1.size(), "", 0, 0, 0); ASTShowCreateDictionaryQuery * show1 = ast1->as(); EXPECT_EQ(show1->getTable(), "dict1"); @@ -332,7 +332,7 @@ TEST(ParserDictionaryDDL, ParsePropertiesQueries) String input2 = "EXISTS DICTIONARY dict2"; - ASTPtr ast2 = parseQuery(parser, input2.data(), input2.data() + input2.size(), "", 0, 0); + ASTPtr ast2 = parseQuery(parser, input2.data(), input2.data() + input2.size(), "", 0, 0, 0); ASTExistsDictionaryQuery * show2 = ast2->as(); EXPECT_EQ(show2->getTable(), "dict2"); diff --git a/src/Parsers/tests/gtest_format_hiliting.cpp b/src/Parsers/tests/gtest_format_hiliting.cpp index e87b093db9d..00e8197af1f 100644 --- a/src/Parsers/tests/gtest_format_hiliting.cpp +++ b/src/Parsers/tests/gtest_format_hiliting.cpp @@ -50,7 +50,7 @@ void compare(const String & expected, const String & query) { using namespace DB; ParserQuery parser(query.data() + query.size()); - ASTPtr ast = parseQuery(parser, query, 0, 0); + ASTPtr ast = parseQuery(parser, query, 0, 0, 0); WriteBufferFromOwnString write_buffer; IAST::FormatSettings settings(write_buffer, true, true); diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp index f33255f0a44..f939b959ce7 100644 --- a/src/Planner/PlannerContext.cpp +++ b/src/Planner/PlannerContext.cpp @@ -48,6 +48,12 @@ 
PlannerContext::PlannerContext(ContextMutablePtr query_context_, GlobalPlannerCo , is_ast_level_optimization_allowed(!(query_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY || select_query_options_.ignore_ast_optimizations)) {} +PlannerContext::PlannerContext(ContextMutablePtr query_context_, PlannerContextPtr planner_context_) + : query_context(std::move(query_context_)) + , global_planner_context(planner_context_->global_planner_context) + , is_ast_level_optimization_allowed(planner_context_->is_ast_level_optimization_allowed) +{} + TableExpressionData & PlannerContext::getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node) { auto [it, _] = table_expression_node_to_data.emplace(table_expression_node, TableExpressionData()); diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index 4d9ba037cac..418240fa34e 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -75,12 +75,18 @@ private: using GlobalPlannerContextPtr = std::shared_ptr; +class PlannerContext; +using PlannerContextPtr = std::shared_ptr; + class PlannerContext { public: /// Create planner context with query context and global planner context PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_, const SelectQueryOptions & select_query_options_); + /// Create planner with modified query_context + PlannerContext(ContextMutablePtr query_context_, PlannerContextPtr planner_context_); + /// Get planner context query context ContextPtr getQueryContext() const { @@ -191,6 +197,4 @@ private: PreparedSets prepared_sets; }; -using PlannerContextPtr = std::shared_ptr; - } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 7b3fb0c5c91..8ca8f0f258b 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -538,7 +538,7 @@ FilterDAGInfo buildAdditionalFiltersIfNeeded(const StoragePtr & storage, ParserExpression 
parser; additional_filter_ast = parseQuery( parser, filter.data(), filter.data() + filter.size(), - "additional filter", settings.max_query_size, settings.max_parser_depth); + "additional filter", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); break; } } diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index bd0b831ee58..a04f9f502e2 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -523,7 +523,7 @@ ASTPtr parseAdditionalResultFilter(const Settings & settings) ParserExpression parser; auto additional_result_filter_ast = parseQuery( parser, additional_result_filter.data(), additional_result_filter.data() + additional_result_filter.size(), - "additional result filter", settings.max_query_size, settings.max_parser_depth); + "additional result filter", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); return additional_result_filter_ast; } diff --git a/src/Processors/CMakeLists.txt b/src/Processors/CMakeLists.txt index 7e965188b4c..e69de29bb2d 100644 --- a/src/Processors/CMakeLists.txt +++ b/src/Processors/CMakeLists.txt @@ -1,4 +0,0 @@ -if (ENABLE_EXAMPLES) - add_subdirectory(examples) -endif () - diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index f91f7cf536b..9d056b42101 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -537,7 +537,7 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType( ParserArrayOfLiterals parser_array; ParserTupleOfLiterals parser_tuple; - IParser::Pos iterator(token_iterator, static_cast(settings.max_parser_depth)); + IParser::Pos iterator(token_iterator, static_cast(settings.max_parser_depth), static_cast(settings.max_parser_backtracks)); while (iterator->begin < istr.position()) ++iterator; Expected expected; diff --git 
a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp index 9c7f095e661..67bdd1cf877 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp @@ -274,7 +274,8 @@ static bool tryToExtractStructureFromCreateQuery(ReadBuffer & in, NamesAndTypesL String error; const char * start = create_query_str.data(); const char * end = create_query_str.data() + create_query_str.size(); - ASTPtr query = tryParseQuery(parser, start, end, error, false, "MySQL create query", false, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr query = tryParseQuery(parser, start, end, error, false, "MySQL create query", false, + DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true); if (!query) return false; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 8659dcd2318..353de76eea8 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -194,7 +194,7 @@ void ValuesBlockInputFormat::readUntilTheEndOfRowAndReTokenize(size_t current_co auto * row_end = buf->position(); buf->rollbackToCheckpoint(); tokens.emplace(buf->position(), row_end); - token_iterator.emplace(*tokens, static_cast(context->getSettingsRef().max_parser_depth)); + token_iterator.emplace(*tokens, static_cast(context->getSettingsRef().max_parser_depth), static_cast(context->getSettingsRef().max_parser_backtracks)); auto const & first = (*token_iterator).get(); if (first.isError() || first.isEnd()) { @@ -418,7 +418,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx { Expected expected; /// Keep a copy to the start of the column tokens to use if later if necessary - ti_start = IParser::Pos(*token_iterator, static_cast(settings.max_parser_depth)); + 
ti_start = IParser::Pos(*token_iterator, static_cast(settings.max_parser_depth), static_cast(settings.max_parser_backtracks)); parsed = parser.parse(*token_iterator, ast, expected); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 21e3cfcceab..f1ce5a7802f 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -131,6 +131,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int TOO_MANY_ROWS; extern const int CANNOT_PARSE_TEXT; + extern const int PARAMETER_OUT_OF_BOUND; } static MergeTreeReaderSettings getMergeTreeReaderSettings( @@ -348,7 +349,14 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( /// We have a special logic for local replica. It has to read less data, because in some cases it should /// merge states of aggregate functions or do some other important stuff other than reading from Disk. - pool_settings.min_marks_for_concurrent_read = static_cast(pool_settings.min_marks_for_concurrent_read * context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier); + const auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; + if (auto result = pool_settings.min_marks_for_concurrent_read * multiplier; canConvertTo(result)) + pool_settings.min_marks_for_concurrent_read = static_cast(result); + else + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "Exceeded limit for the number of marks per a single task for parallel replicas. 
" + "Make sure that `parallel_replicas_single_task_marks_count_multiplier` is in some reasonable boundaries, current value is: {}", + multiplier); auto pool = std::make_shared( std::move(extension), @@ -512,8 +520,14 @@ Pipe ReadFromMergeTree::readInOrder( .columns_to_read = required_columns, }; - pool_settings.min_marks_for_concurrent_read = static_cast( - pool_settings.min_marks_for_concurrent_read * context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier); + const auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; + if (auto result = pool_settings.min_marks_for_concurrent_read * multiplier; canConvertTo(result)) + pool_settings.min_marks_for_concurrent_read = static_cast(result); + else + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "Exceeded limit for the number of marks per a single task for parallel replicas. " + "Make sure that `parallel_replicas_single_task_marks_count_multiplier` is in some reasonable boundaries, current value is: {}", + multiplier); CoordinationMode mode = read_type == ReadType::InOrder ? 
CoordinationMode::WithOrder @@ -1343,7 +1357,7 @@ static void buildIndexes( { const auto & indices = settings.ignore_data_skipping_indices.toString(); Tokens tokens(indices.data(), indices.data() + indices.size(), settings.max_query_size); - IParser::Pos pos(tokens, static_cast(settings.max_parser_depth)); + IParser::Pos pos(tokens, static_cast(settings.max_parser_depth), static_cast(settings.max_parser_backtracks)); Expected expected; /// Use an unordered list rather than string vector diff --git a/src/Processors/examples/CMakeLists.txt b/src/Processors/examples/CMakeLists.txt deleted file mode 100644 index 5d43a0d7d08..00000000000 --- a/src/Processors/examples/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -if (TARGET ch_contrib::hivemetastore) - clickhouse_add_executable (comma_separated_streams comma_separated_streams.cpp) - target_link_libraries (comma_separated_streams PRIVATE dbms) -endif() diff --git a/src/Processors/examples/comma_separated_streams.cpp b/src/Processors/examples/comma_separated_streams.cpp deleted file mode 100644 index 2ec5564f346..00000000000 --- a/src/Processors/examples/comma_separated_streams.cpp +++ /dev/null @@ -1,117 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace DB; - -int main() -try -{ - Block sample; - { - // a - ColumnWithTypeAndName col; - col.name = "a"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - // b - ColumnWithTypeAndName col; - col.name = "b"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - // c - ColumnWithTypeAndName col; - col.name = "c"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - // d - ColumnWithTypeAndName col; - col.name = "d"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - // e - ColumnWithTypeAndName col; - col.name = "e"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - 
{ - // f - ColumnWithTypeAndName col; - col.name = "f"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - // g - ColumnWithTypeAndName col; - col.name = "g"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - // h - ColumnWithTypeAndName col; - col.name = "h"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - - - ReadBufferFromFile in_buf("test_in"); - WriteBufferFromFile out_buf("test_out"); - - FormatSettings format_settings; - format_settings.with_names_use_header = true; - format_settings.skip_unknown_fields = true; - format_settings.csv.delimiter = '\x01'; - format_settings.hive_text.input_field_names = - { - "d", - "e", - "f", - "a", - "b", - "c", - "g", - "h", - "i", - "j", - }; - - RowInputFormatParams in_params{DEFAULT_INSERT_BLOCK_SIZE}; - InputFormatPtr input_format = std::make_shared(sample, in_buf, in_params, format_settings); - auto pipeline = QueryPipeline(std::move(input_format)); - auto reader = std::make_unique(pipeline); - - OutputFormatPtr output_format = std::make_shared(out_buf, sample, true, true, format_settings); - Block res; - while (reader->pull(res)) - { - output_format->write(res); - } - return 0; -} -catch (...) 
-{ - std::cerr << getCurrentExceptionMessage(true) << '\n'; - return 1; -} diff --git a/src/Processors/examples/test_in b/src/Processors/examples/test_in deleted file mode 100644 index c7df97a26a6..00000000000 --- a/src/Processors/examples/test_in +++ /dev/null @@ -1,8 +0,0 @@ -2021-09-14JPall20.0200 -2021-09-14CIall20.0100 -2021-09-14JMall40.25411 -2021-09-14MMall310.19354838709677422766 -2021-09-14TZAndroid30.3333333333333333311 -2021-09-14SGall80.25412 -2021-09-14PYall11.0001 -2021-09-14MXall10.0100 diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 15765f99b4b..f21991e8d58 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -885,7 +885,7 @@ namespace const char * begin = query_text.data(); const char * end = begin + query_text.size(); ParserQuery parser(end, settings.allow_settings_after_format_in_insert); - ast = parseQuery(parser, begin, end, "", settings.max_query_size, settings.max_parser_depth); + ast = parseQuery(parser, begin, end, "", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); /// Choose input format. 
insert_query = ast->as(); diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index c62dc8109ea..83e06628185 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -284,6 +284,7 @@ void PostgreSQLHandler::processQuery() auto parse_res = splitMultipartQuery(query->query, queries, settings.max_query_size, settings.max_parser_depth, + settings.max_parser_backtracks, settings.allow_settings_after_format_in_insert); if (!parse_res.second) throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot parse and execute the following part of query: {}", String(parse_res.first)); diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index e08dac3a332..16b89f24243 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -145,7 +145,7 @@ void ColumnDescription::readText(ReadBuffer & buf) readEscapedStringUntilEOL(modifiers, buf); ParserColumnDeclaration column_parser(/* require type */ true); - ASTPtr ast = parseQuery(column_parser, "x T " + modifiers, "column parser", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr ast = parseQuery(column_parser, "x T " + modifiers, "column parser", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); if (auto * col_ast = ast->as()) { @@ -211,7 +211,7 @@ void ColumnsDescription::setAliases(NamesAndAliases aliases) const char * alias_expression_pos = alias.expression.data(); const char * alias_expression_end = alias_expression_pos + alias.expression.size(); ParserExpression expression_parser; - description.default_desc.expression = parseQuery(expression_parser, alias_expression_pos, alias_expression_end, "expression", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + description.default_desc.expression = parseQuery(expression_parser, alias_expression_pos, alias_expression_end, "expression", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); add(std::move(description)); } diff --git 
a/src/Storages/ConstraintsDescription.cpp b/src/Storages/ConstraintsDescription.cpp index 219c3fd0c97..d492de2c2b2 100644 --- a/src/Storages/ConstraintsDescription.cpp +++ b/src/Storages/ConstraintsDescription.cpp @@ -45,7 +45,7 @@ ConstraintsDescription ConstraintsDescription::parse(const String & str) ConstraintsDescription res; ParserConstraintDeclarationList parser; - ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); for (const auto & constraint : list->children) res.constraints.push_back(constraint); diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index c723fa4225c..14555dca63b 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -173,7 +173,7 @@ IndicesDescription IndicesDescription::parse(const String & str, const ColumnsDe return result; ParserIndexDeclarationList parser; - ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); for (const auto & index : list->children) result.emplace_back(IndexDescription::getIndexFromAST(index, columns, context)); diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index c407cef627d..d63b40e2b11 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -171,7 +171,7 @@ KeyDescription KeyDescription::parse(const String & str, const ColumnsDescriptio return result; ParserExpression parser; - ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); FunctionNameNormalizer().visit(ast.get()); return getKeyFromAST(ast, columns, context); diff --git a/src/Storages/MemorySettings.h 
b/src/Storages/MemorySettings.h index 9e1a8db3595..ac6cdf73329 100644 --- a/src/Storages/MemorySettings.h +++ b/src/Storages/MemorySettings.h @@ -10,6 +10,10 @@ class ASTStorage; #define MEMORY_SETTINGS(M, ALIAS) \ M(Bool, compress, false, "Compress data in memory", 0) \ + M(UInt64, min_rows_to_keep, 0, "Minimum block size (in rows) to retain in Memory table buffer.", 0) \ + M(UInt64, max_rows_to_keep, 0, "Maximum block size (in rows) to retain in Memory table buffer.", 0) \ + M(UInt64, min_bytes_to_keep, 0, "Minimum block size (in bytes) to retain in Memory table buffer.", 0) \ + M(UInt64, max_bytes_to_keep, 0, "Maximum block size (in bytes) to retain in Memory table buffer.", 0) \ DECLARE_SETTINGS_TRAITS(memorySettingsTraits, MEMORY_SETTINGS) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 0245baa836c..023202019e4 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -609,6 +609,15 @@ UInt64 IMergeTreeDataPart::getMarksCount() const return index_granularity.getMarksCount(); } +UInt64 IMergeTreeDataPart::getExistingBytesOnDisk() const +{ + if (storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge && supportLightweightDeleteMutate() && hasLightweightDelete() + && existing_rows_count.has_value() && existing_rows_count.value() < rows_count && rows_count > 0) + return bytes_on_disk * existing_rows_count.value() / rows_count; + else + return bytes_on_disk; +} + size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const { auto checksum = checksums.files.find(file_name); @@ -691,6 +700,7 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks calculateColumnsAndSecondaryIndicesSizesOnDisk(); loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`. 
+ loadExistingRowsCount(); /// Must be called after loadRowsCount() as it uses the value of `rows_count`. loadPartitionAndMinMaxIndex(); if (!parent_part) { @@ -947,7 +957,7 @@ void IMergeTreeDataPart::loadDefaultCompressionCodec() try { ParserCodec codec_parser; - auto codec_ast = parseQuery(codec_parser, codec_line.data() + buf.getPosition(), codec_line.data() + codec_line.length(), "codec parser", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto codec_ast = parseQuery(codec_parser, codec_line.data() + buf.getPosition(), codec_line.data() + codec_line.length(), "codec parser", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); default_codec = CompressionCodecFactory::instance().get(codec_ast, {}); } catch (const DB::Exception & ex) @@ -1334,6 +1344,87 @@ void IMergeTreeDataPart::loadRowsCount() } } +void IMergeTreeDataPart::loadExistingRowsCount() +{ + if (existing_rows_count.has_value()) + return; + + if (!rows_count || !storage.getSettings()->load_existing_rows_count_for_old_parts || !supportLightweightDeleteMutate() + || !hasLightweightDelete()) + existing_rows_count = rows_count; + else + existing_rows_count = readExistingRowsCount(); +} + +UInt64 IMergeTreeDataPart::readExistingRowsCount() +{ + const size_t total_mark = getMarksCount(); + if (!total_mark) + return rows_count; + + NamesAndTypesList cols; + cols.emplace_back(RowExistsColumn::name, RowExistsColumn::type); + + StorageMetadataPtr metadata_ptr = storage.getInMemoryMetadataPtr(); + StorageSnapshotPtr storage_snapshot_ptr = std::make_shared(storage, metadata_ptr); + + MergeTreeReaderPtr reader = getReader( + cols, + storage_snapshot_ptr, + MarkRanges{MarkRange(0, total_mark)}, + /*virtual_fields=*/ {}, + /*uncompressed_cache=*/{}, + storage.getContext()->getMarkCache().get(), + std::make_shared(), + MergeTreeReaderSettings{}, + ValueSizeMap{}, + ReadBufferFromFileBase::ProfileCallback{}); + + if (!reader) + { + LOG_WARNING(storage.log, "Create reader failed while reading existing rows 
count"); + return rows_count; + } + + size_t current_mark = 0; + bool continue_reading = false; + size_t current_row = 0; + size_t existing_count = 0; + + while (current_row < rows_count) + { + size_t rows_to_read = index_granularity.getMarkRows(current_mark); + continue_reading = (current_mark != 0); + + Columns result; + result.resize(1); + + size_t rows_read = reader->readRows(current_mark, total_mark, continue_reading, rows_to_read, result); + if (!rows_read) + { + LOG_WARNING(storage.log, "Part {} has lightweight delete, but _row_exists column not found", name); + return rows_count; + } + + current_row += rows_read; + current_mark += (rows_to_read == rows_read); + + const ColumnUInt8 * row_exists_col = typeid_cast(result[0].get()); + if (!row_exists_col) + { + LOG_WARNING(storage.log, "Part {} _row_exists column type is not UInt8", name); + return rows_count; + } + + for (UInt8 row_exists : row_exists_col->getData()) + if (row_exists) + existing_count++; + } + + LOG_DEBUG(storage.log, "Part {} existing_rows_count = {}", name, existing_count); + return existing_count; +} + void IMergeTreeDataPart::appendFilesOfRowsCount(Strings & files) { files.push_back("count.txt"); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index aaae64a5970..8bd32e777bc 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -231,6 +231,9 @@ public: size_t rows_count = 0; + /// Existing rows count (excluding lightweight deleted rows) + std::optional existing_rows_count; + time_t modification_time = 0; /// When the part is removed from the working set. Changes once. 
mutable std::atomic remove_time { std::numeric_limits::max() }; @@ -373,6 +376,10 @@ public: void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; } void setBytesUncompressedOnDisk(UInt64 bytes_uncompressed_on_disk_) { bytes_uncompressed_on_disk = bytes_uncompressed_on_disk_; } + /// Returns estimated size of existing rows if setting exclude_deleted_rows_for_part_size_in_merge is true + /// Otherwise returns bytes_on_disk + UInt64 getExistingBytesOnDisk() const; + size_t getFileSizeOrZero(const String & file_name) const; auto getFilesChecksums() const { return checksums.files; } @@ -499,6 +506,9 @@ public: /// True if here is lightweight deleted mask file in part. bool hasLightweightDelete() const; + /// Read existing rows count from _row_exists column + UInt64 readExistingRowsCount(); + void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings); /// Checks the consistency of this data part. @@ -664,6 +674,9 @@ private: /// For the older format version calculates rows count from the size of a column with a fixed size. void loadRowsCount(); + /// Load existing rows count from _row_exists column if load_existing_rows_count_for_old_parts is true. + void loadExistingRowsCount(); + static void appendFilesOfRowsCount(Strings & files); /// Loads ttl infos in json format from file ttl.txt. 
If file doesn't exists assigns ttl infos with all zeros diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index ae6e398026d..5ef004ec019 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -174,7 +174,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() } /// Start to make the main work - size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts); + size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts, true); /// Can throw an exception while reserving space. IMergeTreeDataPart::TTLInfos ttl_infos; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8569d61e263..aacac5366a9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8263,6 +8263,7 @@ std::pair MergeTreeData::createE new_data_part->setColumns(columns, {}, metadata_snapshot->getMetadataVersion()); new_data_part->rows_count = block.rows(); + new_data_part->existing_rows_count = block.rows(); new_data_part->partition = partition; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 1bf1d4a3c29..53d49b51e8f 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -405,7 +405,7 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo } IMergeSelector::Part part_info; - part_info.size = part->getBytesOnDisk(); + part_info.size = part->getExistingBytesOnDisk(); part_info.age = res.current_time - part->modification_time; part_info.level = part->info.level; part_info.data = &part; @@ -611,7 +611,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti return
SelectPartsDecision::CANNOT_SELECT; } - sum_bytes += (*it)->getBytesOnDisk(); + sum_bytes += (*it)->getExistingBytesOnDisk(); prev_it = it; ++it; @@ -793,7 +793,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart } -size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts) +size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & account_for_deleted) { size_t res = 0; time_t current_time = std::time(nullptr); @@ -804,7 +804,10 @@ size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData:: if (part_max_ttl && part_max_ttl <= current_time) continue; - res += part->getBytesOnDisk(); + if (account_for_deleted) + res += part->getExistingBytesOnDisk(); + else + res += part->getBytesOnDisk(); } return static_cast(res * DISK_USAGE_COEFFICIENT_TO_RESERVE); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index f3a3f51b6c3..669ee040af3 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -193,7 +193,7 @@ public: /// The approximate amount of disk space needed for merge or mutation. With a surplus. - static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts); + static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & account_for_deleted = false); private: /** Select all parts belonging to the same partition. 
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 1721fd15b8d..fe45d0bee54 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -12,7 +12,7 @@ namespace ErrorCodes static CompressionCodecPtr getMarksCompressionCodec(const String & marks_compression_codec) { ParserCodec codec_parser; - auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); return CompressionCodecFactory::instance().get(ast, nullptr); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index fd83d2ebfe9..a31da5bc4fe 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -243,7 +243,7 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() if (compress_primary_key) { ParserCodec codec_parser; - auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.primary_key_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.primary_key_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); CompressionCodecPtr primary_key_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); index_compressor_stream = std::make_unique(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size); index_source_hashing_stream = std::make_unique(*index_compressor_stream); @@ -268,7 +268,7 @@ void MergeTreeDataPartWriterOnDisk::initStatistics() void MergeTreeDataPartWriterOnDisk::initSkipIndices() { 
ParserCodec codec_parser; - auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); for (const auto & skip_index : skip_indices) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index d79590ded21..6a3b08d4d65 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -135,7 +135,7 @@ void MergeTreeDataPartWriterWide::addStreams( compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, nullptr, default_codec, true); ParserCodec codec_parser; - auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage()); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index ef679b61a79..6471f510291 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -606,7 +606,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd Strings forced_indices; { Tokens tokens(indices.data(), indices.data() + 
indices.size(), settings.max_query_size); - IParser::Pos pos(tokens, static_cast(settings.max_parser_depth)); + IParser::Pos pos(tokens, static_cast(settings.max_parser_depth), static_cast(settings.max_parser_backtracks)); Expected expected; if (!parseIdentifiersOrStringLiterals(pos, expected, forced_indices)) throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse force_data_skipping_indices ('{}')", indices); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index fdac16ae19a..cadd94867ec 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -241,7 +241,7 @@ std::vector scatterAsyncInsertInfoBySelector(AsyncInsertInfo } BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( - const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, AsyncInsertInfoPtr async_insert_info) + Block && block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, AsyncInsertInfoPtr async_insert_info) { BlocksWithPartition result; if (!block || !block.rows()) @@ -320,7 +320,7 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( } Block MergeTreeDataWriter::mergeBlock( - const Block & block, + Block && block, SortDescription sort_description, const Names & partition_key_columns, IColumn::Permutation *& permutation, @@ -410,7 +410,11 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartWithoutPref } MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( - BlockWithPartition & block_with_partition, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, int64_t block_number, bool need_tmp_prefix) + BlockWithPartition & block_with_partition, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context, + int64_t block_number, + bool need_tmp_prefix) { TemporaryPart temp_part; Block & block = 
block_with_partition.block; @@ -498,7 +502,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( if (context->getSettingsRef().optimize_on_insert) { ProfileEventTimeIncrement watch(ProfileEvents::MergeTreeDataWriterMergingBlocksMicroseconds); - block = mergeBlock(block, sort_description, partition_key_columns, perm_ptr, data.merging_params); + block = mergeBlock(std::move(block), sort_description, partition_key_columns, perm_ptr, data.merging_params); } /// Size of part would not be greater than block.bytes() + epsilon @@ -537,6 +541,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( new_data_part->setColumns(columns, infos, metadata_snapshot->getMetadataVersion()); new_data_part->rows_count = block.rows(); + new_data_part->existing_rows_count = block.rows(); new_data_part->partition = std::move(partition); new_data_part->minmax_idx = std::move(minmax_idx); new_data_part->is_temp = true; @@ -717,7 +722,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( MergeTreeData::MergingParams projection_merging_params; projection_merging_params.mode = MergeTreeData::MergingParams::Aggregating; - block = mergeBlock(block, sort_description, {}, perm_ptr, projection_merging_params); + block = mergeBlock(std::move(block), sort_description, {}, perm_ptr, projection_merging_params); } /// This effectively chooses minimal compression method: diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 8fb8b82dbe6..9dffea0a471 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -53,7 +53,7 @@ public: * (split rows by partition) * Works deterministically: if same block was passed, function will return same result in same order. 
*/ - static BlocksWithPartition splitBlockIntoParts(const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, AsyncInsertInfoPtr async_insert_info = nullptr); + static BlocksWithPartition splitBlockIntoParts(Block && block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, AsyncInsertInfoPtr async_insert_info = nullptr); /// This structure contains not completely written temporary part. /// Some writes may happen asynchronously, e.g. for blob storages. @@ -107,7 +107,7 @@ public: size_t block_num); static Block mergeBlock( - const Block & block, + Block && block, SortDescription sort_description, const Names & partition_key_columns, IColumn::Permutation *& permutation, diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 48bf3eacd88..9c67a86997b 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -42,6 +42,7 @@ struct Settings; M(UInt64, compact_parts_max_bytes_to_buffer, 128 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ + M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ @@ -79,6 +80,7 @@ struct Settings; M(UInt64, number_of_mutations_to_throw, 1000, "If table has at least that many unfinished mutations, throw 'Too many mutations' exception. 
Disabled if set to 0", 0) \ M(UInt64, min_delay_to_mutate_ms, 10, "Min delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ M(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ + M(Bool, exclude_deleted_rows_for_part_size_in_merge, false, "Use an estimated source part size (excluding lightweight deleted rows) when selecting parts to merge", 0) \ \ /** Inserts settings. */ \ M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 87b0a04d244..2da7565daad 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -63,7 +63,7 @@ void MergeTreeSink::consume(Chunk chunk) if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); - auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context); + auto part_blocks = storage.writer.splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context); using DelayedPartitions = std::vector; DelayedPartitions partitions; @@ -87,6 +87,10 @@ void MergeTreeSink::consume(Chunk chunk) elapsed_ns = watch.elapsed(); } + /// Reset earlier to free memory + current_block.block.clear(); + current_block.partition.clear(); + /// If optimize_on_insert setting is true, current_block could become empty after merge /// and we didn't create part. 
if (!temp_part.part) diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index f2fe2e0f255..d8555d69788 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -188,6 +188,11 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->index_granularity = writer->getIndexGranularity(); new_part->calculateColumnsAndSecondaryIndicesSizesOnDisk(); + /// In mutation, existing_rows_count is already calculated in PartMergerWriter + /// In merge situation, lightweight deleted rows were physically deleted, existing_rows_count equals rows_count + if (!new_part->existing_rows_count.has_value()) + new_part->existing_rows_count = rows_count; + if (default_codec != nullptr) new_part->default_codec = default_codec; diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index a9ff687fe4d..620b0e34c6a 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -49,7 +49,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() } /// TODO - some better heuristic?
- size_t estimated_space_for_result = MergeTreeDataMergerMutator::estimateNeededDiskSpace({source_part}); + size_t estimated_space_for_result = MergeTreeDataMergerMutator::estimateNeededDiskSpace({source_part}, false); if (entry.create_time + storage_settings_ptr->prefer_fetch_merged_part_time_threshold.totalSeconds() <= time(nullptr) && estimated_space_for_result >= storage_settings_ptr->prefer_fetch_merged_part_size_threshold) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 150cc27c369..bfdc109a89d 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -60,6 +60,26 @@ static bool checkOperationIsNotCanceled(ActionBlocker & merges_blocker, MergeLis return true; } +static UInt64 getExistingRowsCount(const Block & block) +{ + auto column = block.getByName(RowExistsColumn::name).column; + const ColumnUInt8 * row_exists_col = typeid_cast(column.get()); + + if (!row_exists_col) + { + LOG_WARNING(&Poco::Logger::get("MutationHelpers::getExistingRowsCount"), "_row_exists column type is not UInt8"); + return block.rows(); + } + + UInt64 existing_count = 0; + + for (UInt8 row_exists : row_exists_col->getData()) + if (row_exists) + existing_count++; + + return existing_count; +} + /** Split mutation commands into two parts: * First part should be executed by mutations interpreter. * Other is just simple drop/renames, so they can be executed without interpreter. 
@@ -997,6 +1017,9 @@ struct MutationContext bool need_prefix = true; scope_guard temporary_directory_lock; + + /// Whether we need to count lightweight delete rows in this mutation + bool count_lightweight_deleted_rows; }; using MutationContextPtr = std::shared_ptr; @@ -1191,6 +1214,7 @@ public: } case State::SUCCESS: { + finalize(); return false; } } @@ -1226,6 +1250,11 @@ private: const ProjectionsDescription & projections; ExecutableTaskPtr merge_projection_parts_task_ptr; + + /// Existing rows count calculated during part writing. + /// It is initialized in prepare(), calculated in mutateOriginalPartAndPrepareProjections() + /// and set to new_data_part in finalize() + size_t existing_rows_count; }; @@ -1238,6 +1267,8 @@ void PartMergerWriter::prepare() // We split the materialization into multiple stages similar to the process of INSERT SELECT query. projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } + + existing_rows_count = 0; } @@ -1251,6 +1282,10 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ctx->out->write(cur_block); + /// TODO: move this calculation to DELETE FROM mutation + if (ctx->count_lightweight_deleted_rows) + existing_rows_count += MutationHelpers::getExistingRowsCount(cur_block); + for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { const auto & projection = *ctx->projections_to_build[i]; @@ -1340,6 +1375,12 @@ bool PartMergerWriter::iterateThroughAllProjections() return true; } +void PartMergerWriter::finalize() +{ + if (ctx->count_lightweight_deleted_rows) + ctx->new_data_part->existing_rows_count = existing_rows_count; +} + class MutateAllPartColumnsTask : public IExecutableTask { public: @@ -2185,6 +2226,20 @@ bool MutateTask::prepare() if (ctx->mutating_pipeline_builder.initialized()) ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); + if 
(ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && ctx->updated_header.has(RowExistsColumn::name)) + { + /// This mutation contains lightweight delete and we need to count the deleted rows, + /// Reset existing_rows_count of new data part to 0 and it will be updated while writing _row_exists column + ctx->count_lightweight_deleted_rows = true; + } + else + { + ctx->count_lightweight_deleted_rows = false; + + /// No need to count deleted rows, copy existing_rows_count from source part + ctx->new_data_part->existing_rows_count = ctx->source_part->existing_rows_count.value_or(ctx->source_part->rows_count); + } + /// All columns from part are changed and may be some more that were missing before in part /// TODO We can materialize compact part without copying data if (!isWidePart(ctx->source_part) || !isFullPartStorage(ctx->source_part->getDataPartStorage()) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 858eae4afd9..42f564f40da 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1350,7 +1350,10 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( auto part = data.getPartIfExists(name, {MergeTreeDataPartState::PreActive, MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); if (part) { - sum_parts_size_in_bytes += part->getBytesOnDisk(); + if (entry.type == LogEntry::MERGE_PARTS) + sum_parts_size_in_bytes += part->getExistingBytesOnDisk(); + else + sum_parts_size_in_bytes += part->getBytesOnDisk(); if (entry.type == LogEntry::MUTATE_PART && !storage.mutation_backoff_policy.partCanBeMutated(part->name)) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index e50b4007d64..768a680d9bd 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -288,7 
+288,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } - auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context, async_insert_info); + auto part_blocks = storage.writer.splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; using DelayedPartitions = std::vector; @@ -383,6 +383,12 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) partitions = DelayedPartitions{}; } + if constexpr (!async_insert) + { + /// Reset earlier to free memory. + current_block.block.clear(); + current_block.partition.clear(); + } partitions.emplace_back(DelayedPartition( log, diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 41188891118..0ca7a4d74d9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -432,7 +432,7 @@ StorageInMemoryMetadata ReplicatedMergeTreeTableMetadata::Diff::getNewMetadata(c auto parse_key_expr = [] (const String & key_expr) { ParserNotEmptyExpressionList parser(false); - auto new_sorting_key_expr_list = parseQuery(parser, key_expr, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto new_sorting_key_expr_list = parseQuery(parser, key_expr, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); ASTPtr order_by_ast; if (new_sorting_key_expr_list->children.size() == 1) @@ -489,7 +489,7 @@ StorageInMemoryMetadata ReplicatedMergeTreeTableMetadata::Diff::getNewMetadata(c if (!new_ttl_table.empty()) { ParserTTLExpressionList parser; - auto ttl_for_table_ast = parseQuery(parser, new_ttl_table, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto ttl_for_table_ast = parseQuery(parser, new_ttl_table, 0, 
DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); new_metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( ttl_for_table_ast, new_metadata.columns, context, new_metadata.primary_key, true /* allow_suspicious; because it is replication */); } diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index f6ec277c270..aaf5c1b5d87 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -228,7 +228,7 @@ void MutationCommands::readText(ReadBuffer & in) ParserAlterCommandList p_alter_commands; auto commands_ast = parseQuery( - p_alter_commands, commands_str.data(), commands_str.data() + commands_str.length(), "mutation commands list", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + p_alter_commands, commands_str.data(), commands_str.data() + commands_str.length(), "mutation commands list", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); for (const auto & child : commands_ast->children) { diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 64d329f74b2..f686fbda664 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -479,7 +479,7 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery( ASTPtr result; Tokens tokens(attr.attr_def.data(), attr.attr_def.data() + attr.attr_def.size()); - IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH); + IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); if (!expr_parser.parse(pos, result, expected)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse default expression: {}", attr.attr_def); diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 08ebe3a10d0..0bcbedee41a 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ 
b/src/Storages/ProjectionsDescription.cpp @@ -341,7 +341,7 @@ ProjectionsDescription ProjectionsDescription::parse(const String & str, const C return result; ParserProjectionDeclarationList parser; - ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); for (const auto & projection_ast : list->children) { diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 7a8fb9feeda..c6222d2124e 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -46,6 +46,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int CANNOT_RESTORE_TABLE; extern const int NOT_IMPLEMENTED; + extern const int SETTING_CONSTRAINT_VIOLATION; } class MemorySink : public SinkToStorage @@ -103,16 +104,37 @@ public: std::lock_guard lock(storage.mutex); auto new_data = std::make_unique(*(storage.data.get())); + UInt64 new_total_rows = storage.total_size_rows.load(std::memory_order_relaxed) + inserted_rows; + UInt64 new_total_bytes = storage.total_size_bytes.load(std::memory_order_relaxed) + inserted_bytes; + while (!new_data->empty() + && ((storage.max_bytes_to_keep && new_total_bytes > storage.max_bytes_to_keep) + || (storage.max_rows_to_keep && new_total_rows > storage.max_rows_to_keep))) + { + Block oldest_block = new_data->front(); + UInt64 rows_to_remove = oldest_block.rows(); + UInt64 bytes_to_remove = oldest_block.allocatedBytes(); + if (new_total_bytes - bytes_to_remove < storage.min_bytes_to_keep + || new_total_rows - rows_to_remove < storage.min_rows_to_keep) + { + break; // stop - removing next block will put us under min_bytes / min_rows threshold + } + + // delete old block from current storage table + new_total_rows -= rows_to_remove; + new_total_bytes -= bytes_to_remove; + new_data->erase(new_data->begin()); + } + + // append new data to modified storage table and commit 
new_data->insert(new_data->end(), new_blocks.begin(), new_blocks.end()); storage.data.set(std::move(new_data)); - storage.total_size_bytes.fetch_add(inserted_bytes, std::memory_order_relaxed); - storage.total_size_rows.fetch_add(inserted_rows, std::memory_order_relaxed); + storage.total_size_rows.store(new_total_rows, std::memory_order_relaxed); + storage.total_size_bytes.store(new_total_bytes, std::memory_order_relaxed); } private: Blocks new_blocks; - StorageMemory & storage; StorageSnapshotPtr storage_snapshot; }; @@ -123,8 +145,10 @@ StorageMemory::StorageMemory( ColumnsDescription columns_description_, ConstraintsDescription constraints_, const String & comment, - bool compress_) - : IStorage(table_id_), data(std::make_unique()), compress(compress_) + const MemorySettings & settings) + : IStorage(table_id_), data(std::make_unique()), compress(settings.compress), + min_rows_to_keep(settings.min_rows_to_keep), max_rows_to_keep(settings.max_rows_to_keep), + min_bytes_to_keep(settings.min_bytes_to_keep), max_bytes_to_keep(settings.max_bytes_to_keep) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(std::move(columns_description_)); @@ -542,7 +566,11 @@ void registerStorageMemory(StorageFactory & factory) if (has_settings) settings.loadFromQuery(*args.storage_def); - return std::make_shared(args.table_id, args.columns, args.constraints, args.comment, settings.compress); + if (settings.min_bytes_to_keep > settings.max_bytes_to_keep + || settings.min_rows_to_keep > settings.max_rows_to_keep) + throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, "Min. 
bytes / rows must be set with a max."); + + return std::make_shared(args.table_id, args.columns, args.constraints, args.comment, settings); }, { .supports_settings = true, diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 3293e5e4fe5..13f1c971d82 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -30,7 +31,7 @@ public: ColumnsDescription columns_description_, ConstraintsDescription constraints_, const String & comment, - bool compress_ = false); + const MemorySettings & settings = MemorySettings()); String getName() const override { return "Memory"; } @@ -134,6 +135,11 @@ private: std::atomic total_size_rows = 0; bool compress; + UInt64 min_rows_to_keep; + UInt64 max_rows_to_keep; + UInt64 min_bytes_to_keep; + UInt64 max_bytes_to_keep; + friend class ReadFromMemoryStorageStep; }; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e07bcf339c3..9161b2773da 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -422,6 +422,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu std::vector> pipelines; auto table_it = selected_tables.begin(); + auto modified_context = Context::createCopy(context); for (size_t i = 0; i < selected_tables.size(); ++i, ++table_it) { auto & child_plan = child_plans->at(i); @@ -438,7 +439,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu if (child_plan.row_policy_data_opt) child_plan.row_policy_data_opt->extendNames(real_column_names); - auto modified_query_info = getModifiedQueryInfo(context, table, nested_storage_snaphsot, real_column_names, column_names_as_aliases, aliases); + auto modified_query_info = getModifiedQueryInfo(modified_context, table, nested_storage_snaphsot, real_column_names, column_names_as_aliases, aliases); auto source_pipeline = createSources( child_plan.plan, @@ -547,9 +548,10 @@ 
std::vector ReadFromMerge::createChildrenPlans(SelectQ } /// Settings will be modified when planning children tables. - auto modified_context = Context::createCopy(context); for (const auto & table : selected_tables) { + auto modified_context = Context::createCopy(context); + size_t current_need_streams = tables_count >= num_streams ? 1 : (num_streams / tables_count); size_t current_streams = std::min(current_need_streams, remaining_streams); remaining_streams -= current_streams; @@ -570,25 +572,25 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ auto & aliases = res.back().table_aliases; auto & row_policy_data_opt = res.back().row_policy_data_opt; auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); - auto nested_storage_snaphsot = storage->getStorageSnapshot(storage_metadata_snapshot, context); + auto nested_storage_snaphsot = storage->getStorageSnapshot(storage_metadata_snapshot, modified_context); Names column_names_as_aliases; Names real_column_names = column_names; const auto & database_name = std::get<0>(table); const auto & table_name = std::get<3>(table); - auto row_policy_filter_ptr = context->getRowPolicyFilter( + auto row_policy_filter_ptr = modified_context->getRowPolicyFilter( database_name, table_name, RowPolicyFilterType::SELECT_FILTER); if (row_policy_filter_ptr) { - row_policy_data_opt = RowPolicyData(row_policy_filter_ptr, storage, context); + row_policy_data_opt = RowPolicyData(row_policy_filter_ptr, storage, modified_context); row_policy_data_opt->extendNames(real_column_names); } auto modified_query_info - = getModifiedQueryInfo(context, table, nested_storage_snaphsot, real_column_names, column_names_as_aliases, aliases); + = getModifiedQueryInfo(modified_context, table, nested_storage_snaphsot, real_column_names, column_names_as_aliases, aliases); if (!context->getSettingsRef().allow_experimental_analyzer) { @@ -657,10 +659,9 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ row_policy_data_opt, 
modified_context, current_streams); + res.back().plan.addInterpreterContext(modified_context); } - if (!res.empty()) - res[0].plan.addInterpreterContext(modified_context); return res; } @@ -864,7 +865,7 @@ QueryTreeNodePtr replaceTableExpressionAndRemoveJoin( } -SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_context, +SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & modified_context, const StorageWithLockAndName & storage_with_lock_and_name, const StorageSnapshotPtr & storage_snapshot_, Names required_column_names, @@ -878,6 +879,9 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ if (modified_query_info.optimized_prewhere_info && !modified_query_info.prewhere_info) modified_query_info.prewhere_info = modified_query_info.optimized_prewhere_info; + if (modified_query_info.planner_context) + modified_query_info.planner_context = std::make_shared(modified_context, modified_query_info.planner_context); + if (modified_query_info.table_expression) { auto replacement_table_expression = std::make_shared(storage, storage_lock, storage_snapshot_); diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 556649f622d..c049d50f3b4 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -192,7 +192,7 @@ private: using Aliases = std::vector; - SelectQueryInfo getModifiedQueryInfo(const ContextPtr & modified_context, + SelectQueryInfo getModifiedQueryInfo(const ContextMutablePtr & modified_context, const StorageWithLockAndName & storage_with_lock_and_name, const StorageSnapshotPtr & storage_snapshot, Names required_column_names, diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 663e7f435b7..c816a6f0dce 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1113,7 +1113,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if 
(isTTLMergeType(future_part->merge_type)) getContext()->getMergeList().bookMergeWithTTL(); - merging_tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part->parts), *this, metadata_snapshot, false); + merging_tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part->parts, true), *this, metadata_snapshot, false); return std::make_shared(future_part, std::move(merging_tagger), std::make_shared()); } @@ -1336,7 +1336,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( future_part->name = part->getNewName(new_part_info); future_part->part_format = part->getFormat(); - tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}), *this, metadata_snapshot, true); + tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}, false), *this, metadata_snapshot, true); return std::make_shared(future_part, std::move(tagger), commands, txn); } } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 11da394feec..ff055508aa6 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1451,7 +1451,8 @@ void StorageS3::Configuration::connect(const ContextPtr & context) auth_settings.expiration_window_seconds.value_or( context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), - }); + }, + credentials.GetSessionToken()); } void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection) diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp index ac5dd6c05d0..635686780a0 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp +++ 
b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp @@ -78,7 +78,8 @@ static String clusterNameFromDDLQuery(ContextPtr context, const DDLTask & task) ParserQuery parser_query(end, settings.allow_settings_after_format_in_insert); ASTPtr query = parseQuery(parser_query, begin, end, description, settings.max_query_size, - settings.max_parser_depth); + settings.max_parser_depth, + settings.max_parser_backtracks); String cluster_name; if (const auto * query_on_cluster = dynamic_cast(query.get())) diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp index cae42011fc5..651ca815420 100644 --- a/src/Storages/System/StorageSystemScheduler.cpp +++ b/src/Storages/System/StorageSystemScheduler.cpp @@ -30,7 +30,9 @@ ColumnsDescription StorageSystemScheduler::getColumnsDescription() {"is_active", std::make_shared(), "Whether this node is currently active - has resource requests to be dequeued and constraints satisfied."}, {"active_children", std::make_shared(), "The number of children in active state."}, {"dequeued_requests", std::make_shared(), "The total number of resource requests dequeued from this node."}, + {"canceled_requests", std::make_shared(), "The total number of resource requests canceled from this node."}, {"dequeued_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests dequeued from this node."}, + {"canceled_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests canceled from this node."}, {"busy_periods", std::make_shared(), "The total number of deactivations of this node."}, {"vruntime", std::make_shared(std::make_shared()), "For children of `fair` nodes only. 
Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner."}, @@ -93,7 +95,9 @@ void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr c res_columns[i++]->insert(node->isActive()); res_columns[i++]->insert(node->activeChildren()); res_columns[i++]->insert(node->dequeued_requests.load()); + res_columns[i++]->insert(node->canceled_requests.load()); res_columns[i++]->insert(node->dequeued_cost.load()); + res_columns[i++]->insert(node->canceled_cost.load()); res_columns[i++]->insert(node->busy_periods.load()); Field vruntime; diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 3482867bbf7..5afdd7a02ac 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -478,7 +478,7 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d ParserCreateQuery parser; ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "Attach query from embedded resource " + metadata_resource_name, - DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); + DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); auto & ast_create = ast->as(); assert(view_name == ast_create.getTable()); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index a675afbdc26..3d1ce76dff1 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -425,7 +425,7 @@ TTLTableDescription TTLTableDescription::parse(const String & str, const Columns return result; ParserTTLExpressionList parser; - ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); FunctionNameNormalizer().visit(ast.get()); return getTTLForTableFromAST(ast, columns, 
context, primary_key, context->getSettingsRef().allow_suspicious_ttl_expressions); diff --git a/src/Storages/examples/CMakeLists.txt b/src/Storages/examples/CMakeLists.txt index 225337d8ec8..cddfc9404d4 100644 --- a/src/Storages/examples/CMakeLists.txt +++ b/src/Storages/examples/CMakeLists.txt @@ -6,11 +6,3 @@ target_link_libraries (merge_selector2 PRIVATE dbms) clickhouse_add_executable (get_current_inserts_in_replicated get_current_inserts_in_replicated.cpp) target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper string_utils) - -clickhouse_add_executable (get_abandonable_lock_in_all_partitions get_abandonable_lock_in_all_partitions.cpp) -target_link_libraries (get_abandonable_lock_in_all_partitions PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper) - -if (TARGET ch_contrib::hdfs) - clickhouse_add_executable (async_read_buffer_from_hdfs async_read_buffer_from_hdfs.cpp) - target_link_libraries (async_read_buffer_from_hdfs PRIVATE dbms ch_contrib::hdfs) -endif () diff --git a/src/Storages/examples/active_parts.py b/src/Storages/examples/active_parts.py deleted file mode 100644 index d82c5ca96bf..00000000000 --- a/src/Storages/examples/active_parts.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/python -# coding=UTF-8 - -# Displays a list of active parts - parts that are not overlapped by any other part. 
-# Usage: `ls /var/lib/clickhouse/data/merge/visits | active_parts.py` - -import sys -import re - -parts = {} -for s in sys.stdin.read().split(): - m = re.match( - "^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$", s - ) - if m == None: - continue - m1 = m.group(1) - m2 = m.group(2) - i1 = int(m.group(3)) - i2 = int(m.group(4)) - l = int(m.group(5)) - if m1 != m2: - raise Exception("not in single month: " + s) - if m1 not in parts: - parts[m1] = [] - parts[m1].append((i1, i2, l, s)) - -for m, ps in sorted(parts.items()): - ps.sort(key=lambda i1_i2_l_s: (i1_i2_l_s[0], -i1_i2_l_s[1], -i1_i2_l_s[2])) - (x2, y2, l2, s2) = (-1, -1, -1, -1) - for x1, y1, l1, s1 in ps: - if x1 >= x2 and y1 <= y2 and l1 < l2 and (x1, y1) != (x2, y2): # 2 contains 1 - pass - elif x1 > y2: # 1 is to the right of 2 - if x1 != y2 + 1 and y2 != -1: - print() # to see the missing numbers - (x2, y2, l2, s2) = (x1, y1, l1, s1) - print(s1) - else: - raise Exception("invalid parts intersection: " + s1 + " and " + s2) - print() diff --git a/src/Storages/examples/async_read_buffer_from_hdfs.cpp b/src/Storages/examples/async_read_buffer_from_hdfs.cpp deleted file mode 100644 index 4f6aed8ef65..00000000000 --- a/src/Storages/examples/async_read_buffer_from_hdfs.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -int main() -{ - using namespace DB; - namespace fs = std::filesystem; - - String config_path = "/path/to/config/file"; - ConfigProcessor config_processor(config_path, false, true); - config_processor.setConfigPath(fs::path(config_path).parent_path()); - auto loaded_config = config_processor.loadConfig(false); - auto * config = loaded_config.configuration.duplicate(); - - String hdfs_namenode_url = "hdfs://namenode:port/"; - String path = "/path/to/hdfs/file"; - ReadSettings settings = {}; - auto in = std::make_unique(hdfs_namenode_url, path, *config, settings); - auto & reader = 
getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); - AsynchronousReadBufferFromHDFS buf(reader, {}, std::move(in)); - - String output; - WriteBufferFromString out(output); - copyData(buf, out); - std::cout << "output:" << output << std::endl; - return 0; -} diff --git a/src/Storages/examples/get_abandonable_lock_in_all_partitions.cpp b/src/Storages/examples/get_abandonable_lock_in_all_partitions.cpp deleted file mode 100644 index 4607d68f02d..00000000000 --- a/src/Storages/examples/get_abandonable_lock_in_all_partitions.cpp +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include - -#include - -#include - - -using namespace DB; - -/// This test is useful for assessing the performance of acquiring block numbers in all partitions (and there -/// can be ~1000 of them). This is needed when creating a mutation entry for a ReplicatedMergeTree table. -int main(int argc, char ** argv) -try -{ - if (argc != 3) - { - std::cerr << "usage: " << argv[0] << " " << std::endl; - return 3; - } - - ConfigProcessor processor(argv[1], false, true); - auto config = processor.loadConfig().configuration; - String root_path = argv[2]; - - auto zk = zkutil::ZooKeeper::createWithoutKillingPreviousSessions(*config, zkutil::getZooKeeperConfigName(*config), nullptr); - - String temp_path = root_path + "/temp"; - String blocks_path = root_path + "/block_numbers"; - - Stopwatch total_timer; - Stopwatch timer; - - EphemeralLocksInAllPartitions locks(blocks_path, "test_lock-", temp_path, *zk); - - std::cerr << "Locked, elapsed: " << timer.elapsedSeconds() << std::endl; - for (const auto & lock : locks.getLocks()) - std::cout << lock.partition_id << " " << lock.number << std::endl; - timer.restart(); - - locks.unlock(); - std::cerr << "Abandoned, elapsed: " << timer.elapsedSeconds() << std::endl; - - std::cerr << "Total elapsed: " << total_timer.elapsedSeconds() << std::endl; - - return 0; -} -catch (const Exception & e) -{ - std::cerr << e.what() << 
", " << e.displayText() << ": " << std::endl - << e.getStackTraceString() << std::endl; - throw; -} -catch (Poco::Exception & e) -{ - std::cerr << "Exception: " << e.displayText() << std::endl; - throw; -} -catch (std::exception & e) -{ - std::cerr << "std::exception: " << e.what() << std::endl; - throw; -} -catch (...) -{ - std::cerr << "Some exception" << std::endl; - throw; -} diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 32266f20923..c545367b63d 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -32,6 +32,7 @@ ColumnsDescription getStructureOfRemoteTableInShard( const ASTPtr & table_func_ptr) { String query; + const Settings & settings = context->getSettingsRef(); if (table_func_ptr) { @@ -110,7 +111,8 @@ ColumnsDescription getStructureOfRemoteTableInShard( column.default_desc.kind = columnDefaultKindFromString(kind_name); String expr_str = (*default_expr)[i].get(); column.default_desc.expression = parseQuery( - expr_parser, expr_str.data(), expr_str.data() + expr_str.size(), "default expression", 0, context->getSettingsRef().max_parser_depth); + expr_parser, expr_str.data(), expr_str.data() + expr_str.size(), "default expression", + 0, settings.max_parser_depth, settings.max_parser_backtracks); } res.add(column); diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 475cf5a4eae..7e2d393c3d1 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -118,7 +118,7 @@ static void checkOld( const std::string & expected) { ParserSelectQuery parser; - ASTPtr ast = parseQuery(parser, query, 1000, 1000); + ASTPtr ast = parseQuery(parser, query, 1000, 1000, 1000000); SelectQueryInfo query_info; SelectQueryOptions select_options; 
query_info.syntax_analyzer_result @@ -161,7 +161,7 @@ static void checkNewAnalyzer( const std::string & expected) { ParserSelectQuery parser; - ASTPtr ast = parseQuery(parser, query, 1000, 1000); + ASTPtr ast = parseQuery(parser, query, 1000, 1000, 1000000); SelectQueryOptions select_query_options; auto query_tree = buildQueryTree(ast, state.context); diff --git a/src/TableFunctions/Hive/TableFunctionHive.cpp b/src/TableFunctions/Hive/TableFunctionHive.cpp index e840d5fc8be..80494dbe5a8 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.cpp +++ b/src/TableFunctions/Hive/TableFunctionHive.cpp @@ -17,6 +17,7 @@ #include #include + namespace DB { @@ -99,7 +100,8 @@ StoragePtr TableFunctionHive::executeImpl( "(" + partition_by_def + ")", "partition by declaration list", settings.max_query_size, - settings.max_parser_depth); + settings.max_parser_depth, + settings.max_parser_backtracks); StoragePtr storage; storage = std::make_shared( hive_metastore_url, diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index 400fc81e6d4..8607597fa67 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -63,7 +63,7 @@ std::vector TableFunctionExplain::skipAnalysisForArguments(const QueryTr return {}; } -void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPtr /*context*/) +void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPtr context) { const auto * function = ast_function->as(); if (!function || !function->arguments) @@ -94,12 +94,12 @@ void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPt const auto & settings_str = settings_arg->value.get(); if (!settings_str.empty()) { - constexpr UInt64 max_size = 4096; - constexpr UInt64 max_depth = 16; + const Settings & settings = context->getSettingsRef(); /// parse_only_internals_ = true - we don't want to parse `SET` keyword ParserSetQuery 
settings_parser(/* parse_only_internals_ = */ true); - ASTPtr settings_ast = parseQuery(settings_parser, settings_str, max_size, max_depth); + ASTPtr settings_ast = parseQuery(settings_parser, settings_str, + settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); explain_query->setSettings(std::move(settings_ast)); } diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index e71a047c215..a893840f198 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -1,3 +1,2 @@ test_concurrent_backups_s3/test.py::test_concurrent_backups test_distributed_type_object/test.py::test_distributed_type_object -test_merge_table_over_distributed/test.py::test_global_in diff --git a/tests/clickhouse-test b/tests/clickhouse-test index ce0feadf050..057502379ed 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -4,9 +4,11 @@ # pylint: disable=global-variable-not-assigned # pylint: disable=too-many-lines # pylint: disable=anomalous-backslash-in-string +# pylint: disable=protected-access import copy import enum +import tempfile import glob # Not requests, to avoid requiring extra dependency. 
@@ -68,6 +70,144 @@ TEST_FILE_EXTENSIONS = [".sql", ".sql.j2", ".sh", ".py", ".expect"] VERSION_PATTERN = r"^((\d+\.)?(\d+\.)?(\d+\.)?\d+)$" +class SharedEngineReplacer: + ENGINES_NON_REPLICATED_REGEXP = r"[ =]((Collapsing|VersionedCollapsing|Summing|Replacing|Aggregating|)MergeTree\(?\)?)" + ENGINES_MAPPING_REPLICATED = [ + ("ReplicatedMergeTree", "SharedMergeTree"), + ("ReplicatedCollapsingMergeTree", "SharedCollapsingMergeTree"), + ( + "ReplicatedVersionedCollapsingMergeTree", + "SharedVersionedCollapsingMergeTree", + ), + ("ReplicatedSummingMergeTree", "SharedSummingMergeTree"), + ("ReplicatedReplacingMergeTree", "SharedReplacingMergeTree"), + ("ReplicatedAggregatingMergeTree", "SharedAggregatingMergeTree"), + ] + NEW_SYNTAX_REPLICATED_MERGE_TREE_RE = ( + r"Replicated[a-zA-Z]*MergeTree\((\\?'.*\\?')?,?(\\?'.*\\?')?[a-zA-Z, _}{]*\)" + ) + OLD_SYNTAX_OR_ARGUMENTS_RE = r"Tree\(.*[0-9]+.*\)" + + def _check_replicad_new_syntax(self, line): + return re.search(self.NEW_SYNTAX_REPLICATED_MERGE_TREE_RE, line) is not None + + def _check_old_syntax_or_arguments(self, line): + return re.search(self.OLD_SYNTAX_OR_ARGUMENTS_RE, line) is not None + + @staticmethod + def _is_comment_line(line): + return line.startswith("SELECT") or line.startswith("select") + + @staticmethod + def _is_create_query(line): + return ( + line.startswith("CREATE") + or line.startswith("create") + or line.startswith("ENGINE") + or line.startswith("engine") + ) + + def _replace_non_replicated(self, line, escape_quotes, use_random_path): + groups = re.search(self.ENGINES_NON_REPLICATED_REGEXP, line) + if groups is not None and not self._check_old_syntax_or_arguments(line): + non_replicated_engine = groups.groups()[0] + basename_no_ext = os.path.splitext(os.path.basename(self.file_name))[0] + if use_random_path: + shared_path = "/" + os.path.join( + basename_no_ext.replace("_", "/"), + str(os.getpid()), + str(random.randint(1, 1000)), + ) + else: + shared_path = "/" + os.path.join( + 
basename_no_ext.replace("_", "/"), str(os.getpid()) + ) + + if escape_quotes: + shared_engine = ( + "Shared" + + non_replicated_engine.replace("()", "") + + f"(\\'{shared_path}\\', \\'1\\')" + ) + else: + shared_engine = ( + "Shared" + + non_replicated_engine.replace("()", "") + + f"('{shared_path}', '1')" + ) + return line.replace(non_replicated_engine, shared_engine) + + return line + + def _need_to_replace_something(self): + return ( + self.replace_replicated or self.replace_non_replicated + ) and "shared_merge_tree" not in self.file_name + + def _has_show_create_table(self): + with open(self.file_name, "r", encoding="utf-8") as f: + return re.search("show create table", f.read(), re.IGNORECASE) + + def __init__( + self, file_name, replace_replicated, replace_non_replicated, reference_file + ): + self.file_name = file_name + self.temp_file_path = get_temp_file_path() + self.replace_replicated = replace_replicated + self.replace_non_replicated = replace_non_replicated + + use_random_path = not reference_file and not self._has_show_create_table() + + if not self._need_to_replace_something(): + return + + shutil.copyfile(self.file_name, self.temp_file_path) + shutil.copymode(self.file_name, self.temp_file_path) + + with open(self.file_name, "w", newline="", encoding="utf-8") as modified: + with open(self.temp_file_path, "r", newline="", encoding="utf-8") as source: + for line in source: + if self._is_comment_line(line) or ( + reference_file and not self._is_create_query(line) + ): + modified.write(line) + continue + + if self.replace_replicated: + for ( + engine_from, + engine_to, + ) in SharedEngineReplacer.ENGINES_MAPPING_REPLICATED: + if engine_from in line and ( + self._check_replicad_new_syntax(line) + or engine_from + " " in line + or engine_from + ";" in line + ): + line = line.replace(engine_from, engine_to) + break + + if self.replace_non_replicated: + line = self._replace_non_replicated( + line, reference_file, use_random_path + ) + + modified.write(line) 
+ + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, exc_tb): + if not self._need_to_replace_something(): + return + shutil.move(self.temp_file_path, self.file_name) + + +def get_temp_file_path(): + return os.path.join( + tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()) + ) + + def stringhash(s: str) -> int: # default hash() function consistent # only during process invocation https://stackoverflow.com/a/42089311 @@ -92,6 +232,16 @@ def trim_for_log(s): return "\n".join(lines) +def is_valid_utf_8(fname): + try: + with open(fname, "rb") as f: + contents = f.read() + contents.decode("utf-8") + return True + except UnicodeDecodeError: + return False + + class TestException(Exception): pass @@ -536,6 +686,8 @@ class FailureReason(enum.Enum): INTERNAL_QUERY_FAIL = "Internal query (CREATE/DROP DATABASE) failed:" # SKIPPED reasons + NOT_SUPPORTED_IN_CLOUD = "not supported in cloud environment" + NOT_SUPPORTED_IN_PRIVATE = "not supported in private build" DISABLED = "disabled" SKIP = "skip" NO_JINJA = "no jinja" @@ -548,6 +700,7 @@ class FailureReason(enum.Enum): S3_STORAGE = "s3-storage" BUILD = "not running for current build" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" + SHARED_MERGE_TREE = "no-shared-merge-tree" # UNKNOWN reasons NO_REFERENCE = "no reference file" @@ -606,8 +759,6 @@ class SettingsRandomizer: "read_in_order_two_level_merge_threshold": lambda: random.randint(0, 100), "optimize_aggregation_in_order": lambda: random.randint(0, 1), "aggregation_in_order_max_block_bytes": lambda: random.randint(0, 50000000), - "min_compress_block_size": lambda: random.randint(1, 1048576 * 3), - "max_compress_block_size": lambda: random.randint(1, 1048576 * 3), "use_uncompressed_cache": lambda: random.randint(0, 1), "min_bytes_to_use_direct_io": threshold_generator( 0.2, 0.5, 1, 10 * 1024 * 1024 * 1024 @@ -659,6 +810,11 @@ class SettingsRandomizer: 0.3, 0.5, 1, 10 * 1024 * 1024 * 1024 ), 
"max_bytes_before_remerge_sort": lambda: random.randint(1, 3000000000), + "min_compress_block_size": lambda: random.randint(1, 1048576 * 3), + "max_compress_block_size": lambda: random.randint(1, 1048576 * 3), + "merge_tree_compact_parts_min_granules_to_multibuffer_read": lambda: random.randint( + 1, 128 + ), "optimize_sorting_by_input_stream_properties": lambda: random.randint(0, 1), "http_response_buffer_size": lambda: random.randint(0, 10 * 1048576), "http_wait_end_of_query": lambda: random.random() > 0.5, @@ -684,6 +840,7 @@ class SettingsRandomizer: get_localzone(), ] ), + "prefer_warmed_unmerged_parts_seconds": lambda: random.randint(0, 10), "use_page_cache_for_disks_without_file_cache": lambda: random.random() < 0.7, "page_cache_inject_eviction": lambda: random.random() < 0.5, } @@ -733,6 +890,17 @@ class MergeTreeSettingsRandomizer: "primary_key_compress_block_size": lambda: random.randint(8000, 100000), "replace_long_file_name_to_hash": lambda: random.randint(0, 1), "max_file_name_length": threshold_generator(0.3, 0.3, 0, 128), + "min_bytes_for_full_part_storage": threshold_generator( + 0.3, 0.3, 0, 512 * 1024 * 1024 + ), + "compact_parts_max_bytes_to_buffer": lambda: random.randint( + 1024, 512 * 1024 * 1024 + ), + "compact_parts_max_granules_to_buffer": threshold_generator(0.15, 0.15, 1, 256), + "compact_parts_merge_max_bytes_to_prefetch_part": lambda: random.randint( + 1, 32 * 1024 * 1024 + ), + "cache_populated_by_fetch": lambda: random.randint(0, 1), } @staticmethod @@ -744,6 +912,10 @@ class MergeTreeSettingsRandomizer: return random_settings +def replace_in_file(filename, what, with_what): + os.system(f"LC_ALL=C sed -i -e 's|{what}|{with_what}|g' {filename}") + + class TestResult: def __init__( self, @@ -972,6 +1144,15 @@ class TestCase: if tags and ("disabled" in tags) and not args.disabled: return FailureReason.DISABLED + elif args.private and self.name in suite.private_skip_list: + return FailureReason.NOT_SUPPORTED_IN_PRIVATE + + elif args.cloud 
and ("no-replicated-database" in tags): + return FailureReason.REPLICATED_DB + + elif args.cloud and self.name in suite.cloud_skip_list: + return FailureReason.NOT_SUPPORTED_IN_CLOUD + elif ( os.path.exists(os.path.join(suite.suite_path, self.name) + ".disabled") and not args.disabled @@ -1022,6 +1203,13 @@ class TestCase: ): return FailureReason.NON_ATOMIC_DB + elif ( + tags + and ("no-shared-merge-tree" in tags) + and args.replace_replicated_with_shared + ): + return FailureReason.SHARED_MERGE_TREE + elif tags and ("no-s3-storage" in tags) and args.s3_storage: return FailureReason.S3_STORAGE elif ( @@ -1051,7 +1239,8 @@ class TestCase: ): description = "" - debug_log = trim_for_log(debug_log) + if debug_log: + debug_log = "\n".join(debug_log.splitlines()[:100]) if proc: if proc.returncode is None: @@ -1136,6 +1325,7 @@ class TestCase: description += "\nstdout:\n" description += trim_for_log(stdout) description += "\n" + if debug_log: description += "\n" description += debug_log @@ -1148,9 +1338,7 @@ class TestCase: ) if "Exception" in stdout: - description += "\n" - description += trim_for_log(stdout) - description += "\n" + description += "\n{}\n".format("\n".join(stdout.splitlines()[:100])) if debug_log: description += "\n" description += debug_log @@ -1358,7 +1546,13 @@ class TestCase: # because there are also output of per test database creation pattern = "{test} > {stdout} 2> {stderr}" - if self.ext == ".sql": + if self.ext == ".sql" and args.cloud: + # Get at least some logs, because we don't have access to system.text_log and pods... 
+ pattern = ( + "{client} --send_logs_level={logs_level} {secure} --multiquery {options}" + " --send_logs_level=trace < {test} > {stdout} 2>> /test_output/some_logs_from_server.log" + ) + elif self.ext == ".sql" and not args.cloud: pattern = ( "{client} --send_logs_level={logs_level} {secure} --multiquery {options} < " + pattern @@ -1396,17 +1590,15 @@ class TestCase: total_time = (datetime.now() - start_time).total_seconds() # Normalize randomized database names in stdout, stderr files. - os.system(f"LC_ALL=C sed -i -e 's/{database}/default/g' {self.stdout_file}") + replace_in_file(self.stdout_file, database, "default") if args.hide_db_name: - os.system(f"LC_ALL=C sed -i -e 's/{database}/default/g' {self.stderr_file}") + replace_in_file(self.stderr_file, database, "default") if args.replicated_database: - os.system(f"LC_ALL=C sed -i -e 's|/auto_{{shard}}||g' {self.stdout_file}") - os.system(f"LC_ALL=C sed -i -e 's|auto_{{replica}}||g' {self.stdout_file}") + replace_in_file(self.stdout_file, "/auto_{shard}", "") + replace_in_file(self.stdout_file, "auto_{replica}", "") # Normalize hostname in stdout file. 
- os.system( - f"LC_ALL=C sed -i -e 's/{socket.gethostname()}/localhost/g' {self.stdout_file}" - ) + replace_in_file(self.stdout_file, socket.gethostname(), "localhost") stdout = "" if os.path.exists(self.stdout_file): @@ -1444,18 +1636,51 @@ class TestCase: self.testcase_args = self.configure_testcase_args( args, self.case_file, suite.suite_tmp_path ) + client_options = self.add_random_settings(client_options) - proc, stdout, stderr, debug_log, total_time = self.run_single_test( - server_logs_level, client_options - ) - result = self.process_result_impl( - proc, stdout, stderr, debug_log, total_time - ) - result.check_if_need_retry(args, stdout, stderr, self.runs_count) - # to avoid breaking CSV parser - result.description = result.description.replace("\0", "") + if not is_valid_utf_8(self.case_file) or not is_valid_utf_8( + self.reference_file + ): + proc, stdout, stderr, debug_log, total_time = self.run_single_test( + server_logs_level, client_options + ) + result = self.process_result_impl( + proc, stdout, stderr, debug_log, total_time + ) + result.check_if_need_retry(args, stdout, stderr, self.runs_count) + # to avoid breaking CSV parser + result.description = result.description.replace("\0", "") + else: + with SharedEngineReplacer( + self.case_file, + args.replace_replicated_with_shared, + args.replace_non_replicated_with_shared, + False, + ): + with SharedEngineReplacer( + self.reference_file, + args.replace_replicated_with_shared, + args.replace_non_replicated_with_shared, + True, + ): + ( + proc, + stdout, + stderr, + debug_log, + total_time, + ) = self.run_single_test(server_logs_level, client_options) + + result = self.process_result_impl( + proc, stdout, stderr, debug_log, total_time + ) + result.check_if_need_retry( + args, stdout, stderr, self.runs_count + ) + # to avoid breaking CSV parser + result.description = result.description.replace("\0", "") if result.status == TestStatus.FAIL: result.description = 
self.add_info_about_settings(result.description) @@ -1688,6 +1913,8 @@ class TestSuite: self.suite_path: str = suite_path self.suite_tmp_path: str = suite_tmp_path self.suite: str = suite + self.cloud_skip_list: List[str] = [] + self.private_skip_list: List[str] = [] if args.run_by_hash_num is not None and args.run_by_hash_total is not None: if args.run_by_hash_num > args.run_by_hash_total: @@ -1987,10 +2214,16 @@ def check_server_started(args): sys.stdout.flush() retry_count = args.server_check_retries + query = "SELECT version(), arrayStringConcat(groupArray(value), ' ') FROM system.build_options WHERE name IN ('GIT_HASH', 'GIT_BRANCH')" while retry_count > 0: try: - clickhouse_execute(args, "SELECT 1", max_http_retries=1) + res = ( + str(clickhouse_execute(args, query).decode()) + .strip() + .replace("\t", " @ ") + ) print(" OK") + print(f"Connected to server {res}") sys.stdout.flush() return True except (ConnectionError, http.client.ImproperConnectionState) as e: @@ -2412,6 +2645,23 @@ def reportLogStats(args): print("\n") +def try_get_skip_list(base_dir, name): + test_names_to_skip = [] + skip_list_path = os.path.join(base_dir, name) + if not os.path.exists(skip_list_path): + return test_names_to_skip + + with open(skip_list_path, "r", encoding="utf-8") as fd: + for line in fd.read().split("\n"): + if line == "" or line[0] == " ": + continue + test_name = line.split()[0].strip() + if test_name != "": + test_names_to_skip.append(test_name) + + return test_names_to_skip + + def main(args): global server_died global stop_time @@ -2430,18 +2680,18 @@ def main(args): args.build_flags = collect_build_flags(args) args.changed_merge_tree_settings = collect_changed_merge_tree_settings(args) - args.suppport_system_processes_is_all_data_sent = check_table_column( - args, "system", "processes", "is_all_data_sent" - ) - if args.s3_storage and ( - BuildFlags.THREAD in args.build_flags or BuildFlags.DEBUG in args.build_flags - ): + if args.s3_storage and (BuildFlags.RELEASE 
not in args.build_flags): args.no_random_settings = True if args.skip: args.skip = set(args.skip) + if args.replace_replicated_with_shared: + if not args.skip: + args.skip = set([]) + args.skip = set(args.skip) + base_dir = os.path.abspath(args.queries) # Keep same default values as in queries/shell_config.sh @@ -2516,6 +2766,8 @@ def main(args): ) total_tests_run = 0 + cloud_skip_list = try_get_skip_list(base_dir, "../queries-no-cloud-tests.txt") + private_skip_list = try_get_skip_list(base_dir, "../queries-no-private-tests.txt") for suite in sorted(os.listdir(base_dir), key=suite_key_func): if server_died.is_set(): @@ -2525,6 +2777,8 @@ def main(args): if test_suite is None: continue + test_suite.cloud_skip_list = cloud_skip_list + test_suite.private_skip_list = private_skip_list total_tests_run += do_run_tests(args.jobs, test_suite, args.parallel) if server_died.is_set(): @@ -2644,7 +2898,14 @@ def find_clickhouse_command(binary, command): def get_additional_client_options(args): if args.client_option: - return " ".join("--" + option for option in args.client_option) + client_options = " ".join("--" + option for option in args.client_option) + if "CLICKHOUSE_CLIENT_OPT" in os.environ: + return os.environ["CLICKHOUSE_CLIENT_OPT"] + client_options + else: + return client_options + else: + if "CLICKHOUSE_CLIENT_OPT" in os.environ: + return os.environ["CLICKHOUSE_CLIENT_OPT"] return "" @@ -2839,6 +3100,43 @@ def parse_args(): help="Display $ characters after line with trailing whitespaces in diff output", ) + group = parser.add_mutually_exclusive_group(required=False) + group.add_argument( + "--cloud", + action="store_true", + default=None, + dest="cloud", + help="Run only tests that are supported in ClickHouse Cloud environment", + ) + + group.add_argument( + "--no-cloud", + action="store_false", + default=None, + dest="cloud", + help="Run all the tests, including the ones not supported in ClickHouse Cloud environment", + ) + parser.set_defaults(cloud=False) + + 
group = parser.add_mutually_exclusive_group(required=False) + group.add_argument( + "--private", + action="store_true", + default=None, + dest="private", + help="Run only tests that are supported in the private build", + ) + + group.add_argument( + "--no-private", + action="store_false", + default=None, + dest="private", + help="Run all the tests, including the ones not supported in the private build", + ) + # Only used to skip tests via "../queries-no-private-tests.txt", so it's fine to keep it enabled by default + parser.set_defaults(private=True) + group = parser.add_mutually_exclusive_group(required=False) group.add_argument( "--zookeeper", @@ -2920,6 +3218,18 @@ def parse_args(): default=False, help="Do not include tests that are not supported with parallel replicas feature", ) + parser.add_argument( + "--replace-replicated-with-shared", + action="store_true", + default=os.environ.get("USE_META_IN_KEEPER_FOR_MERGE_TREE", False), + help="Replace ReplicatedMergeTree engine with SharedMergeTree", + ) + parser.add_argument( + "--replace-non-replicated-with-shared", + action="store_true", + default=False, + help="Replace ordinary MergeTree engine with SharedMergeTree", + ) return parser.parse_args() @@ -3062,6 +3372,7 @@ if __name__ == "__main__": client_options_query_str = get_additional_client_options_url(args) args.client_options_query_str = client_options_query_str + "&" + args.client_options_query_str += os.environ["CLICKHOUSE_URL_PARAMS"] os.environ["CLICKHOUSE_URL_PARAMS"] += client_options_query_str else: args.client_options_query_str = "" @@ -3072,4 +3383,7 @@ if __name__ == "__main__": if args.db_engine and args.db_engine == "Ordinary": MESSAGES_TO_RETRY.append(" locking attempt on ") + if args.replace_replicated_with_shared: + args.s3_storage = True + main(args) diff --git a/tests/integration/test_config_substitutions/configs/config_zk_include_test.xml b/tests/integration/test_config_substitutions/configs/config_zk_include_test.xml new file mode 100644 
index 00000000000..743770c3024 --- /dev/null +++ b/tests/integration/test_config_substitutions/configs/config_zk_include_test.xml @@ -0,0 +1,12 @@ + + + 44 + + + 99 + 1 + 1111 + + + + diff --git a/tests/integration/test_config_substitutions/test.py b/tests/integration/test_config_substitutions/test.py index 564985b2f50..ac75771cb9c 100644 --- a/tests/integration/test_config_substitutions/test.py +++ b/tests/integration/test_config_substitutions/test.py @@ -13,7 +13,12 @@ node2 = cluster.add_instance( env_variables={"MAX_QUERY_SIZE": "55555"}, ) node3 = cluster.add_instance( - "node3", user_configs=["configs/config_zk.xml"], with_zookeeper=True + "node3", + user_configs=[ + "configs/config_zk.xml", + ], + main_configs=["configs/config_zk_include_test.xml"], + with_zookeeper=True, ) node4 = cluster.add_instance( "node4", @@ -62,6 +67,16 @@ def start_cluster(): value=b"default", makepath=True, ) + zk.create( + path="/min_bytes_for_wide_part", + value=b"33", + makepath=True, + ) + zk.create( + path="/merge_max_block_size", + value=b"8888", + makepath=True, + ) cluster.add_zookeeper_startup_command(create_zk_roots) @@ -237,3 +252,63 @@ def test_allow_databases(start_cluster): ).strip() == "" ) + + +def test_config_multiple_zk_substitutions(start_cluster): + assert ( + node3.query( + "SELECT value FROM system.merge_tree_settings WHERE name='min_bytes_for_wide_part'" + ) + == "33\n" + ) + assert ( + node3.query( + "SELECT value FROM system.merge_tree_settings WHERE name='min_rows_for_wide_part'" + ) + == "1111\n" + ) + assert ( + node3.query( + "SELECT value FROM system.merge_tree_settings WHERE name='merge_max_block_size'" + ) + == "8888\n" + ) + assert ( + node3.query( + "SELECT value FROM system.server_settings WHERE name='background_pool_size'" + ) + == "44\n" + ) + + zk = cluster.get_kazoo_client("zoo1") + zk.create( + path="/background_pool_size", + value=b"72", + makepath=True, + ) + + node3.replace_config( + 
"/etc/clickhouse-server/config.d/config_zk_include_test.xml", + """ + + + 44 + + + 1 + 1111 + + + + +""", + ) + + node3.query("SYSTEM RELOAD CONFIG") + + assert ( + node3.query( + "SELECT value FROM system.server_settings WHERE name='background_pool_size'" + ) + == "72\n" + ) diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index 01bc4804c9f..b91a72c5534 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -332,7 +332,13 @@ def test_different_part_types_on_replicas(start_cluster, table, part_type): for _ in range(3): insert_random_data(table, leader, 100) - leader.query("OPTIMIZE TABLE {} FINAL".format(table)) + exec_query_with_retry( + leader, + "OPTIMIZE TABLE {} FINAL".format(table), + settings={"optimize_throw_if_noop": 1}, + silent=True, + ) + follower.query("SYSTEM SYNC REPLICA {}".format(table), timeout=20) expected = "{}\t1\n".format(part_type) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index dbbe670e8ca..6d5b84a8143 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1414,10 +1414,10 @@ def test_signatures(started_cluster): ) assert int(result) == 1 - result = instance.query( + error = instance.query_and_get_error( f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}')" ) - assert int(result) == 1 + assert "S3_ERROR" in error result = instance.query( f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'Arrow', 'x UInt64', 'auto')" @@ -1429,20 +1429,20 @@ def test_signatures(started_cluster): ) assert int(result) == 1 - result = instance.query( + error = instance.query_and_get_error( f"select * from 
s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}', 'Arrow')" ) - assert int(result) == 1 + assert "S3_ERROR" in error - lt = instance.query( + error = instance.query_and_get_error( f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}', 'Arrow', 'x UInt64')" ) - assert int(result) == 1 + assert "S3_ERROR" in error - lt = instance.query( + error = instance.query_and_get_error( f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}', 'Arrow', 'x UInt64', 'auto')" ) - assert int(result) == 1 + assert "S3_ERROR" in error def test_select_columns(started_cluster): diff --git a/tests/integration/test_undrop_query/test.py b/tests/integration/test_undrop_query/test.py index 590a5690e55..d57aa8c2dc7 100644 --- a/tests/integration/test_undrop_query/test.py +++ b/tests/integration/test_undrop_query/test.py @@ -29,30 +29,39 @@ def test_undrop_drop_and_undrop_loop(started_cluster): logging.info( "random_sec: " + random_sec.__str__() + ", table_uuid: " + table_uuid ) + node.query( - "create table test_undrop_loop" + "CREATE TABLE test_undrop_loop" + count.__str__() + " UUID '" + table_uuid - + "' (id Int32) Engine=MergeTree() order by id;" + + "' (id Int32) ENGINE = MergeTree() ORDER BY id;" ) - node.query("drop table test_undrop_loop" + count.__str__() + ";") + + node.query("DROP TABLE test_undrop_loop" + count.__str__() + ";") + time.sleep(random_sec) + if random_sec >= 5: error = node.query_and_get_error( - "undrop table test_undrop_loop" + "UNDROP TABLE test_undrop_loop" + count.__str__() - + " uuid '" + + " UUID '" + table_uuid + "';" ) assert "UNKNOWN_TABLE" in error - else: + elif random_sec <= 3: + # (*) node.query( - "undrop table test_undrop_loop" + "UNDROP TABLE test_undrop_loop" + count.__str__() - + " uuid '" + 
+ " UUID '" + table_uuid + "';" ) count = count + 1 + else: + pass + # ignore random_sec = 4 to account for communication delay with the database. + # if we don't do that, then the second case (*) may find the table already dropped and receive an unexpected exception from the database (Bug #55167) diff --git a/tests/queries/0_stateless/00765_locate.reference b/tests/queries/0_stateless/00765_locate.reference new file mode 100644 index 00000000000..3b066c0d68b --- /dev/null +++ b/tests/queries/0_stateless/00765_locate.reference @@ -0,0 +1,7 @@ +-- negative tests +-- test mysql compatibility setting +0 +0 +3 +-- the function name needs to be case-insensitive for historical reasons +0 diff --git a/tests/queries/0_stateless/00765_locate.sql b/tests/queries/0_stateless/00765_locate.sql new file mode 100644 index 00000000000..3467ebd4249 --- /dev/null +++ b/tests/queries/0_stateless/00765_locate.sql @@ -0,0 +1,15 @@ +SET send_logs_level = 'fatal'; + +SELECT '-- negative tests'; +SELECT locate(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT locate(1, 'abc'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT locate('abc', 1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT locate('abc', 'abc', 'abc'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT '-- test mysql compatibility setting'; +SELECT locate('abcabc', 'ca'); +SELECT locate('abcabc', 'ca') SETTINGS function_locate_has_mysql_compatible_argument_order = true; +SELECT locate('abcabc', 'ca') SETTINGS function_locate_has_mysql_compatible_argument_order = false; + +SELECT '-- the function name needs to be case-insensitive for historical reasons'; +SELECT LoCaTe('abcabc', 'ca'); diff --git a/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference b/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference index 285b9a62d20..6d31168c2b7 100644 --- a/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference +++ 
b/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference @@ -4,7 +4,6 @@ foo FOO baz zzz -2 fo oo o diff --git a/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql b/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql index da0eb9bea6d..995aaef9ea5 100644 --- a/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql +++ b/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql @@ -6,7 +6,6 @@ select LOWER('Foo'); select UPPER('Foo'); select REPLACE('bar', 'r', 'z'); select REGEXP_REPLACE('bar', '.', 'z'); -select Locate('foo', 'o'); select SUBSTRING('foo', 1, 2); select Substr('foo', 2); select mid('foo', 3); diff --git a/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh b/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh index f1acd39136f..07c2e345009 100755 --- a/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh +++ b/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh @@ -2,7 +2,6 @@ log_user 0 -# In some places `-timeout 1` is used to avoid expect to always wait for the whole timeout set timeout 60 match_max 100000 @@ -14,15 +13,23 @@ expect ":) " # Make a query send -- "SELECT 1\r" -expect -timeout 1 ":-] " send -- "-- xxx\r" -expect -timeout 1 ":-] " send -- ", 2\r" -expect -timeout 1 ":-] " -send -- ";\r" +send -- ";" + +# For some reason this sleep is required for this test to work properly +sleep 1 +send -- "\r" + +expect { + "│ 1 │ 2 │" { } + timeout { exit 1 } +} -expect "│ 1 │ 2 │" expect ":) " -send -- "\4" -expect eof +send -- "" +expect { + eof { exit 0 } + timeout { exit 1 } +} diff --git a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference index de5a62159ef..76dd04b0e42 100644 --- a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference +++ 
b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference @@ -26,7 +26,6 @@ SELECT least(1), length('1'), log(1), - position('1', '1'), log(1), log10(1), log2(1), diff --git a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql index dda2e045e76..d8054b9757c 100644 --- a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql +++ b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql @@ -1 +1 @@ -EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH('1'), CHARACTER_LENGTH('1'), COALESCE(1), CONCAT('1', '1'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), DATEDIFF('DAY', toDate('2020-10-24'), toDate('2019-10-24')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE('A'), LEAST(1), LENGTH('1'), LN(1), LOCATE('1', '1'), LOG(1), LOG10(1), LOG2(1), LOWER('A'), MAX(1), MID('123', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION('123', '2'), POW(1, 1), POWER(1, 1), RAND(), REPLACE('1', '1', '2'), REVERSE('123'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR('123', 2), SUBSTRING('123', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE('A'), UPPER('A'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate('2020-10-24')), YEARWEEK(toDate('2020-10-24')) format TSVRaw; +EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH('1'), CHARACTER_LENGTH('1'), COALESCE(1), CONCAT('1', '1'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), DATEDIFF('DAY', toDate('2020-10-24'), toDate('2019-10-24')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE('A'), LEAST(1), LENGTH('1'), LN(1), LOG(1), LOG10(1), LOG2(1), LOWER('A'), MAX(1), MID('123', 1, 1), MIN(1), 
MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION('123', '2'), POW(1, 1), POWER(1, 1), RAND(), REPLACE('1', '1', '2'), REVERSE('123'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR('123', 2), SUBSTRING('123', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE('A'), UPPER('A'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate('2020-10-24')), YEARWEEK(toDate('2020-10-24')) format TSVRaw; diff --git a/tests/queries/0_stateless/02151_invalid_setting_with_hints_in_query.sh b/tests/queries/0_stateless/02151_invalid_setting_with_hints_in_query.sh index 2faaa3bb1b6..367d0163497 100755 --- a/tests/queries/0_stateless/02151_invalid_setting_with_hints_in_query.sh +++ b/tests/queries/0_stateless/02151_invalid_setting_with_hints_in_query.sh @@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_LOCAL --query="SET input_format_with_names_use_headers = 1" 2>&1 | grep -qF "Code: 115. DB::Exception: Unknown setting input_format_with_names_use_headers: Maybe you meant ['input_format_with_names_use_header','input_format_with_types_use_header']. (UNKNOWN_SETTING)" && echo 'OK' || echo 'FAIL' ||: +$CLICKHOUSE_LOCAL --query="SET input_format_with_names_use_headers = 1" 2>&1 | grep -qF "Code: 115. DB::Exception: Unknown setting 'input_format_with_names_use_headers': Maybe you meant ['input_format_with_names_use_header','input_format_with_types_use_header']. 
(UNKNOWN_SETTING)" && echo 'OK' || echo 'FAIL' ||: diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index e15002da69c..8b85ac48c16 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -194,10 +194,7 @@ blockSerializedSize blockSize buildId byteSize -caseWithExpr caseWithExpression -caseWithoutExpr -caseWithoutExpression catboostEvaluate cbrt ceil @@ -312,7 +309,6 @@ fromUnixTimestamp64Micro fromUnixTimestamp64Milli fromUnixTimestamp64Nano fromUnixTimestampInJodaSyntax -fullHostName fuzzBits gccMurmurHash gcd diff --git a/tests/queries/0_stateless/02681_undrop_query.sql b/tests/queries/0_stateless/02681_undrop_query.sql index 66447fc6c44..d038a383690 100644 --- a/tests/queries/0_stateless/02681_undrop_query.sql +++ b/tests/queries/0_stateless/02681_undrop_query.sql @@ -85,5 +85,5 @@ drop table 02681_undrop_multiple; select table from system.dropped_tables where table = '02681_undrop_multiple' limit 1; undrop table 02681_undrop_multiple; select * from 02681_undrop_multiple order by id; -undrop table 02681_undrop_multiple; -- { serverError 57 } +undrop table 02681_undrop_multiple; -- { serverError TABLE_ALREADY_EXISTS } drop table 02681_undrop_multiple sync; diff --git a/tests/queries/0_stateless/02998_system_dns_cache_table.reference b/tests/queries/0_stateless/02998_system_dns_cache_table.reference index ed6cb000142..600d0bc0b39 100644 --- a/tests/queries/0_stateless/02998_system_dns_cache_table.reference +++ b/tests/queries/0_stateless/02998_system_dns_cache_table.reference @@ -1 +1,2 @@ -localhost 127.0.0.1 IPv4 1 +hostname ip_address ip_family cached_at +String String Enum8(\'IPv4\' = 0, \'IPv6\' = 1, \'UNIX_LOCAL\' = 2) DateTime diff --git a/tests/queries/0_stateless/02998_system_dns_cache_table.sh 
b/tests/queries/0_stateless/02998_system_dns_cache_table.sh deleted file mode 100755 index b74fc00ab3b..00000000000 --- a/tests/queries/0_stateless/02998_system_dns_cache_table.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# Retries are necessary because the DNS cache may be flushed before second statement is executed -i=0 -retries=5 -while [[ $i -lt $retries ]]; do - ${CLICKHOUSE_CURL} -sS --fail --data "SELECT * FROM url('http://localhost:8123/ping', CSV, 'auto', headers())" "${CLICKHOUSE_URL}" | grep -oP -q 'Ok.' || continue - - RECORDS=$(${CLICKHOUSE_CURL} -sS --fail --data "SELECT hostname, ip_address, ip_family, (isNotNull(cached_at) AND cached_at > '1970-01-01 00:00:00') FROM system.dns_cache WHERE hostname = 'localhost' and ip_family = 'IPv4';" "${CLICKHOUSE_URL}") - - if [[ -n "${RECORDS}" ]]; then - echo "${RECORDS}" - exit 0 - fi - - ((++i)) - sleep 0.2 -done - -echo "All tries to fetch entries for localhost failed, no rows returned. -Probably the DNS cache is disabled or the ClickHouse instance not responds to ping." 
-exit 1 diff --git a/tests/queries/0_stateless/02998_system_dns_cache_table.sql b/tests/queries/0_stateless/02998_system_dns_cache_table.sql new file mode 100644 index 00000000000..0ceb3d8a95a --- /dev/null +++ b/tests/queries/0_stateless/02998_system_dns_cache_table.sql @@ -0,0 +1,3 @@ +SELECT hostname, ip_address, ip_family, cached_at FROM system.dns_cache +LIMIT 0 +FORMAT TSVWithNamesAndTypes; diff --git a/tests/queries/0_stateless/03001_consider_lwd_when_merge.reference b/tests/queries/0_stateless/03001_consider_lwd_when_merge.reference new file mode 100644 index 00000000000..19920de3d3c --- /dev/null +++ b/tests/queries/0_stateless/03001_consider_lwd_when_merge.reference @@ -0,0 +1,3 @@ +2 +2 +1 diff --git a/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql b/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql new file mode 100644 index 00000000000..988d7058f21 --- /dev/null +++ b/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS lwd_merge; + +CREATE TABLE lwd_merge (id UInt64 CODEC(NONE)) + ENGINE = MergeTree ORDER BY id +SETTINGS max_bytes_to_merge_at_max_space_in_pool = 80000, exclude_deleted_rows_for_part_size_in_merge = 0; + +INSERT INTO lwd_merge SELECT number FROM numbers(10000); +INSERT INTO lwd_merge SELECT number FROM numbers(10000, 10000); + +OPTIMIZE TABLE lwd_merge; +SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; + +DELETE FROM lwd_merge WHERE id % 10 > 0; + +OPTIMIZE TABLE lwd_merge; +SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; + +ALTER TABLE lwd_merge MODIFY SETTING exclude_deleted_rows_for_part_size_in_merge = 1; + +-- delete again because deleted rows will be counted in mutation +DELETE FROM lwd_merge WHERE id % 100 == 0; + +OPTIMIZE TABLE lwd_merge; +SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 
1; + +DROP TABLE IF EXISTS lwd_merge; diff --git a/tests/queries/0_stateless/03002_modify_query_cte.reference b/tests/queries/0_stateless/03002_modify_query_cte.reference new file mode 100644 index 00000000000..50e4a7c6a07 --- /dev/null +++ b/tests/queries/0_stateless/03002_modify_query_cte.reference @@ -0,0 +1,2 @@ +CREATE MATERIALIZED VIEW default.mv_03002 TO default.table_03002\n(\n `ts` DateTime\n)\nAS SELECT ts\nFROM default.table_03002 +CREATE MATERIALIZED VIEW default.mv_03002 TO default.table_03002\n(\n `ts` DateTime\n)\nAS WITH MY_CTE AS\n (\n SELECT ts\n FROM default.table_03002\n )\nSELECT *\nFROM MY_CTE diff --git a/tests/queries/0_stateless/03002_modify_query_cte.sql b/tests/queries/0_stateless/03002_modify_query_cte.sql new file mode 100644 index 00000000000..3a36ce7e7fd --- /dev/null +++ b/tests/queries/0_stateless/03002_modify_query_cte.sql @@ -0,0 +1,15 @@ + +CREATE TABLE table_03002 (ts DateTime, event_type String) ENGINE = MergeTree ORDER BY (event_type, ts); + +CREATE MATERIALIZED VIEW mv_03002 TO table_03002 AS SELECT ts FROM table_03002; + +SHOW CREATE TABLE mv_03002; + +ALTER TABLE mv_03002 MODIFY QUERY +WITH MY_CTE AS (SELECT ts FROM table_03002) +SELECT * FROM MY_CTE; + +SHOW CREATE TABLE mv_03002; + +DROP TABLE mv_03002; +DROP TABLE table_03002; diff --git a/tests/queries/0_stateless/03008_optimize_equal_ranges.reference b/tests/queries/0_stateless/03008_optimize_equal_ranges.reference new file mode 100644 index 00000000000..fc7a4f3c118 --- /dev/null +++ b/tests/queries/0_stateless/03008_optimize_equal_ranges.reference @@ -0,0 +1,16 @@ +0 30000 +1 30000 +2 30000 +0 30000 +1 30000 +2 30000 +0 449985000 +1 449985000 +2 449985000 +0 449985000 +1 449985000 +2 449985000 +sum 1 1 +sum 16 1 +uniqExact 1 1 +uniqExact 16 0 diff --git a/tests/queries/0_stateless/03008_optimize_equal_ranges.sql b/tests/queries/0_stateless/03008_optimize_equal_ranges.sql new file mode 100644 index 00000000000..6d769c7382a --- /dev/null +++ 
b/tests/queries/0_stateless/03008_optimize_equal_ranges.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS t_optimize_equal_ranges; + +CREATE TABLE t_optimize_equal_ranges (a UInt64, b String, c UInt64) ENGINE = MergeTree ORDER BY a; + +SET max_block_size = 1024; +SET max_bytes_before_external_group_by = 0; +SET optimize_aggregation_in_order = 0; +SET optimize_use_projections = 0; + +INSERT INTO t_optimize_equal_ranges SELECT 0, toString(number), number FROM numbers(30000); +INSERT INTO t_optimize_equal_ranges SELECT 1, toString(number), number FROM numbers(30000); +INSERT INTO t_optimize_equal_ranges SELECT 2, toString(number), number FROM numbers(30000); + +SELECT a, uniqExact(b) FROM t_optimize_equal_ranges GROUP BY a ORDER BY a SETTINGS max_threads = 16; +SELECT a, uniqExact(b) FROM t_optimize_equal_ranges GROUP BY a ORDER BY a SETTINGS max_threads = 1; +SELECT a, sum(c) FROM t_optimize_equal_ranges GROUP BY a ORDER BY a SETTINGS max_threads = 16; +SELECT a, sum(c) FROM t_optimize_equal_ranges GROUP BY a ORDER BY a SETTINGS max_threads = 1; + +SYSTEM FLUSH LOGS; + +SELECT + used_aggregate_functions[1] AS func, + Settings['max_threads'] AS threads, + ProfileEvents['AggregationOptimizedEqualRangesOfKeys'] > 0 +FROM system.query_log +WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND query LIKE '%SELECT%FROM%t_optimize_equal_ranges%' +ORDER BY func, threads; + +DROP TABLE t_optimize_equal_ranges; diff --git a/tests/queries/0_stateless/03008_uniq_exact_equal_ranges.reference b/tests/queries/0_stateless/03008_uniq_exact_equal_ranges.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03008_uniq_exact_equal_ranges.sql b/tests/queries/0_stateless/03008_uniq_exact_equal_ranges.sql new file mode 100644 index 00000000000..2e708f28cac --- /dev/null +++ b/tests/queries/0_stateless/03008_uniq_exact_equal_ranges.sql @@ -0,0 +1,36 @@ +DROP TABLE IF EXISTS t_uniq_exact; + +CREATE TABLE t_uniq_exact (a UInt64, b 
String, c UInt64) ENGINE = MergeTree ORDER BY a; + +SET group_by_two_level_threshold_bytes = 1; +SET group_by_two_level_threshold = 1; +SET max_threads = 4; +SET max_bytes_before_external_group_by = 0; +SET optimize_aggregation_in_order = 0; + +INSERT INTO t_uniq_exact SELECT 0, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 1, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 2, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 3, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 4, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 5, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 6, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 7, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 8, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 9, randomPrintableASCII(5), rand() FROM numbers(300000); + +OPTIMIZE TABLE t_uniq_exact FINAL; + +SELECT a, uniqExact(b) FROM t_uniq_exact GROUP BY a ORDER BY a +SETTINGS min_hit_rate_to_use_consecutive_keys_optimization = 1.0 +EXCEPT +SELECT a, uniqExact(b) FROM t_uniq_exact GROUP BY a ORDER BY a +SETTINGS min_hit_rate_to_use_consecutive_keys_optimization = 0.5; + +SELECT a, sum(c) FROM t_uniq_exact GROUP BY a ORDER BY a +SETTINGS min_hit_rate_to_use_consecutive_keys_optimization = 1.0 +EXCEPT +SELECT a, sum(c) FROM t_uniq_exact GROUP BY a ORDER BY a +SETTINGS min_hit_rate_to_use_consecutive_keys_optimization = 0.5; + +DROP TABLE t_uniq_exact; diff --git a/tests/queries/0_stateless/03009_storage_memory_circ_buffer_usage.reference b/tests/queries/0_stateless/03009_storage_memory_circ_buffer_usage.reference new file mode 100644 index 00000000000..20dda4fa15a --- /dev/null +++ 
b/tests/queries/0_stateless/03009_storage_memory_circ_buffer_usage.reference @@ -0,0 +1,16 @@ +TESTING BYTES +8192 +9216 +9216 +65536 +TESTING ROWS +50 +1000 +1020 +1100 +TESTING NO CIRCULAR-BUFFER +8192 +9216 +17408 +82944 +TESTING INVALID SETTINGS diff --git a/tests/queries/0_stateless/03009_storage_memory_circ_buffer_usage.sql b/tests/queries/0_stateless/03009_storage_memory_circ_buffer_usage.sql new file mode 100644 index 00000000000..fa4ba96277d --- /dev/null +++ b/tests/queries/0_stateless/03009_storage_memory_circ_buffer_usage.sql @@ -0,0 +1,63 @@ +SET max_block_size = 65409; -- Default value + +DROP TABLE IF EXISTS memory; +CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_bytes_to_keep = 4096, max_bytes_to_keep = 16384; + +SELECT 'TESTING BYTES'; +/* 1. testing oldest block doesn't get deleted because of min-threshold */ +INSERT INTO memory SELECT * FROM numbers(0, 1600); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +/* 2. adding block that doesn't get deleted */ +INSERT INTO memory SELECT * FROM numbers(1000, 100); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +/* 3. testing oldest block gets deleted - 9216 bytes - 1100 */ +INSERT INTO memory SELECT * FROM numbers(9000, 1000); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +/* 4.check large block over-writes all bytes / rows */ +INSERT INTO memory SELECT * FROM numbers(9000, 10000); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +DROP TABLE IF EXISTS memory; +CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000; + +SELECT 'TESTING ROWS'; +/* 1. add normal number of rows */ +INSERT INTO memory SELECT * FROM numbers(0, 50); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +/* 2. 
table should have 1000 */ +INSERT INTO memory SELECT * FROM numbers(50, 950); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +/* 3. table should have 1020 - removed first 50 */ +INSERT INTO memory SELECT * FROM numbers(2000, 70); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +/* 4. check large block over-writes all rows */ +INSERT INTO memory SELECT * FROM numbers(3000, 1100); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +SELECT 'TESTING NO CIRCULAR-BUFFER'; +DROP TABLE IF EXISTS memory; +CREATE TABLE memory (i UInt32) ENGINE = Memory; + +INSERT INTO memory SELECT * FROM numbers(0, 1600); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +INSERT INTO memory SELECT * FROM numbers(1000, 100); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +INSERT INTO memory SELECT * FROM numbers(9000, 1000); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +INSERT INTO memory SELECT * FROM numbers(9000, 10000); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +SELECT 'TESTING INVALID SETTINGS'; +CREATE TABLE faulty_memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 100; -- { serverError 452 } +CREATE TABLE faulty_memory (i UInt32) ENGINE = Memory SETTINGS min_bytes_to_keep = 100; -- { serverError 452 } + +DROP TABLE memory; \ No newline at end of file diff --git a/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.reference b/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.reference new file mode 100644 index 00000000000..89e5f639c66 --- /dev/null +++ b/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.reference @@ -0,0 +1,68 @@ +(5,NULL) +(5,NULL) +(5,NULL) +QUERY id: 0 + PROJECTION COLUMNS + 
(sumIf(toNullable(1), equals(modulo(number, 2), 0)), NULL) Tuple(Nullable(UInt64), Nullable(Nothing)) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: tuple, function_type: ordinary, result_type: Tuple(Nullable(UInt64), Nullable(Nothing)) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sumIf, function_type: aggregate, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: Nullable(UInt8) + EXPRESSION + FUNCTION id: 7, function_name: toNullable, function_type: ordinary, result_type: Nullable(UInt8) + ARGUMENTS + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 10, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: number, result_type: UInt64, source_id: 15 + CONSTANT id: 16, constant_value: UInt64_2, constant_value_type: UInt8 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: NULL, constant_value_type: Nullable(Nothing) + JOIN TREE + TABLE_FUNCTION id: 15, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 19, nodes: 1 + CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt8 +(5,NULL) +QUERY id: 0 + PROJECTION COLUMNS + (sum(if(equals(modulo(number, 2), 0), toNullable(1), 0)), NULL) Tuple(Nullable(UInt64), Nullable(Nothing)) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: tuple, function_type: ordinary, result_type: Tuple(Nullable(UInt64), Nullable(Nothing)) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sumOrNullIf, function_type: aggregate, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, 
constant_value_type: Nullable(UInt8) + EXPRESSION + FUNCTION id: 7, function_name: toNullable, function_type: ordinary, result_type: Nullable(UInt8) + ARGUMENTS + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 10, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: number, result_type: UInt64, source_id: 15 + CONSTANT id: 16, constant_value: UInt64_2, constant_value_type: UInt8 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: NULL, constant_value_type: Nullable(Nothing) + JOIN TREE + TABLE_FUNCTION id: 15, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 19, nodes: 1 + CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt8 diff --git a/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.sql b/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.sql new file mode 100644 index 00000000000..b283a69a020 --- /dev/null +++ b/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.sql @@ -0,0 +1,11 @@ +SET optimize_rewrite_sum_if_to_count_if = 1; + +SET allow_experimental_analyzer = 0; +SELECT (sumIf(toNullable(1), (number % 2) = 0), NULL) FROM numbers(10); +SELECT (sum(if((number % 2) = 0, toNullable(1), 0)), NULL) FROM numbers(10); + +SET allow_experimental_analyzer = 1; +SELECT (sumIf(toNullable(1), (number % 2) = 0), NULL) FROM numbers(10); +EXPLAIN QUERY TREE SELECT (sumIf(toNullable(1), (number % 2) = 0), NULL) FROM numbers(10); +SELECT (sum(if((number % 2) = 0, toNullable(1), 0)), NULL) FROM numbers(10); +EXPLAIN QUERY TREE SELECT (sum(if((number % 2) = 0, toNullable(1), 0)), NULL) FROM numbers(10); \ No newline at end of file diff --git 
a/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference b/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference new file mode 100644 index 00000000000..7c875a24b6d --- /dev/null +++ b/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference @@ -0,0 +1,81 @@ +123 +2009-02-14 00:31:30 +[1,2,3] +123 Nullable(UInt8) +\N Nullable(UInt8) +1 +255 +['Hello','wo\'rld\\'] +Hello wo\\\'rld\\\\ +wo\'rld\\ wo\\\'rld\\\\ +133 210 +210 +[123,456] +1 -1 +[] [] Array(Nothing) Array(Array(Array(Tuple(UInt64, String)))) +1970-01-01 01:00:00 +2009-02-14 00:31:30.123456 +1970-01-01 00:59:59.888889 +2009-02-14 00:31:30 +1970-01-01 01:00:00 +2299-12-31 23:59:59.000000 +2009-02-14 +2009-02-14 +123\0\0 +123 +123 +123 +123 +123 +123 +123 +123 +123 +123 +String 123 +123 UInt8 +200 UInt8 +123 +123 +1.1 +1.10000000000000016387 +18446744073709551615 +[1.1,2.3] +[1.10000000000000016387,2.29999999999999967236] +Row 1: +────── +CAST('1.1', 'Decimal(30, 20)'): 1.1 +CAST('1.1', 'Decimal(30, 20)'): 1.1 +CAST(plus(1, 1), 'UInt8'): 2 +-1 +\N +0 +0 +1970-01-01 01:00:00 +255 +123 +Hello\0\0\0\0\0 +Hello\0\0\0\0\0 +123.45 +2024-04-25 01:02:03 +2024-04-25 01:02:03.000000 +2024-04-25 01:02:03 +2024-04-25 01:02:03.456789 +2024-03-16 11:01:25 +2024-03-16 19:01:25 +2024-03-16 19:01:25 +2024-03-16 11:01:25 +123 \N \N \N +123 0 0 0 +123 10 10 10 +123 0 0 0 +Nullable(UInt8) UInt8 +123 +123 +123 +\N +2024-04-25 2024-01-01 02:03:04 1 12 +2024-04-25 2024-01-01 02:03:04.000000 2009-02-14 00:31:30 +2024-04-25 2024-01-01 02:03:04.000000 2009-02-14 00:31:30 +1986-04-25 13:00:00 +14 diff --git a/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql new file mode 100644 index 00000000000..708db0adce0 --- /dev/null +++ b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql @@ -0,0 +1,356 @@ +SET session_timezone = 'Europe/Amsterdam'; + +-- Type conversion functions and operators. + + +-- 1. 
SQL standard CAST operator: `CAST(value AS Type)`. + +SELECT CAST(123 AS String); + +-- It converts between various data types, including parameterized data types + +SELECT CAST(1234567890 AS DateTime('Europe/Amsterdam')); + +-- and composite data types: + +SELECT CAST('[1, 2, 3]' AS Array(UInt8)); + +-- Its return type depends on the setting `cast_keep_nullable`. If it is enabled, if the source argument type is Nullable, the resulting data type will be also Nullable, even if it is not written explicitly: + +SET cast_keep_nullable = 1; +SELECT CAST(x AS UInt8) AS y, toTypeName(y) FROM VALUES('x Nullable(String)', ('123'), ('NULL')); + +SET cast_keep_nullable = 0; +SELECT CAST(x AS UInt8) AS y, toTypeName(y) FROM VALUES('x Nullable(String)', ('123'), ('NULL')); -- { serverError CANNOT_PARSE_TEXT } + +-- There are various type conversion rules, some worth noting. + +-- Conversion between numeric types can involve implementation-defined overflow: + +SELECT CAST(257 AS UInt8); +SELECT CAST(-1 AS UInt8); + +-- Conversion from string acts like parsing, and for composite data types like Array, Tuple, it works in the same way as from the `Values` data format: + +SELECT CAST($$['Hello', 'wo\'rld\\']$$ AS Array(String)); + +-- ' +-- While for simple data types, it does not interpret escape sequences: + +SELECT arrayJoin(CAST($$['Hello', 'wo\'rld\\']$$ AS Array(String))) AS x, CAST($$wo\'rld\\$$ AS FixedString(9)) AS y; + +-- As conversion from String is similar to direct parsing rather than conversion from other types, +-- it can be stricter for numbers by not tolerating overflows in some cases: + +SELECT CAST(-123 AS UInt8), CAST(1234 AS UInt8); + +SELECT CAST('-123' AS UInt8); -- { serverError CANNOT_PARSE_NUMBER } + +-- In some cases it still allows overflows, but it is implementation defined: + +SELECT CAST('1234' AS UInt8); + +-- Parsing from a string does not tolerate extra whitespace characters: + +SELECT CAST(' 123' AS UInt8); -- { serverError CANNOT_PARSE_TEXT } 
+SELECT CAST('123 ' AS UInt8); -- { serverError CANNOT_PARSE_TEXT } + +-- But for composite data types, it involves a more featured parser, that takes care of whitespace inside the data structures: + +SELECT CAST('[ 123 ,456, ]' AS Array(UInt16)); + +-- Conversion from a floating point value to an integer will involve truncation towards zero: + +SELECT CAST(1.9, 'Int64'), CAST(-1.9, 'Int64'); + +-- Conversion from NULL into a non-Nullable type will throw an exception, as well as conversions from non-finite floating point numbers (NaN, inf, -inf) to an integer, or conversion between arrays of different dimensions. + +-- However, you might find it amusing that an empty array of Nothing data type can be converted to arrays of any dimensions: + +SELECT [] AS x, CAST(x AS Array(Array(Array(Tuple(UInt64, String))))) AS y, toTypeName(x), toTypeName(y); + +-- Conversion between numbers and DateTime/Date data types interprets the number as the number of seconds/days from the Unix epoch, +-- where Unix epoch starts from 1970-01-01T00:00:00Z (the midnight of Gregorian year 1970 in UTC), +-- and the number of seconds does not count leap seconds, as in Unix.
+ +-- For example, it is 1 AM in Amsterdam: + +SELECT CAST(0 AS DateTime('Europe/Amsterdam')); + +-- The numbers can be fractional and negative (for DateTime64): + +SELECT CAST(1234567890.123456 AS DateTime64(6, 'Europe/Amsterdam')); +SELECT CAST(-0.111111 AS DateTime64(6, 'Europe/Amsterdam')); + +-- If the result does not fit in the range of the corresponding time data types, it is truncated and saturated to the boundaries: + +SELECT CAST(1234567890.123456 AS DateTime('Europe/Amsterdam')); +SELECT CAST(-1 AS DateTime('Europe/Amsterdam')); + +SELECT CAST(1e20 AS DateTime64(6, 'Europe/Amsterdam')); + +-- A special case is DateTime64(9) - the maximum resolution, where it does not cover the usual range, +-- and in this case, it throws an exception on overflow (I don't mind if we change this behavior in the future): + + SELECT CAST(1e20 AS DateTime64(9, 'Europe/Amsterdam')); -- { serverError DECIMAL_OVERFLOW } + +-- If a number is converted to a Date data type, the value is interpreted as the number of days since the Unix epoch, +-- but if the number is larger than the range of the data type, it is interpreted as a unix timestamp +-- (the number of seconds since the Unix epoch), similarly to how it is done for the DateTime data type, +-- for convenience (while the internal representation of Date is the number of days, +-- often people want the unix timestamp to be also parsed into the Date data type): + +SELECT CAST(14289 AS Date); +SELECT CAST(1234567890 AS Date); + +-- When converting to a FixedString, if the length of the result data type is larger than the value, the result is padded with zero bytes: + +SELECT CAST('123' AS FixedString(5)) FORMAT TSV; + +-- But if it does not fit, an exception is thrown: + +SELECT CAST('12345' AS FixedString(3)) FORMAT TSV; -- { serverError TOO_LARGE_STRING_SIZE } + +-- The operator is case-insensitive: + +SELECT CAST(123 AS String); +SELECT cast(123 AS String); +SELECT Cast(123 AS String); + + +-- 2.
The functional form of this operator: `CAST(value, 'Type')`: + +SELECT CAST(123, 'String'); + +-- This form is equivalent. Keep in mind that the type has to be a constant expression: + +SELECT CAST(123, 'Str'||'ing'); -- this works. + +-- This does not work: SELECT materialize('String') AS type, CAST(123, type); + +-- It is also case-insensitive: + +SELECT CasT(123, 'String'); + +-- The functional form exists for the consistency of implementation (as every operator also exists in the functional form and the functional form is represented in the query's Abstract Syntax Tree). Anyway, the functional form also makes sense for users, when they need to construct a data type name from a constant expression, or when they want to generate a query programmatically. + +-- It's worth noting that the operator form does not allow specifying the type name as a string literal: + +-- This does not work: SELECT CAST(123 AS 'String'); + +-- By only allowing it as an identifier, either bare word: + +SELECT CAST(123 AS String); + +-- Or as MySQL-style or PostgreSQL-style quoted identifiers: + +SELECT CAST(123 AS `String`); +SELECT CAST(123 AS "String"); + +-- While the functional form only allows the type name as a string literal: + +SELECT CAST(123, 'String'); -- works +SELECT CAST(123, String); -- { serverError UNKNOWN_IDENTIFIER } + +-- However, you can cheat: + +SELECT 'String' AS String, CAST(123, String); + + +-- 3. The internal function `_CAST` which is different from `CAST` only by being not dependent on the value of `cast_keep_nullable` setting and other settings. + +-- This is needed when ClickHouse has to persist an expression for future use, like in table definitions, including primary and partition keys and other indices. + +-- The function is not intended to be used directly. When a user uses a regular `CAST` operator or function in a table definition, it is transparently converted to `_CAST` to persist its behavior.
However, the user can still use the internal version directly: + +SELECT _CAST(x, 'UInt8') AS y, toTypeName(y) FROM VALUES('x Nullable(String)', ('123'), ('456')); + +-- There is no operator form of this function: + +-- does not work, here UInt8 is interpreted as an alias for the value: +SELECT _CAST(123 AS UInt8); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT CAST(123 AS UInt8); -- works + + +-- 4. PostgreSQL-style cast syntax `::` + +SELECT 123::String; + +-- It has a difference from the `CAST` operator: if it is applied to a simple literal value, instead of performing a type conversion, it invokes the SQL parser directly on the corresponding text fragment of the query. The most important case will be the floating-point and decimal types. + +-- In this example, we parse `1.1` as Decimal and do not involve any type conversion: + +SELECT 1.1::Decimal(30, 20); + +-- In this example, `1.1` is first parsed as usual, yielding a Float64 value, and then converted to Decimal, producing a wrong result: + +SELECT CAST(1.1 AS Decimal(30, 20)); + +-- We can change this behavior in the future. 
+ +-- Another example: + +SELECT -1::UInt64; -- { serverError CANNOT_PARSE_NUMBER } + +SELECT CAST(-1 AS UInt64); -- conversion with overflow + +-- For composite data types, if a value is a literal, it is parsed directly: + +SELECT [1.1, 2.3]::Array(Decimal(30, 20)); + +-- But if the value contains expressions, the usage of the `::` operator will be equivalent to invoking the CAST operator on the expression: + +SELECT [1.1, 2.3 + 0]::Array(Decimal(30, 20)); + +-- The automatic column name for the result of an application of the `::` operator may be the same as for the result of an application of the CAST operator to a string containing the corresponding fragment of the query or to a corresponding expression: + +SELECT 1.1::Decimal(30, 20), CAST('1.1' AS Decimal(30, 20)), (1+1)::UInt8 FORMAT Vertical; + +-- The operator has the highest priority among others: + +SELECT 1-1::String; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- But one interesting example is the unary minus. Here the minus is not an operator but part of the numeric literal: + +SELECT -1::String; + +-- Here it is an operator: + +SELECT 1 AS x, -x::String; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + + +-- 5. Accurate casting functions: `accurateCast`, `accurateCastOrNull`, `accurateCastOrDefault`. + +-- These functions check if the value is exactly representable in the target data type. 
+ +-- The function `accurateCast` performs the conversion or throws an exception if the value is not exactly representable: + +SELECT accurateCast(1.123456789, 'Float32'); -- { serverError CANNOT_CONVERT_TYPE } + +-- The function `accurateCastOrNull` always wraps the target type into Nullable, and returns NULL if the value is not exactly representable: + +SELECT accurateCastOrNull(1.123456789, 'Float32'); + +-- The function `accurateCastOrDefault` takes an additional parameter, which must be of the target type, and returns it if the value is not exactly representable: + +SELECT accurateCastOrDefault(-1, 'UInt64', 0::UInt64); + +-- If this parameter is omitted, it is assumed to be the default value of the corresponding data type: + +SELECT accurateCastOrDefault(-1, 'UInt64'); +SELECT accurateCastOrDefault(-1, 'DateTime'); + +-- Unfortunately, this does not work as expected: SELECT accurateCastOrDefault(-1, $$Enum8('None' = 1, 'Hello' = 2, 'World' = 3)$$); +-- https://github.com/ClickHouse/ClickHouse/issues/61495 + +-- These functions are case-sensitive, and there are no corresponding operators: + +SELECT ACCURATECAST(1, 'String'); -- { serverError UNKNOWN_FUNCTION }. + + +-- 6. Explicit conversion functions: + +-- `toString`, `toFixedString`, +-- `toUInt8`, `toUInt16`, `toUInt32`, `toUInt64`, `toUInt128`, `toUInt256`, +-- `toInt8`, `toInt16`, `toInt32`, `toInt64`, `toInt128`, `toInt256`, +-- `toFloat32`, `toFloat64`, +-- `toDecimal32`, `toDecimal64`, `toDecimal128`, `toDecimal256`, +-- `toDate`, `toDate32`, `toDateTime`, `toDateTime64`, +-- `toUUID`, `toIPv4`, `toIPv6`, +-- `toIntervalNanosecond`, `toIntervalMicrosecond`, `toIntervalMillisecond`, +-- `toIntervalSecond`, `toIntervalMinute`, `toIntervalHour`, +-- `toIntervalDay`, `toIntervalWeek`, `toIntervalMonth`, `toIntervalQuarter`, `toIntervalYear` + +-- These functions work under the same rules as the CAST operator and can be thought of as elementary implementation parts of that operator.
They allow implementation-defined overflow while converting between numeric types. + +SELECT toUInt8(-1); + +-- These are ClickHouse-native conversion functions. They take an argument with the input value, and for some of the data types (`FixedString`, `DateTime`, `DateTime64`, `Decimal`s), the subsequent arguments are constant expressions, defining the parameters of these data types, or the rules to interpret the source value. + +SELECT toFloat64(123); -- no arguments +SELECT toFixedString('Hello', 10) FORMAT TSV; -- the parameter of the FixedString data type, the function returns FixedString(10) +SELECT toFixedString('Hello', 5 + 5) FORMAT TSV; -- it can be a constant expression + +SELECT toDecimal32('123.456', 2); -- the scale of the Decimal data type + +SELECT toDateTime('2024-04-25 01:02:03', 'Europe/Amsterdam'); -- the time zone of DateTime +SELECT toDateTime64('2024-04-25 01:02:03', 6, 'Europe/Amsterdam'); -- the scale of DateTime64 and its time zone + +-- The length of FixedString and the scale of Decimal and DateTime64 types are mandatory arguments, while the time zone of the DateTime data type is optional. + +-- If the time zone is not specified, the time zone of the argument's data type is used, and if the argument is not a date time, the session time zone is used. + +SELECT toDateTime('2024-04-25 01:02:03'); +SELECT toDateTime64('2024-04-25 01:02:03.456789', 6); + +-- Here, the time zone can be specified as the rule of interpretation of the value during conversion: + +SELECT toString(1710612085::DateTime, 'America/Los_Angeles'); +SELECT toString(1710612085::DateTime); + +-- In the case when the time zone is not the part of the resulting data type, but a rule of interpretation of the source value, +-- it can be non-constant. 
Let's clarify: in this example, the resulting data type is a String; it does not have a time zone parameter: + +SELECT toString(1710612085::DateTime, tz) FROM Values('tz String', 'Europe/Amsterdam', 'America/Los_Angeles'); + +-- Functions converting to numeric types, date and datetime, IP and UUID, also have versions with -OrNull, -OrZero, and -OrDefault fallbacks, +-- that don't throw exceptions on parsing errors. +-- They use the same rules as the accurateCast function: + +SELECT toUInt8OrNull('123'), toUInt8OrNull('-123'), toUInt8OrNull('1234'), toUInt8OrNull(' 123'); +SELECT toUInt8OrZero('123'), toUInt8OrZero('-123'), toUInt8OrZero('1234'), toUInt8OrZero(' 123'); +SELECT toUInt8OrDefault('123', 10), toUInt8OrDefault('-123', 10), toUInt8OrDefault('1234', 10), toUInt8OrDefault(' 123', 10); +SELECT toUInt8OrDefault('123'), toUInt8OrDefault('-123'), toUInt8OrDefault('1234'), toUInt8OrDefault(' 123'); + +SELECT toTypeName(toUInt8OrNull('123')), toTypeName(toUInt8OrZero('123')); + +-- These functions are only applicable to string data types. +-- Although there is room for extension: + +SELECT toUInt8OrNull(123); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- String and FixedString work: + +SELECT toUInt8OrNull(123::FixedString(3)); + +-- For the FixedString data type trailing zero bytes are allowed, because they are the padding for FixedString: + +SELECT toUInt8OrNull('123'::FixedString(4)); +SELECT toUInt8OrNull('123\0'::FixedString(4)); + +-- While for String, they are not: + +SELECT toUInt8OrNull('123\0'); + + +-- 7. SQL-compatibility type-defining operators: + +SELECT DATE '2024-04-25', TIMESTAMP '2024-01-01 02:03:04', INTERVAL 1 MINUTE, INTERVAL '12 hour'; + +-- These operators are interpreted as the corresponding explicit conversion functions. + + +-- 8. SQL-compatibility aliases for explicit conversion functions: + +SELECT DATE('2024-04-25'), TIMESTAMP('2024-01-01 02:03:04'), FROM_UNIXTIME(1234567890); + +-- These functions exist for compatibility with MySQL.
They are case-insensitive. + +SELECT date '2024-04-25', timeSTAMP('2024-01-01 02:03:04'), From_Unixtime(1234567890); + + +-- 9. Specialized conversion functions: + +-- `parseDateTimeBestEffort`, `parseDateTimeBestEffortUS`, `parseDateTime64BestEffort`, `parseDateTime64BestEffortUS`, `toUnixTimestamp` + +-- These functions are similar to explicit conversion functions but provide special rules on how the conversion is performed. + +SELECT parseDateTimeBestEffort('25 Apr 1986 1pm'); + + +-- 10. Functions for converting between different components or rounding of date and time data types. + +SELECT toDayOfMonth(toDateTime(1234567890)); + +-- These functions are covered in a separate topic. diff --git a/tests/queries/0_stateless/03012_parser_backtracking.reference b/tests/queries/0_stateless/03012_parser_backtracking.reference new file mode 100644 index 00000000000..84727754516 --- /dev/null +++ b/tests/queries/0_stateless/03012_parser_backtracking.reference @@ -0,0 +1 @@ +TOO_SLOW_PARSING diff --git a/tests/queries/0_stateless/03012_parser_backtracking.sh b/tests/queries/0_stateless/03012_parser_backtracking.sh new file mode 100755 index 00000000000..889753fb048 --- /dev/null +++ b/tests/queries/0_stateless/03012_parser_backtracking.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --query "SELECT((((((((((SELECT(((((((((SELECT((((((((((SELECT(((((((((SELECT((((((((((SELECT(((((((((SELECT 1+)))))))))))))))))))))))))))))))))))))))))))))))))))))))))" 2>&1 | grep -o -F 'TOO_SLOW_PARSING' diff --git a/utils/check-marks/CMakeLists.txt b/utils/check-marks/CMakeLists.txt index 05546a2989b..857dc51d40a 100644 --- a/utils/check-marks/CMakeLists.txt +++ b/utils/check-marks/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (check-marks main.cpp) -target_link_libraries(check-marks PRIVATE dbms boost::program_options) +target_link_libraries(check-marks PRIVATE dbms clickhouse_functions boost::program_options) diff --git a/utils/check-mysql-binlog/CMakeLists.txt b/utils/check-mysql-binlog/CMakeLists.txt index cbbecd456a0..0ece1684e6b 100644 --- a/utils/check-mysql-binlog/CMakeLists.txt +++ b/utils/check-mysql-binlog/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable(check-mysql-binlog main.cpp) -target_link_libraries(check-mysql-binlog PRIVATE dbms boost::program_options) +target_link_libraries(check-mysql-binlog PRIVATE dbms clickhouse_functions boost::program_options) diff --git a/utils/check-style/check-large-objects.sh b/utils/check-style/check-large-objects.sh index 5c1276e5732..2122cca911e 100755 --- a/utils/check-style/check-large-objects.sh +++ b/utils/check-style/check-large-objects.sh @@ -1,18 +1,15 @@ #!/usr/bin/env bash +export LC_ALL=C # The "total" should be printed without localization + # Check that there are no new translation units compiled to an object file larger than a certain size. 
TU_EXCLUDES=( - CastOverloadResolver AggregateFunctionUniq - FunctionsConversion - - RangeHashedDictionary - Aggregator ) -if find $1 -name '*.o' | xargs wc -c | grep -v total | sort -rn | awk '{ if ($1 > 50000000) print }' \ +if find $1 -name '*.o' | xargs wc -c | grep --regexp='\.o$' | sort -rn | awk '{ if ($1 > 50000000) print }' \ | grep -v -f <(printf "%s\n" "${TU_EXCLUDES[@]}") then echo "^ It's not allowed to have so large translation units." diff --git a/utils/keeper-data-dumper/CMakeLists.txt b/utils/keeper-data-dumper/CMakeLists.txt index 136d6f2268c..71f29781b73 100644 --- a/utils/keeper-data-dumper/CMakeLists.txt +++ b/utils/keeper-data-dumper/CMakeLists.txt @@ -4,4 +4,4 @@ if (NOT TARGET ch_contrib::nuraft) endif () clickhouse_add_executable(keeper-data-dumper main.cpp) -target_link_libraries(keeper-data-dumper PRIVATE dbms) +target_link_libraries(keeper-data-dumper PRIVATE dbms clickhouse_functions) diff --git a/utils/postprocess-traces/postprocess-traces.pl b/utils/postprocess-traces/postprocess-traces.pl index 476fb46418f..3e50f64d864 100755 --- a/utils/postprocess-traces/postprocess-traces.pl +++ b/utils/postprocess-traces/postprocess-traces.pl @@ -8,6 +8,19 @@ use Data::Dumper; my @current_stack = (); my $grouped_stacks = {}; +sub process_stacktrace +{ + my $group = \$grouped_stacks; + for my $frame (reverse @current_stack) + { + $$group->{count} ||= 0; + ++$$group->{count}; + $group = \$$group->{children}{$frame}; + } + + @current_stack = (); +} + while (my $line = <>) { chomp $line; @@ -21,18 +34,12 @@ while (my $line = <>) if ($line eq '') { - my $group = \$grouped_stacks; - for my $frame (reverse @current_stack) - { - $$group->{count} ||= 0; - ++$$group->{count}; - $group = \$$group->{children}{$frame}; - } - - @current_stack = (); + process_stacktrace(); } } +process_stacktrace(); + sub print_group { my $group = shift;