diff --git a/base/base/IPv4andIPv6.h b/base/base/IPv4andIPv6.h new file mode 100644 index 00000000000..0e97d83b07e --- /dev/null +++ b/base/base/IPv4andIPv6.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + + using IPv4 = StrongTypedef; + + struct IPv6 : StrongTypedef + { + constexpr IPv6() = default; + constexpr explicit IPv6(const UInt128 & x) : StrongTypedef(x) {} + constexpr explicit IPv6(UInt128 && x) : StrongTypedef(std::move(x)) {} + + IPv6 & operator=(const UInt128 & rhs) { StrongTypedef::operator=(rhs); return *this; } + IPv6 & operator=(UInt128 && rhs) { StrongTypedef::operator=(std::move(rhs)); return *this; } + + bool operator<(const IPv6 & rhs) const + { + return + memcmp16( + reinterpret_cast(toUnderType().items), + reinterpret_cast(rhs.toUnderType().items) + ) < 0; + } + + bool operator>(const IPv6 & rhs) const + { + return + memcmp16( + reinterpret_cast(toUnderType().items), + reinterpret_cast(rhs.toUnderType().items) + ) > 0; + } + + bool operator==(const IPv6 & rhs) const + { + return + memcmp16( + reinterpret_cast(toUnderType().items), + reinterpret_cast(rhs.toUnderType().items) + ) == 0; + } + + bool operator<=(const IPv6 & rhs) const { return !operator>(rhs); } + bool operator>=(const IPv6 & rhs) const { return !operator<(rhs); } + bool operator!=(const IPv6 & rhs) const { return !operator==(rhs); } + }; + +} diff --git a/base/base/TypeName.h b/base/base/TypeName.h index b3ea47fad5d..9005b5a2bf4 100644 --- a/base/base/TypeName.h +++ b/base/base/TypeName.h @@ -2,6 +2,7 @@ #include "Decimal.h" #include "UUID.h" +#include "IPv4andIPv6.h" namespace DB { @@ -35,6 +36,8 @@ TN_MAP(Float32) TN_MAP(Float64) TN_MAP(String) TN_MAP(UUID) +TN_MAP(IPv4) +TN_MAP(IPv6) TN_MAP(Decimal32) TN_MAP(Decimal64) TN_MAP(Decimal128) diff --git a/docker/images.json b/docker/images.json index bc8e9924955..323f53c0ae6 100644 --- a/docker/images.json +++ b/docker/images.json @@ -63,10 +63,6 @@ "name": 
"clickhouse/integration-tests-runner", "dependent": [] }, - "docker/test/testflows/runner": { - "name": "clickhouse/testflows-runner", - "dependent": [] - }, "docker/test/fasttest": { "name": "clickhouse/fasttest", "dependent": [] diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 22d6282d71c..8b85b886fec 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="22.12.1.1752" +ARG VERSION="22.12.2.25" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 3135ec508de..9348bfdcf7a 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="22.12.1.1752" +ARG VERSION="22.12.2.25" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 164f2e28d76..54ea9e0b00f 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -262,14 +262,17 @@ quit if [ "$server_died" == 1 ] then # The server has died. - if ! grep --text -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt + if ! 
grep -E --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*' server.log > description.txt then echo "Lost connection to server. See the logs." > description.txt fi - if grep -E --text 'Sanitizer: (out-of-memory|failed to allocate)' description.txt + IS_SANITIZED=$(clickhouse-local --query "SELECT value LIKE '%-fsanitize=%' FROM system.build_options WHERE name = 'CXX_FLAGS'") + + if [ "${IS_SANITIZED}" -eq "1" ] && grep -E --text 'Sanitizer: (out-of-memory|out of memory|failed to allocate|Child process was terminated by signal 9)' description.txt then # OOM of sanitizer is not a problem we can handle - treat it as success, but preserve the description. + # Why? Because sanitizers have the memory overhead, that is not controllable from inside clickhouse-server. task_exit_code=0 echo "success" > status.txt else diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index ccfd63c8ed0..ce5bae2a031 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -8,6 +8,7 @@ RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list RUN apt-get update \ && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ + adduser \ ca-certificates \ bash \ btrfs-progs \ diff --git a/docs/changelogs/v22.12.2.25-stable.md b/docs/changelogs/v22.12.2.25-stable.md new file mode 100644 index 00000000000..e38fd2045ac --- /dev/null +++ b/docs/changelogs/v22.12.2.25-stable.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.12.2.25-stable (c790cfd4465) FIXME as compared to v22.12.1.1752-stable (688e488e930) + +#### Build/Testing/Packaging Improvement +* Backported in [#44381](https://github.com/ClickHouse/ClickHouse/issues/44381): In rare cases, we don't rebuild binaries, because another task 
with a similar prefix succeeded. E.g. `binary_darwin` wasn't restarted because `binary_darwin_aarch64` had succeeded. [#44311](https://github.com/ClickHouse/ClickHouse/pull/44311) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#44561](https://github.com/ClickHouse/ClickHouse/issues/44561): Retry the integration tests on compressing errors. [#44529](https://github.com/ClickHouse/ClickHouse/pull/44529) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#44739](https://github.com/ClickHouse/ClickHouse/issues/44739): [#40651](https://github.com/ClickHouse/ClickHouse/issues/40651) [#41404](https://github.com/ClickHouse/ClickHouse/issues/41404). [#42126](https://github.com/ClickHouse/ClickHouse/pull/42126) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#44764](https://github.com/ClickHouse/ClickHouse/issues/44764): Fix parsing of bad version from compatibility setting. [#44224](https://github.com/ClickHouse/ClickHouse/pull/44224) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#44435](https://github.com/ClickHouse/ClickHouse/issues/44435): Fix possible crash in case function `IN` with constant arguments was used as a constant argument together with `LowCardinality`. Fixes [#44221](https://github.com/ClickHouse/ClickHouse/issues/44221). [#44346](https://github.com/ClickHouse/ClickHouse/pull/44346) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Bump libdivide (to gain some new optimizations) [#44132](https://github.com/ClickHouse/ClickHouse/pull/44132) ([Azat Khuzhin](https://github.com/azat)). +* Add some settings under `compatibility` [#44209](https://github.com/ClickHouse/ClickHouse/pull/44209) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix deadlock in StorageSystemDatabases [#44272](https://github.com/ClickHouse/ClickHouse/pull/44272) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Get rid of global Git object [#44273](https://github.com/ClickHouse/ClickHouse/pull/44273) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add check for submodules sanity [#44386](https://github.com/ClickHouse/ClickHouse/pull/44386) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Implement a custom central checkout action [#44399](https://github.com/ClickHouse/ClickHouse/pull/44399) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix crash on delete from materialized view [#44705](https://github.com/ClickHouse/ClickHouse/pull/44705) ([Alexander Gololobov](https://github.com/davenger)). + diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index ef3efa75d66..cb86ef06952 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -154,10 +154,6 @@ Runs [stateful functional tests](tests.md#functional-tests). Treat them in the s Runs [integration tests](tests.md#integration-tests). -## Testflows Check -Runs some tests using Testflows test system. See [here](https://github.com/ClickHouse/ClickHouse/tree/master/tests/testflows#running-tests-locally) how to run them locally. - - ## Stress Test Runs stateless functional tests concurrently from several clients to detect concurrency-related errors. If it fails: diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index f8195a06b6e..f917286029a 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -281,10 +281,6 @@ We also track test coverage but only for functional tests and only for clickhous There is automated check for flaky tests. It runs all new tests 100 times (for functional tests) or 10 times (for integration tests). If at least single time the test failed, it is considered flaky. 
-## Testflows - -[Testflows](https://testflows.com/) is an enterprise-grade open-source testing framework, which is used to test a subset of ClickHouse. - ## Test Automation {#test-automation} We run tests with [GitHub Actions](https://github.com/features/actions). diff --git a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md index b07f4a29396..104ec049ec4 100644 --- a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md @@ -19,7 +19,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( Path String, Time DateTime, - Value , + Value Float64, Version ... ) ENGINE = GraphiteMergeTree(config_section) @@ -37,7 +37,7 @@ A table for the Graphite data should have the following columns for the followin - Time of measuring the metric. Data type: `DateTime`. -- Value of the metric. Data type: any numeric. +- Value of the metric. Data type: `Float64`. - Version of the metric. Data type: any numeric (ClickHouse saves the rows with the highest version or the last written if versions are the same. Other rows are deleted during the merge of data parts). @@ -65,7 +65,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] EventDate Date, Path String, Time DateTime, - Value , + Value Float64, Version ... 
) ENGINE [=] GraphiteMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, config_section) diff --git a/docs/en/engines/table-engines/special/join.md b/docs/en/engines/table-engines/special/join.md index a49214bd00a..3f408db2933 100644 --- a/docs/en/engines/table-engines/special/join.md +++ b/docs/en/engines/table-engines/special/join.md @@ -86,7 +86,18 @@ When creating a table, the following settings are applied: [join_any_take_last_row](/docs/en/operations/settings/settings.md/#settings-join_any_take_last_row) #### join_use_nulls -[persistent](/docs/en/operations/settings/settings.md/#persistent) +#### persistent + +Disables persistency for the Join and [Set](/docs/en/engines/table-engines/special/set.md) table engines. + +Reduces the I/O overhead. Suitable for scenarios that pursue performance and do not require persistence. + +Possible values: + +- 1 — Enabled. +- 0 — Disabled. + +Default value: `1`. The `Join`-engine tables can’t be used in `GLOBAL JOIN` operations. diff --git a/docs/en/engines/table-engines/special/set.md b/docs/en/engines/table-engines/special/set.md index 3a3e7c4d5de..451835f2b95 100644 --- a/docs/en/engines/table-engines/special/set.md +++ b/docs/en/engines/table-engines/special/set.md @@ -19,5 +19,16 @@ For a rough server restart, the block of data on the disk might be lost or damag When creating a table, the following settings are applied: -- [persistent](../../../operations/settings/settings.md#persistent) +#### persistent + +Disables persistency for the Set and [Join](/docs/en/engines/table-engines/special/join.md/#join) table engines. + +Reduces the I/O overhead. Suitable for scenarios that pursue performance and do not require persistence. + +Possible values: + +- 1 — Enabled. +- 0 — Disabled. + +Default value: `1`. 
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c2afd740ff2..bde0a0e8290 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2441,19 +2441,6 @@ Result └──────────────────────────┴───────┴───────────────────────────────────────────────────────┘ ``` -## persistent {#persistent} - -Disables persistency for the [Set](../../engines/table-engines/special/set.md/#set) and [Join](../../engines/table-engines/special/join.md/#join) table engines. - -Reduces the I/O overhead. Suitable for scenarios that pursue performance and do not require persistence. - -Possible values: - -- 1 — Enabled. -- 0 — Disabled. - -Default value: `1`. - ## allow_nullable_key {#allow-nullable-key} Allows using of the [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable)-typed values in a sorting and a primary key for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md/#table_engines-mergetree) tables. diff --git a/docs/en/operations/system-tables/data_skipping_indices.md b/docs/en/operations/system-tables/data_skipping_indices.md index 338c6d02206..f1e233b33f7 100644 --- a/docs/en/operations/system-tables/data_skipping_indices.md +++ b/docs/en/operations/system-tables/data_skipping_indices.md @@ -11,6 +11,7 @@ Columns: - `table` ([String](../../sql-reference/data-types/string.md)) — Table name. - `name` ([String](../../sql-reference/data-types/string.md)) — Index name. - `type` ([String](../../sql-reference/data-types/string.md)) — Index type. +- `type_full` ([String](../../sql-reference/data-types/string.md)) — Index type expression from create statement. - `expr` ([String](../../sql-reference/data-types/string.md)) — Expression for the index calculation. - `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of granules in the block. 
- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of compressed data, in bytes. @@ -30,6 +31,7 @@ database: default table: user_actions name: clicks_idx type: minmax +type_full: minmax expr: clicks granularity: 1 data_compressed_bytes: 58 @@ -42,6 +44,7 @@ database: default table: users name: contacts_null_idx type: minmax +type_full: minmax expr: assumeNotNull(contacts_null) granularity: 1 data_compressed_bytes: 58 diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 5f6fad9ab9a..225f2b162ab 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -806,7 +806,8 @@ Aliases: `dateSub`, `DATE_SUB`. **Arguments** -- `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval to subtract. Note: The unit should be unquoted. + Possible values: - `second` diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index d9e811a5703..b4b7ec5ab21 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -416,6 +416,7 @@ Functions: - `dictGetDateTime` - `dictGetUUID` - `dictGetString` +- `dictGetIPv4`, `dictGetIPv6` All these functions have the `OrDefault` modification. For example, `dictGetDateOrDefault`. diff --git a/docs/zh/development/continuous-integration.md b/docs/zh/development/continuous-integration.md index e0c8b41147a..fea834b1b8a 100644 --- a/docs/zh/development/continuous-integration.md +++ b/docs/zh/development/continuous-integration.md @@ -110,9 +110,6 @@ git push ## 集成测试 {#integration-tests} 运行[集成测试](./tests.md#integration-tests). 
-## Testflows 检查{#testflows-check} -使用Testflows测试系统去运行一些测试, 在[此处](https://github.com/ClickHouse/ClickHouse/tree/master/tests/testflows#running-tests-locally)查看如何在本地运行它们. - ## 压力测试 {#stress-test} 从多个客户端并发运行无状态功能测试, 用以检测与并发相关的错误.如果失败: ``` diff --git a/docs/zh/development/tests.md b/docs/zh/development/tests.md index 6f1118e5e63..c16bc26935a 100644 --- a/docs/zh/development/tests.md +++ b/docs/zh/development/tests.md @@ -281,10 +281,6 @@ We also track test coverage but only for functional tests and only for clickhous There is automated check for flaky tests. It runs all new tests 100 times (for functional tests) or 10 times (for integration tests). If at least single time the test failed, it is considered flaky. -## Testflows - -[Testflows](https://testflows.com/) is an enterprise-grade open-source testing framework, which is used to test a subset of ClickHouse. - ## Test Automation {#test-automation} We run tests with [GitHub Actions](https://github.com/features/actions). diff --git a/packages/clickhouse-keeper.yaml b/packages/clickhouse-keeper.yaml index f2095dda02a..7586fa580e6 100644 --- a/packages/clickhouse-keeper.yaml +++ b/packages/clickhouse-keeper.yaml @@ -12,8 +12,6 @@ priority: "optional" conflicts: - clickhouse-server -depends: -- adduser suggests: - clickhouse-keeper-dbg diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml index fe59828ca43..018e88ef828 100644 --- a/packages/clickhouse-server.yaml +++ b/packages/clickhouse-server.yaml @@ -12,8 +12,6 @@ priority: "optional" conflicts: - clickhouse-keeper -depends: -- adduser replaces: - clickhouse-server-common - clickhouse-server-base diff --git a/src/AggregateFunctions/AggregateFunctionMap.h b/src/AggregateFunctions/AggregateFunctionMap.h index dc19bf3f71c..91530698bf4 100644 --- a/src/AggregateFunctions/AggregateFunctionMap.h +++ b/src/AggregateFunctions/AggregateFunctionMap.h @@ -116,13 +116,9 @@ public: static DataTypePtr getKeyType(const DataTypes & types, const 
AggregateFunctionPtr & nested) { - if (types.empty()) + if (types.size() != 1) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Aggregate function {}Map requires at least one argument", nested->getName()); - - if (types.size() > 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Aggregate function {}Map requires only one map argument", nested->getName()); + "Aggregate function {}Map requires one map argument, but {} found", nested->getName(), types.size()); const auto * map_type = checkAndGetDataType(types[0].get()); if (!map_type) diff --git a/src/AggregateFunctions/AggregateFunctionOrFill.h b/src/AggregateFunctions/AggregateFunctionOrFill.h index eeec630be9a..4679e086506 100644 --- a/src/AggregateFunctions/AggregateFunctionOrFill.h +++ b/src/AggregateFunctions/AggregateFunctionOrFill.h @@ -36,8 +36,8 @@ public: AggregateFunctionOrFill(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) : IAggregateFunctionHelper{arguments, params, createResultType(nested_function_->getResultType())} , nested_function{nested_function_} - , size_of_data {nested_function->sizeOfData()} - , inner_nullable {nested_function->getResultType()->isNullable()} + , size_of_data{nested_function->sizeOfData()} + , inner_nullable{nested_function->getResultType()->isNullable()} { // nothing } diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index 4a1088a87bd..6c62adb2e65 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -428,10 +428,7 @@ public: } bool keepKey(const T & key) const { return static_cast(*this).keepKey(key); } - String getName() const override { return getNameImpl(); } - -private: - static String getNameImpl() { return Derived::getNameImpl(); } + String getName() const override { return Derived::getNameImpl(); } }; template diff --git 
a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index a5d1887f85e..7a57a433cba 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -65,9 +65,9 @@ class IAggregateFunction : public std::enable_shared_from_this(); } -ColumnsWithTypeAndName FunctionNode::getArgumentTypes() const +ColumnsWithTypeAndName FunctionNode::getArgumentColumns() const { - ColumnsWithTypeAndName argument_types; - for (const auto & arg : getArguments().getNodes()) + const auto & arguments = getArguments().getNodes(); + ColumnsWithTypeAndName argument_columns; + argument_columns.reserve(arguments.size()); + + for (const auto & arg : arguments) { ColumnWithTypeAndName argument; argument.type = arg->getResultType(); if (auto * constant = arg->as()) argument.column = argument.type->createColumnConst(1, constant->getValue()); - argument_types.push_back(argument); + argument_columns.push_back(std::move(argument)); } - return argument_types; + return argument_columns; } void FunctionNode::resolveAsFunction(FunctionBasePtr function_value) diff --git a/src/Analyzer/FunctionNode.h b/src/Analyzer/FunctionNode.h index 501d439e55e..49e66ba32c1 100644 --- a/src/Analyzer/FunctionNode.h +++ b/src/Analyzer/FunctionNode.h @@ -1,12 +1,14 @@ #pragma once #include -#include +#include +#include #include #include -#include #include #include +#include +#include namespace DB { @@ -19,12 +21,6 @@ namespace ErrorCodes class IFunctionOverloadResolver; using FunctionOverloadResolverPtr = std::shared_ptr; -class IFunctionBase; -using FunctionBasePtr = std::shared_ptr; - -class IAggregateFunction; -using AggregateFunctionPtr = std::shared_ptr; - /** Function node represents function in query tree. * Function syntax: function_name(parameter_1, ...)(argument_1, ...). * If function does not have parameters its syntax is function_name(argument_1, ...). 
@@ -63,66 +59,36 @@ public: explicit FunctionNode(String function_name_); /// Get function name - const String & getFunctionName() const - { - return function_name; - } + const String & getFunctionName() const { return function_name; } /// Get parameters - const ListNode & getParameters() const - { - return children[parameters_child_index]->as(); - } + const ListNode & getParameters() const { return children[parameters_child_index]->as(); } /// Get parameters - ListNode & getParameters() - { - return children[parameters_child_index]->as(); - } + ListNode & getParameters() { return children[parameters_child_index]->as(); } /// Get parameters node - const QueryTreeNodePtr & getParametersNode() const - { - return children[parameters_child_index]; - } + const QueryTreeNodePtr & getParametersNode() const { return children[parameters_child_index]; } /// Get parameters node - QueryTreeNodePtr & getParametersNode() - { - return children[parameters_child_index]; - } + QueryTreeNodePtr & getParametersNode() { return children[parameters_child_index]; } /// Get arguments - const ListNode & getArguments() const - { - return children[arguments_child_index]->as(); - } + const ListNode & getArguments() const { return children[arguments_child_index]->as(); } /// Get arguments - ListNode & getArguments() - { - return children[arguments_child_index]->as(); - } + ListNode & getArguments() { return children[arguments_child_index]->as(); } /// Get arguments node - const QueryTreeNodePtr & getArgumentsNode() const - { - return children[arguments_child_index]; - } + const QueryTreeNodePtr & getArgumentsNode() const { return children[arguments_child_index]; } /// Get arguments node - QueryTreeNodePtr & getArgumentsNode() - { - return children[arguments_child_index]; - } + QueryTreeNodePtr & getArgumentsNode() { return children[arguments_child_index]; } - ColumnsWithTypeAndName getArgumentTypes() const; + ColumnsWithTypeAndName getArgumentColumns() const; /// Returns true if function node 
has window, false otherwise - bool hasWindow() const - { - return children[window_child_index] != nullptr; - } + bool hasWindow() const { return children[window_child_index] != nullptr; } /** Get window node. * Valid only for window function node. @@ -130,18 +96,12 @@ public: * 1. It can be identifier node if window function is defined as expr OVER window_name. * 2. It can be window node if window function is defined as expr OVER (window_name ...). */ - const QueryTreeNodePtr & getWindowNode() const - { - return children[window_child_index]; - } + const QueryTreeNodePtr & getWindowNode() const { return children[window_child_index]; } /** Get window node. * Valid only for window function node. */ - QueryTreeNodePtr & getWindowNode() - { - return children[window_child_index]; - } + QueryTreeNodePtr & getWindowNode() { return children[window_child_index]; } /** Get non aggregate function. * If function is not resolved nullptr returned. @@ -150,7 +110,7 @@ public: { if (kind != FunctionKind::ORDINARY) return {}; - return std::reinterpret_pointer_cast(function); + return std::static_pointer_cast(function); } /** Get aggregate function. 
@@ -161,32 +121,20 @@ public: { if (kind == FunctionKind::UNKNOWN || kind == FunctionKind::ORDINARY) return {}; - return std::reinterpret_pointer_cast(function); + return std::static_pointer_cast(function); } /// Is function node resolved - bool isResolved() const - { - return function != nullptr; - } + bool isResolved() const { return function != nullptr; } /// Is function node window function - bool isWindowFunction() const - { - return hasWindow(); - } + bool isWindowFunction() const { return hasWindow(); } /// Is function node aggregate function - bool isAggregateFunction() const - { - return kind == FunctionKind::AGGREGATE; - } + bool isAggregateFunction() const { return kind == FunctionKind::AGGREGATE; } /// Is function node ordinary function - bool isOrdinaryFunction() const - { - return kind == FunctionKind::ORDINARY; - } + bool isOrdinaryFunction() const { return kind == FunctionKind::ORDINARY; } /** Resolve function node as non aggregate function. * It is important that function name is updated with resolved function name. 
@@ -208,10 +156,7 @@ public: */ void resolveAsWindowFunction(AggregateFunctionPtr window_function_value); - QueryTreeNodeType getNodeType() const override - { - return QueryTreeNodeType::FUNCTION; - } + QueryTreeNodeType getNodeType() const override { return QueryTreeNodeType::FUNCTION; } DataTypePtr getResultType() const override { diff --git a/src/Analyzer/Passes/CustomizeFunctionsPass.cpp b/src/Analyzer/Passes/CustomizeFunctionsPass.cpp index 7eb4a040970..407614dbe50 100644 --- a/src/Analyzer/Passes/CustomizeFunctionsPass.cpp +++ b/src/Analyzer/Passes/CustomizeFunctionsPass.cpp @@ -155,7 +155,7 @@ public: inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { auto function = FunctionFactory::instance().get(function_name, context); - function_node.resolveAsFunction(function->build(function_node.getArgumentTypes())); + function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); } private: diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 0c5a450135f..aff932e39db 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -193,7 +193,7 @@ private: inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { auto function = FunctionFactory::instance().get(function_name, context); - function_node.resolveAsFunction(function->build(function_node.getArgumentTypes())); + function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); } ContextPtr & context; diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp index f354a7b1ec3..c73e1048dc2 100644 --- a/src/Analyzer/Passes/FuseFunctionsPass.cpp +++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp @@ -65,7 +65,7 @@ QueryTreeNodePtr createResolvedFunction(const ContextPtr & context, const String auto function = 
FunctionFactory::instance().get(name, context); function_node->getArguments().getNodes() = std::move(arguments); - function_node->resolveAsFunction(function->build(function_node->getArgumentTypes())); + function_node->resolveAsFunction(function->build(function_node->getArgumentColumns())); return function_node; } @@ -88,7 +88,7 @@ FunctionNodePtr createResolvedAggregateFunction(const String & name, const Query { argument->getResultType() }, parameters, properties); - function_node->resolveAsAggregateFunction(aggregate_function); + function_node->resolveAsAggregateFunction(std::move(aggregate_function)); return function_node; } diff --git a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp index 020edfe4820..077ba331ead 100644 --- a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp +++ b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp @@ -56,7 +56,7 @@ public: auto multi_if_function = std::make_shared("multiIf"); multi_if_function->getArguments().getNodes() = std::move(multi_if_arguments); - multi_if_function->resolveAsFunction(multi_if_function_ptr->build(multi_if_function->getArgumentTypes())); + multi_if_function->resolveAsFunction(multi_if_function_ptr->build(multi_if_function->getArgumentColumns())); node = std::move(multi_if_function); } diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index 776fe63c803..dd1c211a053 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -52,7 +52,7 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr from, DataTypePtr result_ty auto function_node = std::make_shared("_CAST"); function_node->getArguments().getNodes() = std::move(arguments); - function_node->resolveAsFunction(cast_function->build(function_node->getArgumentTypes())); + function_node->resolveAsFunction(cast_function->build(function_node->getArgumentColumns())); return function_node; } 
@@ -71,7 +71,7 @@ void changeIfArguments( auto if_resolver = FunctionFactory::instance().get("if", context); - if_node.resolveAsFunction(if_resolver->build(if_node.getArgumentTypes())); + if_node.resolveAsFunction(if_resolver->build(if_node.getArgumentColumns())); } /// transform(value, array_from, array_to, default_value) will be transformed to transform(value, array_from, _CAST(array_to, Array(Enum...)), _CAST(default_value, Enum...)) @@ -93,7 +93,7 @@ void changeTransformArguments( auto transform_resolver = FunctionFactory::instance().get("transform", context); - transform_node.resolveAsFunction(transform_resolver->build(transform_node.getArgumentTypes())); + transform_node.resolveAsFunction(transform_resolver->build(transform_node.getArgumentColumns())); } void wrapIntoToString(FunctionNode & function_node, QueryTreeNodePtr arg, ContextPtr context) @@ -102,7 +102,7 @@ void wrapIntoToString(FunctionNode & function_node, QueryTreeNodePtr arg, Contex QueryTreeNodes arguments{ std::move(arg) }; function_node.getArguments().getNodes() = std::move(arguments); - function_node.resolveAsFunction(to_string_function->build(function_node.getArgumentTypes())); + function_node.resolveAsFunction(to_string_function->build(function_node.getArgumentColumns())); assert(isString(function_node.getResultType())); } diff --git a/src/Analyzer/Passes/MultiIfToIfPass.cpp b/src/Analyzer/Passes/MultiIfToIfPass.cpp index 7e13675bf98..9eb2a4da817 100644 --- a/src/Analyzer/Passes/MultiIfToIfPass.cpp +++ b/src/Analyzer/Passes/MultiIfToIfPass.cpp @@ -27,7 +27,7 @@ public: return; auto result_type = function_node->getResultType(); - function_node->resolveAsFunction(if_function_ptr->build(function_node->getArgumentTypes())); + function_node->resolveAsFunction(if_function_ptr->build(function_node->getArgumentColumns())); } private: diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 8d923d2a69d..e93548d34ed 100644 --- 
a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -4333,7 +4333,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi ? AggregateFunctionFactory::instance().get(function_name + "OrNull", argument_types, parameters, properties) : AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties); - function_node.resolveAsWindowFunction(aggregate_function); + function_node.resolveAsWindowFunction(std::move(aggregate_function)); bool window_node_is_identifier = function_node.getWindowNode()->getNodeType() == QueryTreeNodeType::IDENTIFIER; ProjectionName window_projection_name = resolveWindow(function_node.getWindowNode(), scope); @@ -4396,7 +4396,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi auto aggregate_function = need_add_or_null ? AggregateFunctionFactory::instance().get(function_name + "OrNull", argument_types, parameters, properties) : AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties); - function_node.resolveAsAggregateFunction(aggregate_function); + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); return result_projection_names; } diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 7e120b6828d..879eb4d4a8d 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -122,7 +122,7 @@ public: auto & not_function_arguments = not_function->getArguments().getNodes(); not_function_arguments.push_back(std::move(nested_if_function_arguments_nodes[0])); - not_function->resolveAsFunction(FunctionFactory::instance().get("not", context)->build(not_function->getArgumentTypes())); + not_function->resolveAsFunction(FunctionFactory::instance().get("not", context)->build(not_function->getArgumentColumns())); function_node_arguments_nodes[0] = 
std::move(not_function); function_node_arguments_nodes.resize(1); diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index 37bad70da57..3ce5ec4a24c 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -75,7 +75,6 @@ public: function_node->getAggregateFunction()->getParameters(), properties); - auto function_result_type = function_node->getResultType(); function_node->resolveAsAggregateFunction(std::move(aggregate_function)); } }; diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index 06a1fec4698..4148d42ee23 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -59,7 +59,7 @@ class ValidationChecker : public InDepthQueryTreeVisitor if (!function->isResolved()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Function {} is not resolved after running {} pass", - function->dumpTree(), pass_name); + function->toAST()->formatForErrorMessage(), pass_name); } public: diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 27faf4bd2ad..a295eeb5c84 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -355,6 +355,8 @@ size_t ColumnUnique::uniqueInsert(const Field & x) void operator() (const Int128 & x) { res = {reinterpret_cast(&x), sizeof(x)}; } void operator() (const Int256 & x) { res = {reinterpret_cast(&x), sizeof(x)}; } void operator() (const UUID & x) { res = {reinterpret_cast(&x), sizeof(x)}; } + void operator() (const IPv4 & x) { res = {reinterpret_cast(&x), sizeof(x)}; } + void operator() (const IPv6 & x) { res = {reinterpret_cast(&x), sizeof(x)}; } void operator() (const Float64 & x) { res = {reinterpret_cast(&x), sizeof(x)}; } void operator() (const DecimalField & x) { res = {reinterpret_cast(&x), sizeof(x)}; } void operator() (const DecimalField & x) { res = 
{reinterpret_cast(&x), sizeof(x)}; } diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 30e7423fde0..a97da8fe9c6 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -942,5 +942,7 @@ template class ColumnVector; template class ColumnVector; template class ColumnVector; template class ColumnVector; +template class ColumnVector; +template class ColumnVector; } diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index a601dd8b405..ded66430160 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -557,5 +557,7 @@ extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; extern template class ColumnVector; +extern template class ColumnVector; +extern template class ColumnVector; } diff --git a/src/Columns/ColumnsNumber.h b/src/Columns/ColumnsNumber.h index 654d0c73254..ae7eddb0b22 100644 --- a/src/Columns/ColumnsNumber.h +++ b/src/Columns/ColumnsNumber.h @@ -27,5 +27,7 @@ using ColumnFloat32 = ColumnVector; using ColumnFloat64 = ColumnVector; using ColumnUUID = ColumnVector; +using ColumnIPv4 = ColumnVector; +using ColumnIPv6 = ColumnVector; } diff --git a/src/Columns/MaskOperations.cpp b/src/Columns/MaskOperations.cpp index e320e1d57a3..24cb81a6528 100644 --- a/src/Columns/MaskOperations.cpp +++ b/src/Columns/MaskOperations.cpp @@ -72,6 +72,8 @@ INSTANTIATE(Decimal256) INSTANTIATE(DateTime64) INSTANTIATE(char *) INSTANTIATE(UUID) +INSTANTIATE(IPv4) +INSTANTIATE(IPv6) #undef INSTANTIATE diff --git a/src/Columns/tests/gtest_column_vector.cpp b/src/Columns/tests/gtest_column_vector.cpp index 5017d687791..14bf36434b6 100644 --- a/src/Columns/tests/gtest_column_vector.cpp +++ b/src/Columns/tests/gtest_column_vector.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -14,6 +15,12 @@ static constexpr size_t MAX_ROWS = 10000; static const std::vector filter_ratios = {1, 2, 5, 11, 32, 64, 100, 1000}; static const size_t 
K = filter_ratios.size(); +template +struct HasUnderlyingType : std::false_type {}; + +template +struct HasUnderlyingType> : std::true_type {}; + template static MutableColumnPtr createColumn(size_t n) { @@ -21,7 +28,10 @@ static MutableColumnPtr createColumn(size_t n) auto & values = column->getData(); for (size_t i = 0; i < n; ++i) - values.push_back(static_cast(i)); + if constexpr (HasUnderlyingType::value) + values.push_back(static_cast(i)); + else + values.push_back(static_cast(i)); return column; } @@ -85,6 +95,8 @@ TEST(ColumnVector, Filter) testFilter(); testFilter(); testFilter(); + testFilter(); + testFilter(); } template diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 95333eccbcd..531d7292ae2 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -643,6 +643,8 @@ M(672, INVALID_SCHEDULER_NODE) \ M(673, RESOURCE_ACCESS_DENIED) \ M(674, RESOURCE_NOT_FOUND) \ + M(675, CANNOT_PARSE_IPV4) \ + M(676, CANNOT_PARSE_IPV6) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/FieldVisitorConvertToNumber.h b/src/Common/FieldVisitorConvertToNumber.h index 466d312406e..94071e4f3fa 100644 --- a/src/Common/FieldVisitorConvertToNumber.h +++ b/src/Common/FieldVisitorConvertToNumber.h @@ -55,6 +55,8 @@ public: T operator() (const Int64 & x) const { return T(x); } T operator() (const Int128 & x) const { return T(x); } T operator() (const UUID & x) const { return T(x.toUnderType()); } + T operator() (const IPv4 & x) const { return T(x.toUnderType()); } + T operator() (const IPv6 & x) const { return T(x.toUnderType()); } T operator() (const Float64 & x) const { diff --git a/src/Common/FieldVisitorDump.cpp b/src/Common/FieldVisitorDump.cpp index fc3d56c3503..be4331ca478 100644 --- a/src/Common/FieldVisitorDump.cpp +++ b/src/Common/FieldVisitorDump.cpp @@ -37,6 +37,8 @@ String FieldVisitorDump::operator() (const UInt256 & x) const { return formatQuo String FieldVisitorDump::operator() (const Int128 & x) 
const { return formatQuotedWithPrefix(x, "Int128_"); } String FieldVisitorDump::operator() (const Int256 & x) const { return formatQuotedWithPrefix(x, "Int256_"); } String FieldVisitorDump::operator() (const UUID & x) const { return formatQuotedWithPrefix(x, "UUID_"); } +String FieldVisitorDump::operator() (const IPv4 & x) const { return formatQuotedWithPrefix(x, "IPv4_"); } +String FieldVisitorDump::operator() (const IPv6 & x) const { return formatQuotedWithPrefix(x, "IPv6_"); } String FieldVisitorDump::operator() (const bool & x) const { return formatQuotedWithPrefix(x, "Bool_"); } diff --git a/src/Common/FieldVisitorDump.h b/src/Common/FieldVisitorDump.h index dc67ccf7da3..6ffd91bb400 100644 --- a/src/Common/FieldVisitorDump.h +++ b/src/Common/FieldVisitorDump.h @@ -17,6 +17,8 @@ public: String operator() (const Int128 & x) const; String operator() (const Int256 & x) const; String operator() (const UUID & x) const; + String operator() (const IPv4 & x) const; + String operator() (const IPv6 & x) const; String operator() (const Float64 & x) const; String operator() (const String & x) const; String operator() (const Array & x) const; diff --git a/src/Common/FieldVisitorHash.cpp b/src/Common/FieldVisitorHash.cpp index b6750fdcd03..d759635c65b 100644 --- a/src/Common/FieldVisitorHash.cpp +++ b/src/Common/FieldVisitorHash.cpp @@ -49,6 +49,20 @@ void FieldVisitorHash::operator() (const UUID & x) const hash.update(x); } +void FieldVisitorHash::operator() (const IPv4 & x) const +{ + UInt8 type = Field::Types::IPv4; + hash.update(type); + hash.update(x); +} + +void FieldVisitorHash::operator() (const IPv6 & x) const +{ + UInt8 type = Field::Types::IPv6; + hash.update(type); + hash.update(x); +} + void FieldVisitorHash::operator() (const Float64 & x) const { UInt8 type = Field::Types::Float64; diff --git a/src/Common/FieldVisitorHash.h b/src/Common/FieldVisitorHash.h index e574b0456eb..1350956146b 100644 --- a/src/Common/FieldVisitorHash.h +++ 
b/src/Common/FieldVisitorHash.h @@ -23,6 +23,8 @@ public: void operator() (const Int128 & x) const; void operator() (const Int256 & x) const; void operator() (const UUID & x) const; + void operator() (const IPv4 & x) const; + void operator() (const IPv6 & x) const; void operator() (const Float64 & x) const; void operator() (const String & x) const; void operator() (const Array & x) const; diff --git a/src/Common/FieldVisitorSum.cpp b/src/Common/FieldVisitorSum.cpp index db7b4850204..ed4dd373049 100644 --- a/src/Common/FieldVisitorSum.cpp +++ b/src/Common/FieldVisitorSum.cpp @@ -33,6 +33,8 @@ bool FieldVisitorSum::operator() (Tuple &) const { throw Exception("Cannot sum T bool FieldVisitorSum::operator() (Map &) const { throw Exception("Cannot sum Maps", ErrorCodes::LOGICAL_ERROR); } bool FieldVisitorSum::operator() (Object &) const { throw Exception("Cannot sum Objects", ErrorCodes::LOGICAL_ERROR); } bool FieldVisitorSum::operator() (UUID &) const { throw Exception("Cannot sum UUIDs", ErrorCodes::LOGICAL_ERROR); } +bool FieldVisitorSum::operator() (IPv4 &) const { throw Exception("Cannot sum IPv4s", ErrorCodes::LOGICAL_ERROR); } +bool FieldVisitorSum::operator() (IPv6 &) const { throw Exception("Cannot sum IPv6s", ErrorCodes::LOGICAL_ERROR); } bool FieldVisitorSum::operator() (AggregateFunctionStateData &) const { diff --git a/src/Common/FieldVisitorSum.h b/src/Common/FieldVisitorSum.h index c28e2058b05..5b9c83d1dd1 100644 --- a/src/Common/FieldVisitorSum.h +++ b/src/Common/FieldVisitorSum.h @@ -28,6 +28,8 @@ public: bool operator() (Map &) const; bool operator() (Object &) const; bool operator() (UUID &) const; + bool operator() (IPv4 &) const; + bool operator() (IPv6 &) const; bool operator() (AggregateFunctionStateData &) const; bool operator() (bool &) const; diff --git a/src/Common/FieldVisitorToString.cpp b/src/Common/FieldVisitorToString.cpp index e0e138d744c..d7113b8c724 100644 --- a/src/Common/FieldVisitorToString.cpp +++ 
b/src/Common/FieldVisitorToString.cpp @@ -65,6 +65,8 @@ String FieldVisitorToString::operator() (const UInt128 & x) const { return forma String FieldVisitorToString::operator() (const UInt256 & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const Int256 & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const UUID & x) const { return formatQuoted(x); } +String FieldVisitorToString::operator() (const IPv4 & x) const { return formatQuoted(x); } +String FieldVisitorToString::operator() (const IPv6 & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const AggregateFunctionStateData & x) const { return formatQuoted(x.data); } String FieldVisitorToString::operator() (const bool & x) const { return x ? "true" : "false"; } diff --git a/src/Common/FieldVisitorToString.h b/src/Common/FieldVisitorToString.h index cca29a8f7e0..6a10de12ff9 100644 --- a/src/Common/FieldVisitorToString.h +++ b/src/Common/FieldVisitorToString.h @@ -17,6 +17,8 @@ public: String operator() (const Int128 & x) const; String operator() (const Int256 & x) const; String operator() (const UUID & x) const; + String operator() (const IPv4 & x) const; + String operator() (const IPv6 & x) const; String operator() (const Float64 & x) const; String operator() (const String & x) const; String operator() (const Array & x) const; diff --git a/src/Common/FieldVisitorWriteBinary.cpp b/src/Common/FieldVisitorWriteBinary.cpp index 85c32cee3c0..c9b0cf16414 100644 --- a/src/Common/FieldVisitorWriteBinary.cpp +++ b/src/Common/FieldVisitorWriteBinary.cpp @@ -16,6 +16,8 @@ void FieldVisitorWriteBinary::operator() (const Int128 & x, WriteBuffer & buf) c void FieldVisitorWriteBinary::operator() (const UInt256 & x, WriteBuffer & buf) const { writeBinary(x, buf); } void FieldVisitorWriteBinary::operator() (const Int256 & x, WriteBuffer & buf) const { writeBinary(x, buf); } void FieldVisitorWriteBinary::operator() (const UUID & x, 
WriteBuffer & buf) const { writeBinary(x, buf); } +void FieldVisitorWriteBinary::operator() (const IPv4 & x, WriteBuffer & buf) const { writeBinary(x, buf); } +void FieldVisitorWriteBinary::operator() (const IPv6 & x, WriteBuffer & buf) const { writeBinary(x, buf); } void FieldVisitorWriteBinary::operator() (const DecimalField & x, WriteBuffer & buf) const { writeBinary(x.getValue(), buf); } void FieldVisitorWriteBinary::operator() (const DecimalField & x, WriteBuffer & buf) const { writeBinary(x.getValue(), buf); } void FieldVisitorWriteBinary::operator() (const DecimalField & x, WriteBuffer & buf) const { writeBinary(x.getValue(), buf); } diff --git a/src/Common/FieldVisitorWriteBinary.h b/src/Common/FieldVisitorWriteBinary.h index ff2740383f7..bc75150bed2 100644 --- a/src/Common/FieldVisitorWriteBinary.h +++ b/src/Common/FieldVisitorWriteBinary.h @@ -16,6 +16,8 @@ public: void operator() (const Int128 & x, WriteBuffer & buf) const; void operator() (const Int256 & x, WriteBuffer & buf) const; void operator() (const UUID & x, WriteBuffer & buf) const; + void operator() (const IPv4 & x, WriteBuffer & buf) const; + void operator() (const IPv6 & x, WriteBuffer & buf) const; void operator() (const Float64 & x, WriteBuffer & buf) const; void operator() (const String & x, WriteBuffer & buf) const; void operator() (const Array & x, WriteBuffer & buf) const; diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h index 2e0ef400da8..01758c1b9fb 100644 --- a/src/Common/HashTable/Hash.h +++ b/src/Common/HashTable/Hash.h @@ -259,7 +259,7 @@ inline size_t DefaultHash64(T key) static_cast(key) ^ static_cast(key >> 64)); } - else if constexpr (std::is_same_v) + else if constexpr (std::is_same_v || std::is_same_v) { return intHash64( static_cast(key.toUnderType()) ^ diff --git a/src/Common/formatIPv6.cpp b/src/Common/formatIPv6.cpp index 9c71debaa1e..cfa93bf4920 100644 --- a/src/Common/formatIPv6.cpp +++ b/src/Common/formatIPv6.cpp @@ -9,36 +9,55 @@ namespace DB { 
-// To be used in formatIPv4, maps a byte to it's string form prefixed with length (so save strlen call). -extern const char one_byte_to_string_lookup_table[256][4] = +/** Further we want to generate constexpr array of strings with sizes from sequence of unsigned ints [0..N) + * in order to use this array for fast conversion of unsigned integers to strings + */ +namespace detail { - {1, '0'}, {1, '1'}, {1, '2'}, {1, '3'}, {1, '4'}, {1, '5'}, {1, '6'}, {1, '7'}, {1, '8'}, {1, '9'}, - {2, '1', '0'}, {2, '1', '1'}, {2, '1', '2'}, {2, '1', '3'}, {2, '1', '4'}, {2, '1', '5'}, {2, '1', '6'}, {2, '1', '7'}, {2, '1', '8'}, {2, '1', '9'}, - {2, '2', '0'}, {2, '2', '1'}, {2, '2', '2'}, {2, '2', '3'}, {2, '2', '4'}, {2, '2', '5'}, {2, '2', '6'}, {2, '2', '7'}, {2, '2', '8'}, {2, '2', '9'}, - {2, '3', '0'}, {2, '3', '1'}, {2, '3', '2'}, {2, '3', '3'}, {2, '3', '4'}, {2, '3', '5'}, {2, '3', '6'}, {2, '3', '7'}, {2, '3', '8'}, {2, '3', '9'}, - {2, '4', '0'}, {2, '4', '1'}, {2, '4', '2'}, {2, '4', '3'}, {2, '4', '4'}, {2, '4', '5'}, {2, '4', '6'}, {2, '4', '7'}, {2, '4', '8'}, {2, '4', '9'}, - {2, '5', '0'}, {2, '5', '1'}, {2, '5', '2'}, {2, '5', '3'}, {2, '5', '4'}, {2, '5', '5'}, {2, '5', '6'}, {2, '5', '7'}, {2, '5', '8'}, {2, '5', '9'}, - {2, '6', '0'}, {2, '6', '1'}, {2, '6', '2'}, {2, '6', '3'}, {2, '6', '4'}, {2, '6', '5'}, {2, '6', '6'}, {2, '6', '7'}, {2, '6', '8'}, {2, '6', '9'}, - {2, '7', '0'}, {2, '7', '1'}, {2, '7', '2'}, {2, '7', '3'}, {2, '7', '4'}, {2, '7', '5'}, {2, '7', '6'}, {2, '7', '7'}, {2, '7', '8'}, {2, '7', '9'}, - {2, '8', '0'}, {2, '8', '1'}, {2, '8', '2'}, {2, '8', '3'}, {2, '8', '4'}, {2, '8', '5'}, {2, '8', '6'}, {2, '8', '7'}, {2, '8', '8'}, {2, '8', '9'}, - {2, '9', '0'}, {2, '9', '1'}, {2, '9', '2'}, {2, '9', '3'}, {2, '9', '4'}, {2, '9', '5'}, {2, '9', '6'}, {2, '9', '7'}, {2, '9', '8'}, {2, '9', '9'}, - {3, '1', '0', '0'}, {3, '1', '0', '1'}, {3, '1', '0', '2'}, {3, '1', '0', '3'}, {3, '1', '0', '4'}, {3, '1', '0', '5'}, {3, '1', '0', '6'}, {3,
'1', '0', '7'}, {3, '1', '0', '8'}, {3, '1', '0', '9'}, - {3, '1', '1', '0'}, {3, '1', '1', '1'}, {3, '1', '1', '2'}, {3, '1', '1', '3'}, {3, '1', '1', '4'}, {3, '1', '1', '5'}, {3, '1', '1', '6'}, {3, '1', '1', '7'}, {3, '1', '1', '8'}, {3, '1', '1', '9'}, - {3, '1', '2', '0'}, {3, '1', '2', '1'}, {3, '1', '2', '2'}, {3, '1', '2', '3'}, {3, '1', '2', '4'}, {3, '1', '2', '5'}, {3, '1', '2', '6'}, {3, '1', '2', '7'}, {3, '1', '2', '8'}, {3, '1', '2', '9'}, - {3, '1', '3', '0'}, {3, '1', '3', '1'}, {3, '1', '3', '2'}, {3, '1', '3', '3'}, {3, '1', '3', '4'}, {3, '1', '3', '5'}, {3, '1', '3', '6'}, {3, '1', '3', '7'}, {3, '1', '3', '8'}, {3, '1', '3', '9'}, - {3, '1', '4', '0'}, {3, '1', '4', '1'}, {3, '1', '4', '2'}, {3, '1', '4', '3'}, {3, '1', '4', '4'}, {3, '1', '4', '5'}, {3, '1', '4', '6'}, {3, '1', '4', '7'}, {3, '1', '4', '8'}, {3, '1', '4', '9'}, - {3, '1', '5', '0'}, {3, '1', '5', '1'}, {3, '1', '5', '2'}, {3, '1', '5', '3'}, {3, '1', '5', '4'}, {3, '1', '5', '5'}, {3, '1', '5', '6'}, {3, '1', '5', '7'}, {3, '1', '5', '8'}, {3, '1', '5', '9'}, - {3, '1', '6', '0'}, {3, '1', '6', '1'}, {3, '1', '6', '2'}, {3, '1', '6', '3'}, {3, '1', '6', '4'}, {3, '1', '6', '5'}, {3, '1', '6', '6'}, {3, '1', '6', '7'}, {3, '1', '6', '8'}, {3, '1', '6', '9'}, - {3, '1', '7', '0'}, {3, '1', '7', '1'}, {3, '1', '7', '2'}, {3, '1', '7', '3'}, {3, '1', '7', '4'}, {3, '1', '7', '5'}, {3, '1', '7', '6'}, {3, '1', '7', '7'}, {3, '1', '7', '8'}, {3, '1', '7', '9'}, - {3, '1', '8', '0'}, {3, '1', '8', '1'}, {3, '1', '8', '2'}, {3, '1', '8', '3'}, {3, '1', '8', '4'}, {3, '1', '8', '5'}, {3, '1', '8', '6'}, {3, '1', '8', '7'}, {3, '1', '8', '8'}, {3, '1', '8', '9'}, - {3, '1', '9', '0'}, {3, '1', '9', '1'}, {3, '1', '9', '2'}, {3, '1', '9', '3'}, {3, '1', '9', '4'}, {3, '1', '9', '5'}, {3, '1', '9', '6'}, {3, '1', '9', '7'}, {3, '1', '9', '8'}, {3, '1', '9', '9'}, - {3, '2', '0', '0'}, {3, '2', '0', '1'}, {3, '2', '0', '2'}, {3, '2', '0', '3'}, {3, '2', '0', '4'}, {3, '2', '0', '5'}, {3, 
'2', '0', '6'}, {3, '2', '0', '7'}, {3, '2', '0', '8'}, {3, '2', '0', '9'}, - {3, '2', '1', '0'}, {3, '2', '1', '1'}, {3, '2', '1', '2'}, {3, '2', '1', '3'}, {3, '2', '1', '4'}, {3, '2', '1', '5'}, {3, '2', '1', '6'}, {3, '2', '1', '7'}, {3, '2', '1', '8'}, {3, '2', '1', '9'}, - {3, '2', '2', '0'}, {3, '2', '2', '1'}, {3, '2', '2', '2'}, {3, '2', '2', '3'}, {3, '2', '2', '4'}, {3, '2', '2', '5'}, {3, '2', '2', '6'}, {3, '2', '2', '7'}, {3, '2', '2', '8'}, {3, '2', '2', '9'}, - {3, '2', '3', '0'}, {3, '2', '3', '1'}, {3, '2', '3', '2'}, {3, '2', '3', '3'}, {3, '2', '3', '4'}, {3, '2', '3', '5'}, {3, '2', '3', '6'}, {3, '2', '3', '7'}, {3, '2', '3', '8'}, {3, '2', '3', '9'}, - {3, '2', '4', '0'}, {3, '2', '4', '1'}, {3, '2', '4', '2'}, {3, '2', '4', '3'}, {3, '2', '4', '4'}, {3, '2', '4', '5'}, {3, '2', '4', '6'}, {3, '2', '4', '7'}, {3, '2', '4', '8'}, {3, '2', '4', '9'}, - {3, '2', '5', '0'}, {3, '2', '5', '1'}, {3, '2', '5', '2'}, {3, '2', '5', '3'}, {3, '2', '5', '4'}, {3, '2', '5', '5'}, -}; + template + struct ToChars + { + static const char value[]; + static const size_t size; + }; + + template + constexpr char ToChars::value[] = {('0' + digits)..., 0}; + + template + constexpr size_t ToChars::size = sizeof...(digits); + + template + struct Decompose : Decompose {}; + + template + struct Decompose<0, digits...> : ToChars {}; + + template <> + struct Decompose<0> : ToChars<0> {}; + + template + struct NumToString : Decompose {}; + + template + consteval std::array, sizeof...(ints)> str_make_array_impl(std::integer_sequence) + { + return std::array, sizeof...(ints)> { std::pair {NumToString::value, NumToString::size}... 
}; + } +} + +/** str_make_array() - generates static array of std::pair for numbers [0..N), where: + * first - null-terminated string representing number + * second - size of the string as would be returned by strlen() + */ +template +consteval std::array, N> str_make_array() +{ + return detail::str_make_array_impl(std::make_integer_sequence{}); +} + +/// This will generate static array of pair for [0..255] at compile time +extern constexpr auto one_byte_to_string_lookup_table = str_make_array<256>(); /// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base) static constexpr UInt32 intLog(const UInt32 value, const UInt32 base, const bool carry) diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h index 14093594cff..69963336cef 100644 --- a/src/Common/formatIPv6.h +++ b/src/Common/formatIPv6.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,7 @@ constexpr size_t IPV6_MAX_TEXT_LENGTH = 45; /// Does not count tail zero byt namespace DB { +extern const std::array, 256> one_byte_to_string_lookup_table; /** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c * performs significantly faster than the reference implementation due to the absence of sprintf calls, @@ -30,17 +32,38 @@ void formatIPv6(const unsigned char * src, char *& dst, uint8_t zeroed_tail_byte * which should be long enough. * That is "127.0.0.1" becomes 0x7f000001. * - * In case of failure returns false and doesn't modify buffer pointed by `dst`. + * In case of failure doesn't modify buffer pointed by `dst`. * - * @param src - input string, expected to be non-null and null-terminated right after the IPv4 string value. - * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. - * @return false if parsing failed, true otherwise.
+ WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) + * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. + * To parse strings use overloads below. + * + * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. + * @param eof - function returning true if iterator reached the end - warning - can break iterator's continuity. + * @param dst - where to put output bytes, expected to be non-null and at least IPV4_BINARY_LENGTH bytes long. + * @param first_octet - preparsed first octet + * @return - true if parsed successfully, false otherwise. */ -inline bool parseIPv4(const char * src, unsigned char * dst) +template +requires (std::is_same::type, char>::value) +inline bool parseIPv4(T * &src, EOFfunction eof, unsigned char * dst, int32_t first_octet = -1) { + if (src == nullptr || first_octet > 255) + return false; + UInt32 result = 0; - for (int offset = 24; offset >= 0; offset -= 8) + int offset = 24; + if (first_octet >= 0) { + result |= first_octet << offset; + offset -= 8; + } + + for (; true; offset -= 8, ++src) + { + if (eof()) + return false; + UInt32 value = 0; size_t len = 0; while (isNumericASCII(*src) && len <= 3) @@ -48,135 +71,331 @@ inline bool parseIPv4(const char * src, unsigned char * dst) value = value * 10 + (*src - '0'); ++len; ++src; + if (eof()) + break; } - if (len == 0 || value > 255 || (offset > 0 && *src != '.')) + if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != '.'))) return false; result |= value << offset; - ++src; - } - if (*(src - 1) != '\0') - return false; - memcpy(dst, &result, sizeof(result)); + if (offset == 0) + break; + } + + if constexpr (std::endian::native == std::endian::little) + memcpy(dst, &result, sizeof(result)); + else + reverseMemcpy(dst, &result, sizeof(result)); + return true; } +/// returns pointer to the right after parsed sequence or null on
failed parsing +inline const char * parseIPv4(const char * src, const char * end, unsigned char * dst) +{ + if (parseIPv4(src, [&src, end](){ return src == end; }, dst)) + return src; + return nullptr; +} + +/// returns true if whole buffer was parsed successfully +inline bool parseIPv4whole(const char * src, const char * end, unsigned char * dst) +{ + return parseIPv4(src, end, dst) == end; +} + +/// returns pointer to the right after parsed sequence or null on failed parsing +inline const char * parseIPv4(const char * src, unsigned char * dst) +{ + if (parseIPv4(src, [](){ return false; }, dst)) + return src; + return nullptr; +} + +/// returns true if whole null-terminated string was parsed successfully +inline bool parseIPv4whole(const char * src, unsigned char * dst) +{ + const char * end = parseIPv4(src, dst); + return end != nullptr && *end == '\0'; +} + /** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string. * -* Slightly altered implementation from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c * Parses the input string `src` and stores binary big-endian value into buffer pointed by `dst`, -* which should be long enough. In case of failure zeroes -* IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`. +* which should be long enough. In case of failure zeroes IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`. * -* @param src - input string, expected to be non-null and null-terminated right after the IPv6 string value. -* @param dst - where to put output bytes, expected to be non-null and at IPV6_BINARY_LENGTH-long. -* @return false if parsing failed, true otherwise. +* WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) +* and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. +* To parse strings use overloads below. 
+* +* @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. +* @param eof - function returning true if iterator reached the end - warning - can break iterator's continuity. +* @param dst - where to put output bytes, expected to be non-null and at least IPV6_BINARY_LENGTH bytes long. +* @param first_block - preparsed first block +* @return - true if parsed successfully, false otherwise. */ -inline bool parseIPv6(const char * src, unsigned char * dst) +template +requires (std::is_same::type, char>::value) +inline bool parseIPv6(T * &src, EOFfunction eof, unsigned char * dst, int32_t first_block = -1) { const auto clear_dst = [dst]() { - memset(dst, '\0', IPV6_BINARY_LENGTH); + std::memset(dst, '\0', IPV6_BINARY_LENGTH); return false; }; - /// Leading :: requires some special handling. - if (*src == ':') - if (*++src != ':') - return clear_dst(); + if (src == nullptr || eof()) + return clear_dst(); - unsigned char tmp[IPV6_BINARY_LENGTH]{}; - unsigned char * tp = tmp; - unsigned char * endp = tp + IPV6_BINARY_LENGTH; - const char * curtok = src; - bool saw_xdigit = false; - UInt32 val{}; - unsigned char * colonp = nullptr; + int groups = 0; /// number of parsed groups + unsigned char * iter = dst; /// iterator over dst buffer + unsigned char * zptr = nullptr; /// pointer into dst buffer array where all-zeroes block ("::") starts - /// Assuming zero-terminated string.
- while (char ch = *src++) + std::memset(dst, '\0', IPV6_BINARY_LENGTH); + + if (first_block >= 0) { - UInt8 num = unhex(ch); - - if (num != 0xFF) + *iter++ = static_cast((first_block >> 8) & 0xffu); + *iter++ = static_cast(first_block & 0xffu); + if (*src == ':') { - val <<= 4; - val |= num; - if (val > 0xffffu) + zptr = iter; + ++src; + } + ++groups; + } + + bool group_start = true; + + while (!eof() && groups < 8) + { + if (*src == ':') + { + ++src; + if (eof()) /// trailing colon is not allowed return clear_dst(); - saw_xdigit = true; - continue; - } + group_start = true; - if (ch == ':') - { - curtok = src; - if (!saw_xdigit) + if (*src == ':') { - if (colonp) + if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed return clear_dst(); - - colonp = tp; + zptr = iter; + ++src; continue; } - - if (tp + sizeof(UInt16) > endp) + if (groups == 0) /// leading colon is not allowed return clear_dst(); - - *tp++ = static_cast((val >> 8) & 0xffu); - *tp++ = static_cast(val & 0xffu); - saw_xdigit = false; - val = 0; - continue; } - if (ch == '.' 
&& (tp + IPV4_BINARY_LENGTH) <= endp) + if (*src == '.') /// mixed IPv4 parsing { - if (!parseIPv4(curtok, tp)) + if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first + return clear_dst(); + + ++src; + if (eof()) + return clear_dst(); + + /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4 + --groups; + iter -= 2; + + UInt16 num = 0; + for (int i = 0; i < 2; ++i) + { + unsigned char first = (iter[i] >> 4) & 0x0fu; + unsigned char second = iter[i] & 0x0fu; + if (first > 9 || second > 9) + return clear_dst(); + (num *= 100) += first * 10 + second; + } + if (num > 255) + return clear_dst(); + + /// parse IPv4 with known first octet + if (!parseIPv4(src, eof, iter, num)) return clear_dst(); if constexpr (std::endian::native == std::endian::little) - std::reverse(tp, tp + IPV4_BINARY_LENGTH); + std::reverse(iter, iter + IPV4_BINARY_LENGTH); - tp += IPV4_BINARY_LENGTH; - saw_xdigit = false; - break; /* '\0' was seen by ipv4_scan(). */ + iter += 4; + groups += 2; + break; /// IPv4 block is the last - end of parsing } - return clear_dst(); + if (!group_start) /// end of parsing + break; + group_start = false; + + UInt16 val = 0; /// current decoded group + int xdigits = 0; /// number of decoded hex digits in current group + + for (; !eof() && xdigits < 4; ++src, ++xdigits) + { + UInt8 num = unhex(*src); + if (num == 0xFF) + break; + (val <<= 4) |= num; + } + + if (xdigits == 0) /// end of parsing + break; + + *iter++ = static_cast((val >> 8) & 0xffu); + *iter++ = static_cast(val & 0xffu); + ++groups; } - if (saw_xdigit) + /// either all 8 groups or all-zeroes block should be present + if (groups < 8 && zptr == nullptr) + return clear_dst(); + + if (zptr != nullptr) /// process all-zeroes block { - if (tp + sizeof(UInt16) > endp) + size_t msize = iter - zptr; + std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize); + std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst)); + } + + return true; +} + +/// 
returns pointer to the right after parsed sequence or null on failed parsing +inline const char * parseIPv6(const char * src, const char * end, unsigned char * dst) +{ + if (parseIPv6(src, [&src, end](){ return src == end; }, dst)) + return src; + return nullptr; +} + +/// returns true if whole buffer was parsed successfully +inline bool parseIPv6whole(const char * src, const char * end, unsigned char * dst) +{ + return parseIPv6(src, end, dst) == end; +} + +/// returns pointer to the right after parsed sequence or null on failed parsing +inline const char * parseIPv6(const char * src, unsigned char * dst) +{ + if (parseIPv6(src, [](){ return false; }, dst)) + return src; + return nullptr; +} + +/// returns true if whole null-terminated string was parsed successfully +inline bool parseIPv6whole(const char * src, unsigned char * dst) +{ + const char * end = parseIPv6(src, dst); + return end != nullptr && *end == '\0'; +} + +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string. +* +* Parses the input string `src` IPv6 or possible IPv4 into IPv6 and stores binary big-endian value into buffer pointed by `dst`, +* which should be long enough. In case of failure zeroes IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`. +* +* WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) +* and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. +* +* @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. +* @param eof - function returning true if iterator reached the end - warning - can break iterator's continuity. +* @param dst - where to put output bytes, expected to be non-null and at least IPV6_BINARY_LENGTH-long. +* @return - true if parsed successfully, false otherwise.
+*/ +template +requires (std::is_same::type, char>::value) +inline bool parseIPv6orIPv4(T * &src, EOFfunction eof, unsigned char * dst) +{ + const auto clear_dst = [dst]() + { + std::memset(dst, '\0', IPV6_BINARY_LENGTH); + return false; + }; + + if (src == nullptr) + return clear_dst(); + + bool leading_zero = false; + uint16_t val = 0; + int digits = 0; + /// parse up to 4 first digits as hexadecimal + for (; !eof() && digits < 4; ++src, ++digits) + { + if (*src == ':' || *src == '.') + break; + + if (digits == 0 && *src == '0') + leading_zero = true; + + UInt8 num = unhex(*src); + if (num == 0xFF) + return clear_dst(); + (val <<= 4) |= num; + } + + if (eof()) + return clear_dst(); + + if (*src == ':') /// IPv6 + { + if (digits == 0) /// leading colon - no preparsed group + return parseIPv6(src, eof, dst); + ++src; + return parseIPv6(src, eof, dst, val); /// parse with first preparsed group + } + + if (*src == '.') /// IPv4 + { + /// should have some digits + if (digits == 0) + return clear_dst(); + /// should not have leading zeroes, should have no more than 3 digits + if ((leading_zero && digits > 1) || digits > 3) return clear_dst(); - *tp++ = static_cast((val >> 8) & 0xffu); - *tp++ = static_cast(val & 0xffu); - } - - if (colonp) - { - /* - * Since some memmove()'s erroneously fail to handle - * overlapping regions, we'll do the shift by hand.
- */ - const auto n = tp - colonp; - - for (int i = 1; i <= n; ++i) + /// recode first group as decimal + UInt16 num = 0; + for (int exp = 1; exp < 1000; exp *= 10) { - endp[- i] = colonp[n - i]; - colonp[n - i] = 0; + int n = val & 0x0fu; + if (n > 9) + return clear_dst(); + num += n * exp; + val >>= 4; } - tp = endp; + if (num > 255) + return clear_dst(); + + ++src; + if (!parseIPv4(src, eof, dst, num)) /// try to parse as IPv4 with preparsed first octet + return clear_dst(); + + /// convert into IPv6 + if constexpr (std::endian::native == std::endian::little) + { + dst[15] = dst[0]; dst[0] = 0; + dst[14] = dst[1]; dst[1] = 0; + dst[13] = dst[2]; dst[2] = 0; + dst[12] = dst[3]; dst[3] = 0; + } + else + { + dst[15] = dst[3]; dst[3] = 0; + dst[14] = dst[2]; dst[2] = 0; + dst[13] = dst[1]; dst[1] = 0; + dst[12] = dst[0]; dst[0] = 0; + } + + dst[11] = 0xff; + dst[10] = 0xff; + + return true; } - if (tp != endp) - return clear_dst(); - - memcpy(dst, tmp, sizeof(tmp)); - return true; + return clear_dst(); } /** Format 4-byte binary sequesnce as IPv4 text: 'aaa.bbb.ccc.ddd', @@ -198,22 +417,27 @@ inline bool parseIPv6(const char * src, unsigned char * dst) * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "0"); * > dst == "127.0.0.0" */ -inline void formatIPv4(const unsigned char * src, char *& dst, uint8_t mask_tail_octets = 0, const char * mask_string = "xxx") +inline void formatIPv4(const unsigned char * src, size_t src_size, char *& dst, uint8_t mask_tail_octets = 0, const char * mask_string = "xxx") { - extern const char one_byte_to_string_lookup_table[256][4]; - const size_t mask_length = mask_string ? 
strlen(mask_string) : 0; const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets); - for (size_t octet = 0; octet < limit; ++octet) + const size_t padding = std::min(4 - src_size, limit); + + for (size_t octet = 0; octet < padding; ++octet) + { + *dst++ = '0'; + *dst++ = '.'; + } + + for (size_t octet = 4 - src_size; octet < limit; ++octet) { uint8_t value = 0; if constexpr (std::endian::native == std::endian::little) value = static_cast(src[IPV4_BINARY_LENGTH - octet - 1]); else value = static_cast(src[octet]); - const auto * rep = one_byte_to_string_lookup_table[value]; - const uint8_t len = rep[0]; - const char* str = rep + 1; + const uint8_t len = one_byte_to_string_lookup_table[value].second; + const char* str = one_byte_to_string_lookup_table[value].first; memcpy(dst, str, len); dst += len; @@ -231,4 +455,9 @@ inline void formatIPv4(const unsigned char * src, char *& dst, uint8_t mask_tail dst[-1] = '\0'; } +inline void formatIPv4(const unsigned char * src, char *& dst, uint8_t mask_tail_octets = 0, const char * mask_string = "xxx") +{ + formatIPv4(src, 4, dst, mask_tail_octets, mask_string); +} + } diff --git a/src/Common/typeid_cast.h b/src/Common/typeid_cast.h index 3c3f236f740..75bc9eb22b1 100644 --- a/src/Common/typeid_cast.h +++ b/src/Common/typeid_cast.h @@ -18,6 +18,9 @@ namespace DB } } +template +concept is_any_of = (std::same_as || ...); + /** Checks type by comparing typeid. * The exact match of the type is checked. That is, cast to the ancestor will be unsuccessful. 
diff --git a/src/Core/Field.cpp b/src/Core/Field.cpp index 71a6d27e5b4..c1842d7e493 100644 --- a/src/Core/Field.cpp +++ b/src/Core/Field.cpp @@ -51,6 +51,18 @@ inline Field getBinaryValue(UInt8 type, ReadBuffer & buf) readBinary(value, buf); return value; } + case Field::Types::IPv4: + { + IPv4 value; + readBinary(value, buf); + return value; + } + case Field::Types::IPv6: + { + IPv6 value; + readBinary(value.toUnderType(), buf); + return value; + } case Field::Types::Int64: { Int64 value; @@ -583,6 +595,8 @@ String fieldTypeToString(Field::Types::Which type) case Field::Types::Which::UInt128: return "UInt128"; case Field::Types::Which::UInt256: return "UInt256"; case Field::Types::Which::UUID: return "UUID"; + case Field::Types::Which::IPv4: return "IPv4"; + case Field::Types::Which::IPv6: return "IPv6"; } } diff --git a/src/Core/Field.h b/src/Core/Field.h index c3516b705a6..3c787389ef6 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -192,6 +193,8 @@ template <> struct NearestFieldTypeImpl { using Type = UInt64; }; template <> struct NearestFieldTypeImpl { using Type = UInt64; }; template <> struct NearestFieldTypeImpl { using Type = UUID; }; +template <> struct NearestFieldTypeImpl { using Type = IPv4; }; +template <> struct NearestFieldTypeImpl { using Type = IPv6; }; template <> struct NearestFieldTypeImpl { using Type = Int64; }; template <> struct NearestFieldTypeImpl { using Type = Int64; }; @@ -292,6 +295,8 @@ public: UUID = 27, Bool = 28, Object = 29, + IPv4 = 30, + IPv6 = 31, }; }; @@ -468,6 +473,8 @@ public: case Types::Int128: return get() < rhs.get(); case Types::Int256: return get() < rhs.get(); case Types::UUID: return get() < rhs.get(); + case Types::IPv4: return get() < rhs.get(); + case Types::IPv6: return get() < rhs.get(); case Types::Float64: return get() < rhs.get(); case Types::String: return get() < rhs.get(); case Types::Array: return get() < 
rhs.get(); @@ -507,6 +514,8 @@ public: case Types::Int128: return get() <= rhs.get(); case Types::Int256: return get() <= rhs.get(); case Types::UUID: return get().toUnderType() <= rhs.get().toUnderType(); + case Types::IPv4: return get() <= rhs.get(); + case Types::IPv6: return get() <= rhs.get(); case Types::Float64: return get() <= rhs.get(); case Types::String: return get() <= rhs.get(); case Types::Array: return get() <= rhs.get(); @@ -547,6 +556,8 @@ public: return std::bit_cast(get()) == std::bit_cast(rhs.get()); } case Types::UUID: return get() == rhs.get(); + case Types::IPv4: return get() == rhs.get(); + case Types::IPv6: return get() == rhs.get(); case Types::String: return get() == rhs.get(); case Types::Array: return get() == rhs.get(); case Types::Tuple: return get() == rhs.get(); @@ -586,6 +597,8 @@ public: case Types::Int128: return f(field.template get()); case Types::Int256: return f(field.template get()); case Types::UUID: return f(field.template get()); + case Types::IPv4: return f(field.template get()); + case Types::IPv6: return f(field.template get()); case Types::Float64: return f(field.template get()); case Types::String: return f(field.template get()); case Types::Array: return f(field.template get()); @@ -612,7 +625,7 @@ public: private: std::aligned_union_t, DecimalField, DecimalField, DecimalField, AggregateFunctionStateData > storage; @@ -747,6 +760,8 @@ template <> struct Field::TypeToEnum { static constexpr Types::Which va template <> struct Field::TypeToEnum { static constexpr Types::Which value = Types::Int128; }; template <> struct Field::TypeToEnum { static constexpr Types::Which value = Types::Int256; }; template <> struct Field::TypeToEnum { static constexpr Types::Which value = Types::UUID; }; +template <> struct Field::TypeToEnum { static constexpr Types::Which value = Types::IPv4; }; +template <> struct Field::TypeToEnum { static constexpr Types::Which value = Types::IPv6; }; template <> struct Field::TypeToEnum { static 
constexpr Types::Which value = Types::Float64; }; template <> struct Field::TypeToEnum { static constexpr Types::Which value = Types::String; }; template <> struct Field::TypeToEnum { static constexpr Types::Which value = Types::Array; }; @@ -769,6 +784,8 @@ template <> struct Field::EnumToType { using Type = Int64 template <> struct Field::EnumToType { using Type = Int128; }; template <> struct Field::EnumToType { using Type = Int256; }; template <> struct Field::EnumToType { using Type = UUID; }; +template <> struct Field::EnumToType { using Type = IPv4; }; +template <> struct Field::EnumToType { using Type = IPv6; }; template <> struct Field::EnumToType { using Type = Float64; }; template <> struct Field::EnumToType { using Type = String; }; template <> struct Field::EnumToType { using Type = Array; }; diff --git a/src/Core/IResolvedFunction.h b/src/Core/IResolvedFunction.h index 64c69f597c7..d472d2ce734 100644 --- a/src/Core/IResolvedFunction.h +++ b/src/Core/IResolvedFunction.h @@ -12,6 +12,9 @@ using DataTypes = std::vector; struct Array; +/* Generic class for all functions. + * Represents interface for function signature. 
+ */ class IResolvedFunction { public: diff --git a/src/Core/SortCursor.h b/src/Core/SortCursor.h index abd3e3c85f8..3c412fa1f17 100644 --- a/src/Core/SortCursor.h +++ b/src/Core/SortCursor.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -696,6 +697,8 @@ private: SortingQueueImpl>, strategy>, SortingQueueImpl>, strategy>, + SortingQueueImpl>, strategy>, + SortingQueueImpl>, strategy>, SortingQueueImpl, strategy>, SortingQueueImpl, strategy>, diff --git a/src/Core/TypeId.h b/src/Core/TypeId.h index 39058773184..d2ae56b4280 100644 --- a/src/Core/TypeId.h +++ b/src/Core/TypeId.h @@ -47,6 +47,8 @@ TYPEID_MAP(Int256) TYPEID_MAP(Float32) TYPEID_MAP(Float64) TYPEID_MAP(UUID) +TYPEID_MAP(IPv4) +TYPEID_MAP(IPv6) TYPEID_MAP(Decimal32) TYPEID_MAP(Decimal64) diff --git a/src/Core/Types.h b/src/Core/Types.h index 1eddd431c86..cd559661a96 100644 --- a/src/Core/Types.h +++ b/src/Core/Types.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB @@ -84,6 +85,8 @@ enum class TypeIndex LowCardinality, Map, Object, + IPv4, + IPv6, }; diff --git a/src/Core/callOnTypeIndex.h b/src/Core/callOnTypeIndex.h index 2634578f325..d01f8907a5c 100644 --- a/src/Core/callOnTypeIndex.h +++ b/src/Core/callOnTypeIndex.h @@ -160,6 +160,9 @@ class DataTypeDate32; class DataTypeString; class DataTypeFixedString; class DataTypeUUID; +template class DataTypeIP; +using DataTypeIPv4 = DataTypeIP; +using DataTypeIPv6 = DataTypeIP; class DataTypeDateTime; class DataTypeDateTime64; template class DataTypeEnum; @@ -206,6 +209,8 @@ bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... 
args) case TypeIndex::Enum16: return f(TypePair, T>(), std::forward(args)...); case TypeIndex::UUID: return f(TypePair(), std::forward(args)...); + case TypeIndex::IPv4: return f(TypePair(), std::forward(args)...); + case TypeIndex::IPv6: return f(TypePair(), std::forward(args)...); default: break; diff --git a/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp b/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp deleted file mode 100644 index 808aa43528e..00000000000 --- a/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#include -#include -#include - -namespace DB -{ - -void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory) -{ - factory.registerSimpleDataTypeCustom("IPv4", [] - { - auto type = DataTypeFactory::instance().get("UInt32"); - return std::make_pair(type, std::make_unique( - std::make_unique("IPv4"), std::make_unique(type->getDefaultSerialization()))); - }); - - factory.registerSimpleDataTypeCustom("IPv6", [] - { - auto type = DataTypeFactory::instance().get("FixedString(16)"); - return std::make_pair(type, std::make_unique( - std::make_unique("IPv6"), std::make_unique(type->getDefaultSerialization()))); - }); - - /// MySQL, MariaDB - factory.registerAlias("INET4", "IPv4", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INET6", "IPv6", DataTypeFactory::CaseInsensitive); -} - -} diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 9f60210f8d6..2042e571beb 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -208,11 +208,11 @@ DataTypeFactory::DataTypeFactory() registerDataTypeNullable(*this); registerDataTypeNothing(*this); registerDataTypeUUID(*this); + registerDataTypeIPv4andIPv6(*this); registerDataTypeAggregateFunction(*this); registerDataTypeNested(*this); registerDataTypeInterval(*this); registerDataTypeLowCardinality(*this); - registerDataTypeDomainIPv4AndIPv6(*this); registerDataTypeDomainBool(*this); 
registerDataTypeDomainSimpleAggregateFunction(*this); registerDataTypeDomainGeo(*this); diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index f452f6167c7..af5c0cc99c6 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -79,11 +79,11 @@ void registerDataTypeMap(DataTypeFactory & factory); void registerDataTypeNullable(DataTypeFactory & factory); void registerDataTypeNothing(DataTypeFactory & factory); void registerDataTypeUUID(DataTypeFactory & factory); +void registerDataTypeIPv4andIPv6(DataTypeFactory & factory); void registerDataTypeAggregateFunction(DataTypeFactory & factory); void registerDataTypeNested(DataTypeFactory & factory); void registerDataTypeInterval(DataTypeFactory & factory); void registerDataTypeLowCardinality(DataTypeFactory & factory); -void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory); void registerDataTypeDomainBool(DataTypeFactory & factory); void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory); void registerDataTypeDomainGeo(DataTypeFactory & factory); diff --git a/src/DataTypes/DataTypeIPv4andIPv6.cpp b/src/DataTypes/DataTypeIPv4andIPv6.cpp new file mode 100644 index 00000000000..4c0b45f472a --- /dev/null +++ b/src/DataTypes/DataTypeIPv4andIPv6.cpp @@ -0,0 +1,17 @@ +#include +#include +#include + + +namespace DB +{ + +void registerDataTypeIPv4andIPv6(DataTypeFactory & factory) +{ + factory.registerSimpleDataType("IPv4", [] { return DataTypePtr(std::make_shared()); }); + factory.registerAlias("INET4", "IPv4", DataTypeFactory::CaseInsensitive); + factory.registerSimpleDataType("IPv6", [] { return DataTypePtr(std::make_shared()); }); + factory.registerAlias("INET6", "IPv6", DataTypeFactory::CaseInsensitive); +} + +} diff --git a/src/DataTypes/DataTypeIPv4andIPv6.h b/src/DataTypes/DataTypeIPv4andIPv6.h new file mode 100644 index 00000000000..b8eacc3ee80 --- /dev/null +++ b/src/DataTypes/DataTypeIPv4andIPv6.h @@ -0,0 +1,53 @@ +#pragma once + 
+#include +#include +#include +#include + + +namespace DB +{ + +template +class DataTypeIP : public IDataType +{ +public: + static constexpr bool is_parametric = false; + + using FieldType = IPv; + using ColumnType = ColumnVector; + static constexpr auto type_id = TypeToTypeIndex; + + const char * getFamilyName() const override { return TypeName.data(); } + TypeIndex getTypeId() const override { return type_id; } + + Field getDefault() const override { return IPv{}; } + + MutableColumnPtr createColumn() const override {return ColumnVector::create();} + + bool isParametric() const override { return false; } + bool haveSubtypes() const override { return false; } + + bool equals(const IDataType & rhs) const override { return typeid(rhs) == typeid(*this); } + + bool canBeUsedInBitOperations() const override { return true; } + bool canBeInsideNullable() const override { return true; } + bool canBePromoted() const override { return false; } + bool shouldAlignRightInPrettyFormats() const override { return false; } + bool textCanContainOnlyValidUTF8() const override { return true; } + bool isComparable() const override { return true; } + bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; } + bool isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion() const override { return true; } + bool haveMaximumSizeOfValue() const override { return true; } + size_t getSizeOfValueInMemory() const override { return sizeof(IPv); } + bool isCategorial() const override { return true; } + bool canBeInsideLowCardinality() const override { return true; } + + SerializationPtr doGetDefaultSerialization() const override { return std::make_shared>(); } +}; + +using DataTypeIPv4 = DataTypeIP; +using DataTypeIPv6 = DataTypeIP; + +} diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp index a618bfe3ed4..bd65196580b 100644 --- a/src/DataTypes/DataTypeLowCardinality.cpp +++ 
b/src/DataTypes/DataTypeLowCardinality.cpp @@ -84,6 +84,10 @@ MutableColumnUniquePtr DataTypeLowCardinality::createColumnUniqueImpl(const IDat return creator(static_cast *>(nullptr)); else if (which.isUUID()) return creator(static_cast *>(nullptr)); + else if (which.isIPv4()) + return creator(static_cast *>(nullptr)); + else if (which.isIPv6()) + return creator(static_cast *>(nullptr)); else if (which.isInterval()) return creator(static_cast(nullptr)); else if (which.isInt() || which.isUInt() || which.isFloat()) diff --git a/src/DataTypes/FieldToDataType.cpp b/src/DataTypes/FieldToDataType.cpp index 49ece27cc1f..a5ea920a45f 100644 --- a/src/DataTypes/FieldToDataType.cpp +++ b/src/DataTypes/FieldToDataType.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -82,6 +83,18 @@ DataTypePtr FieldToDataType::operator() (const UUID &) const return std::make_shared(); } +template +DataTypePtr FieldToDataType::operator() (const IPv4 &) const +{ + return std::make_shared(); +} + +template +DataTypePtr FieldToDataType::operator() (const IPv6 &) const +{ + return std::make_shared(); +} + template DataTypePtr FieldToDataType::operator() (const String &) const { diff --git a/src/DataTypes/FieldToDataType.h b/src/DataTypes/FieldToDataType.h index 5e66fe420ad..bd7d5b1af85 100644 --- a/src/DataTypes/FieldToDataType.h +++ b/src/DataTypes/FieldToDataType.h @@ -28,6 +28,8 @@ public: DataTypePtr operator() (const Int64 & x) const; DataTypePtr operator() (const Int128 & x) const; DataTypePtr operator() (const UUID & x) const; + DataTypePtr operator() (const IPv4 & x) const; + DataTypePtr operator() (const IPv6 & x) const; DataTypePtr operator() (const Float64 & x) const; DataTypePtr operator() (const String & x) const; DataTypePtr operator() (const Array & x) const; diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 7440896b4c9..bafe03dbc3a 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -377,6 +377,8 @@ 
struct WhichDataType constexpr bool isStringOrFixedString() const { return isString() || isFixedString(); } constexpr bool isUUID() const { return idx == TypeIndex::UUID; } + constexpr bool isIPv4() const { return idx == TypeIndex::IPv4; } + constexpr bool isIPv6() const { return idx == TypeIndex::IPv6; } constexpr bool isArray() const { return idx == TypeIndex::Array; } constexpr bool isTuple() const { return idx == TypeIndex::Tuple; } constexpr bool isMap() const {return idx == TypeIndex::Map; } @@ -414,6 +416,8 @@ inline bool isMap(const DataTypePtr & data_type) {return WhichDataType(data_type inline bool isInterval(const DataTypePtr & data_type) {return WhichDataType(data_type).isInterval(); } inline bool isNothing(const DataTypePtr & data_type) { return WhichDataType(data_type).isNothing(); } inline bool isUUID(const DataTypePtr & data_type) { return WhichDataType(data_type).isUUID(); } +inline bool isIPv4(const DataTypePtr & data_type) { return WhichDataType(data_type).isIPv4(); } +inline bool isIPv6(const DataTypePtr & data_type) { return WhichDataType(data_type).isIPv6(); } template inline bool isObject(const T & data_type) @@ -479,7 +483,7 @@ template inline bool isColumnedAsNumber(const T & data_type) { WhichDataType which(data_type); - return which.isInt() || which.isUInt() || which.isFloat() || which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() || which.isUUID(); + return which.isInt() || which.isUInt() || which.isFloat() || which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() || which.isUUID() || which.isIPv4() || which.isIPv6(); } template diff --git a/src/DataTypes/Serializations/SerializationIP.cpp b/src/DataTypes/Serializations/SerializationIP.cpp deleted file mode 100644 index c89c2d7c8ac..00000000000 --- a/src/DataTypes/Serializations/SerializationIP.cpp +++ /dev/null @@ -1,109 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ 
- extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; - extern const int ILLEGAL_COLUMN; -} - -SerializationIPv4::SerializationIPv4(const SerializationPtr & nested_) - : SerializationCustomSimpleText(nested_) -{ -} - -void SerializationIPv4::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const -{ - const auto * col = checkAndGetColumn(&column); - if (!col) - { - throw Exception("IPv4 type can only serialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN); - } - - char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'}; - char * ptr = buffer; - formatIPv4(reinterpret_cast(&col->getData()[row_num]), ptr); - - ostr.write(buffer, strlen(buffer)); -} - -void SerializationIPv4::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const -{ - ColumnUInt32 * col = typeid_cast(&column); - if (!col) - { - throw Exception("IPv4 type can only deserialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN); - } - - char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'}; - [[maybe_unused]] size_t read_bytes = istr.read(buffer, sizeof(buffer) - 1); - UInt32 ipv4_value = 0; - - bool parse_result = parseIPv4(buffer, reinterpret_cast(&ipv4_value)); - if (!parse_result && !settings.input_format_ipv4_default_on_conversion_error) - { - throw Exception("Invalid IPv4 value", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING); - } - - col->insert(ipv4_value); - - if (whole && !istr.eof()) - throwUnexpectedDataAfterParsedValue(column, istr, settings, "IPv4"); -} - -SerializationIPv6::SerializationIPv6(const SerializationPtr & nested_) - : SerializationCustomSimpleText(nested_) -{ -} -void SerializationIPv6::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const -{ - const auto * col = checkAndGetColumn(&column); - if (!col) - { - throw Exception("IPv6 type domain can only serialize columns of type FixedString(16)." 
+ column.getName(), ErrorCodes::ILLEGAL_COLUMN); - } - - char buffer[IPV6_MAX_TEXT_LENGTH + 1] = {'\0'}; - char * ptr = buffer; - formatIPv6(reinterpret_cast(col->getDataAt(row_num).data), ptr); - - ostr.write(buffer, strlen(buffer)); -} - -void SerializationIPv6::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const -{ - ColumnFixedString * col = typeid_cast(&column); - if (!col) - { - throw Exception("IPv6 type domain can only deserialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN); - } - - char buffer[IPV6_MAX_TEXT_LENGTH + 1] = {'\0'}; - [[maybe_unused]] size_t read_bytes = istr.read(buffer, sizeof(buffer) - 1); - - std::string ipv6_value(IPV6_BINARY_LENGTH, '\0'); - - bool parse_result = parseIPv6(buffer, reinterpret_cast(ipv6_value.data())); - if (!parse_result && !settings.input_format_ipv6_default_on_conversion_error) - { - throw Exception("Invalid IPv6 value", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING); - } - - col->insertString(ipv6_value); - - if (whole && !istr.eof()) - throwUnexpectedDataAfterParsedValue(column, istr, settings, "IPv6"); -} - -} diff --git a/src/DataTypes/Serializations/SerializationIP.h b/src/DataTypes/Serializations/SerializationIP.h deleted file mode 100644 index 282105b6b1e..00000000000 --- a/src/DataTypes/Serializations/SerializationIP.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -class SerializationIPv4 final : public SerializationCustomSimpleText -{ -public: - explicit SerializationIPv4(const SerializationPtr & nested_); - - void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override; -}; - -class SerializationIPv6 : public SerializationCustomSimpleText -{ -public: - explicit SerializationIPv6(const SerializationPtr & 
nested_); - - void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override; -}; - -} diff --git a/src/DataTypes/Serializations/SerializationIPv4andIPv6.h b/src/DataTypes/Serializations/SerializationIPv4andIPv6.h new file mode 100644 index 00000000000..2c0df0f76ed --- /dev/null +++ b/src/DataTypes/Serializations/SerializationIPv4andIPv6.h @@ -0,0 +1,134 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +template +class SerializationIP : public SimpleTextSerialization +{ +public: + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override + { + writeText(assert_cast &>(column).getData()[row_num], ostr); + } + void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override + { + IPv x; + readText(x, istr); + + if (whole && !istr.eof()) + throwUnexpectedDataAfterParsedValue(column, istr, settings, TypeName.data()); + + assert_cast &>(column).getData().push_back(x); + } + void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override + { + serializeText(column, row_num, ostr, settings); + } + void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override + { + deserializeText(column, istr, settings, true); + } + void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override + { + writeChar('\'', ostr); + serializeText(column, row_num, ostr, settings); + writeChar('\'', ostr); + } + void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override + { + IPv x; + assertChar('\'', istr); + readText(x, istr); + assertChar('\'', istr); 
+ assert_cast &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety. + } + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override + { + writeChar('"', ostr); + serializeText(column, row_num, ostr, settings); + writeChar('"', ostr); + } + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override + { + IPv x; + assertChar('"', istr); + readText(x, istr); + /// this code looks weird, but we want to throw specific exception to match original behavior... + if (istr.eof()) + assertChar('"', istr); + if (*istr.position() != '"') + throwUnexpectedDataAfterParsedValue(column, istr, settings, TypeName.data()); + istr.ignore(); + + assert_cast &>(column).getData().push_back(x); + } + void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override + { + writeChar('"', ostr); + serializeText(column, row_num, ostr, settings); + writeChar('"', ostr); + } + void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override + { + IPv value; + readCSV(value, istr); + + if (!istr.eof()) + throwUnexpectedDataAfterParsedValue(column, istr, settings, TypeName.data()); + + assert_cast &>(column).getData().push_back(value); + } + + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override + { + IPv x = field.get(); + writeBinary(x, ostr); + } + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override + { + IPv x; + readBinary(x.toUnderType(), istr); + field = NearestFieldType(x); + } + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override + { + writeBinary(assert_cast &>(column).getData()[row_num], ostr); + } + void deserializeBinary(IColumn & column, ReadBuffer & 
istr, const FormatSettings &) const override + { + IPv x; + readBinary(x.toUnderType(), istr); + assert_cast &>(column).getData().push_back(x); + } + void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override + { + const typename ColumnVector::Container & x = typeid_cast &>(column).getData(); + + size_t size = x.size(); + + if (limit == 0 || offset + limit > size) + limit = size - offset; + + if (limit) + ostr.write(reinterpret_cast(&x[offset]), sizeof(IPv) * limit); + } + void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const override + { + typename ColumnVector::Container & x = typeid_cast &>(column).getData(); + size_t initial_size = x.size(); + x.resize(initial_size + limit); + size_t size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(IPv) * limit); + x.resize(initial_size + size / sizeof(IPv)); + } +}; + +using SerializationIPv4 = SerializationIP; +using SerializationIPv6 = SerializationIP; + +} diff --git a/src/Dictionaries/CacheDictionaryStorage.h b/src/Dictionaries/CacheDictionaryStorage.h index 684b4a1311f..5b52fbde00d 100644 --- a/src/Dictionaries/CacheDictionaryStorage.h +++ b/src/Dictionaries/CacheDictionaryStorage.h @@ -557,6 +557,8 @@ private: ContainerType, ContainerType, ContainerType, + ContainerType, + ContainerType, ContainerType, ContainerType, ContainerType> attribute_container; diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 0046b2aaf5a..6e206cafb4a 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -267,6 +267,14 @@ public: { return ColumnType::create(size); } + else if constexpr (std::is_same_v) + { + return ColumnType::create(size); + } + else if constexpr (std::is_same_v) + { + return ColumnType::create(size); + } else if constexpr (is_decimal) { auto nested_type = removeNullable(dictionary_attribute.type); diff --git 
a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h index 9ae3e9a2e71..d09b2c01b8e 100644 --- a/src/Dictionaries/FlatDictionary.h +++ b/src/Dictionaries/FlatDictionary.h @@ -136,6 +136,8 @@ private: ContainerType, ContainerType, ContainerType, + ContainerType, + ContainerType, ContainerType, ContainerType> container; diff --git a/src/Dictionaries/HashedArrayDictionary.h b/src/Dictionaries/HashedArrayDictionary.h index 8df9ce4c1f7..bcb3f85ef06 100644 --- a/src/Dictionaries/HashedArrayDictionary.h +++ b/src/Dictionaries/HashedArrayDictionary.h @@ -156,6 +156,8 @@ private: AttributeContainerType, AttributeContainerType, AttributeContainerType, + AttributeContainerType, + AttributeContainerType, AttributeContainerType, AttributeContainerType> container; diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index ba5d284466a..1fb38e8f9a6 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -186,6 +186,8 @@ private: CollectionType, CollectionType, CollectionType, + CollectionType, + CollectionType, CollectionType, CollectionType> container; diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index be9a5a88ecd..31ae34f8c4b 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -27,7 +28,6 @@ namespace ErrorCodes extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; extern const int CANNOT_PARSE_NUMBER; extern const int DICTIONARY_IS_EMPTY; - extern const int LOGICAL_ERROR; extern const int TYPE_MISMATCH; extern const int UNSUPPORTED_METHOD; } @@ -129,12 +129,12 @@ static void validateKeyTypes(const DataTypes & key_types) if (key_types.empty() || key_types.size() > 2) throw Exception(ErrorCodes::TYPE_MISMATCH, "Expected a single IP address or IP with mask"); - const auto * key_ipv4type = 
typeid_cast(key_types[0].get()); - const auto * key_ipv6type = typeid_cast(key_types[0].get()); + TypeIndex type_id = key_types[0]->getTypeId(); + const auto * key_string = typeid_cast(key_types[0].get()); - if (key_ipv4type == nullptr && (key_ipv6type == nullptr || key_ipv6type->getN() != 16)) + if (type_id != TypeIndex::IPv4 && type_id != TypeIndex::UInt32 && type_id != TypeIndex::IPv6 && !(key_string && key_string->getN() == IPV6_BINARY_LENGTH)) throw Exception(ErrorCodes::TYPE_MISMATCH, - "Key does not match, expected either `IPv4` (`UInt32`) or `IPv6` (`FixedString(16)`)"); + "Key does not match, expected either IPv4 (or UInt32) or IPv6 (or FixedString(16))"); if (key_types.size() > 1) { @@ -296,30 +296,33 @@ ColumnUInt8::Ptr IPAddressDictionary::hasKeys(const Columns & key_columns, const size_t keys_found = 0; - if (first_column->isNumeric()) + TypeIndex type_id = first_column->getDataType(); + + if (type_id == TypeIndex::IPv4 || type_id == TypeIndex::UInt32) { uint8_t addrv6_buf[IPV6_BINARY_LENGTH]; for (const auto i : collections::range(0, rows)) { - auto addrv4 = static_cast(first_column->get64(i)); + auto addrv4 = *reinterpret_cast(first_column->getDataAt(i).data); auto found = tryLookupIPv4(addrv4, addrv6_buf); out[i] = (found != ipNotFound()); keys_found += out[i]; } } - else + else if (type_id == TypeIndex::IPv6 || type_id == TypeIndex::FixedString) { for (const auto i : collections::range(0, rows)) { auto addr = first_column->getDataAt(i); - if (unlikely(addr.size != IPV6_BINARY_LENGTH)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected key to be FixedString(16)"); - + if (addr.size != IPV6_BINARY_LENGTH) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Expected key FixedString(16)"); auto found = tryLookupIPv6(reinterpret_cast(addr.data)); out[i] = (found != ipNotFound()); keys_found += out[i]; } } + else + throw Exception(ErrorCodes::TYPE_MISMATCH, "Expected key to be IPv4 (or UInt32) or IPv6 (or FixedString(16))"); query_count.fetch_add(rows, 
std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); @@ -709,13 +712,15 @@ void IPAddressDictionary::getItemsImpl( size_t keys_found = 0; - if (first_column->isNumeric()) + TypeIndex type_id = first_column->getDataType(); + + if (type_id == TypeIndex::IPv4 || type_id == TypeIndex::UInt32) { uint8_t addrv6_buf[IPV6_BINARY_LENGTH]; for (const auto i : collections::range(0, rows)) { // addrv4 has native endianness - auto addrv4 = static_cast(first_column->get64(i)); + auto addrv4 = *reinterpret_cast(first_column->getDataAt(i).data); auto found = tryLookupIPv4(addrv4, addrv6_buf); if (found != ipNotFound()) { @@ -726,14 +731,13 @@ void IPAddressDictionary::getItemsImpl( set_value(i, default_value_extractor[i]); } } - else + else if (type_id == TypeIndex::IPv6 || type_id == TypeIndex::FixedString) { for (const auto i : collections::range(0, rows)) { auto addr = first_column->getDataAt(i); if (addr.size != IPV6_BINARY_LENGTH) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected key to be FixedString(16)"); - + throw Exception(ErrorCodes::TYPE_MISMATCH, "Expected key to be FixedString(16)"); auto found = tryLookupIPv6(reinterpret_cast(addr.data)); if (found != ipNotFound()) { @@ -744,6 +748,8 @@ void IPAddressDictionary::getItemsImpl( set_value(i, default_value_extractor[i]); } } + else + throw Exception(ErrorCodes::TYPE_MISMATCH, "Expected key to be IPv4 (or UInt32) or IPv6 (or FixedString(16))"); query_count.fetch_add(rows, std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h index cd6e64f1800..e1fabb89a7e 100644 --- a/src/Dictionaries/IPAddressDictionary.h +++ b/src/Dictionaries/IPAddressDictionary.h @@ -118,6 +118,8 @@ private: Float32, Float64, UUID, + IPv4, + IPv6, String, Array> null_values; @@ -142,6 +144,8 @@ private: ContainerType, ContainerType, ContainerType, + ContainerType, + ContainerType, 
ContainerType, ContainerType> maps; diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 257bfcd528c..abff492471e 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -150,6 +150,8 @@ private: AttributeContainerType, AttributeContainerType, AttributeContainerType, + AttributeContainerType, + AttributeContainerType, AttributeContainerType, AttributeContainerType> container; @@ -189,7 +191,9 @@ private: ContainerType, ContainerType, ContainerType, - ContainerType>; + ContainerType, + ContainerType, + ContainerType>; struct KeyAttribute final { diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index 97dac4b10fc..218f23d53b8 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -1648,6 +1648,83 @@ namespace String text_buffer; }; + /// Serializes a ColumnVector containing IPv6s to a field of type TYPE_STRING or TYPE_BYTES. 
+ class ProtobufSerializerIPv6 : public ProtobufSerializerSingleValue + { + public: + ProtobufSerializerIPv6( + std::string_view column_name_, + const google::protobuf::FieldDescriptor & field_descriptor_, + const ProtobufReaderOrWriter & reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) + { + setFunctions(); + } + + void writeRow(size_t row_num) override + { + const auto & column_vector = assert_cast &>(*column); + write_function(column_vector.getElement(row_num)); + } + + void readRow(size_t row_num) override + { + IPv6 value = read_function(); + auto & column_vector = assert_cast &>(column->assumeMutableRef()); + if (row_num < column_vector.size()) + column_vector.getElement(row_num) = value; + else + column_vector.insertValue(value); + } + + void insertDefaults(size_t row_num) override + { + auto & column_vector = assert_cast &>(column->assumeMutableRef()); + if (row_num < column_vector.size()) + return; + column_vector.insertDefault(); + } + + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializer" << TypeName << ": column " << quoteString(column_name) << " -> field " + << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() << ")\n"; + } + + private: + void setFunctions() + { + if ((field_typeid != FieldTypeId::TYPE_STRING) && (field_typeid != FieldTypeId::TYPE_BYTES)) + incompatibleColumnType(TypeName); + + write_function = [this](IPv6 value) + { + ipToString(value, text_buffer); + writeStr(text_buffer); + }; + + read_function = [this]() -> IPv6 + { + readStr(text_buffer); + return parse(text_buffer); + }; + + default_function = [this]() -> IPv6 { return parse(field_descriptor.default_value_string()); }; + } + + static void ipToString(const IPv6 & ip, String & str) + { + WriteBufferFromString buf{str}; + writeText(ip, buf); + } + + std::function write_function; + std::function read_function; + std::function 
default_function; + String text_buffer; + }; + + using ProtobufSerializerIPv4 = ProtobufSerializerNumber; using ProtobufSerializerInterval = ProtobufSerializerNumber; @@ -3286,6 +3363,8 @@ namespace case TypeIndex::Decimal128: return std::make_unique>(column_name, assert_cast &>(*data_type), field_descriptor, reader_or_writer); case TypeIndex::Decimal256: return std::make_unique>(column_name, assert_cast &>(*data_type), field_descriptor, reader_or_writer); case TypeIndex::UUID: return std::make_unique(column_name, field_descriptor, reader_or_writer); + case TypeIndex::IPv4: return std::make_unique(column_name, field_descriptor, reader_or_writer); + case TypeIndex::IPv6: return std::make_unique(column_name, field_descriptor, reader_or_writer); case TypeIndex::Interval: return std::make_unique(column_name, field_descriptor, reader_or_writer); case TypeIndex::AggregateFunction: return std::make_unique(column_name, typeid_cast>(data_type), field_descriptor, reader_or_writer); diff --git a/src/Functions/CastOverloadResolver.h b/src/Functions/CastOverloadResolver.h index 0bd0bad0d14..29394c2af1c 100644 --- a/src/Functions/CastOverloadResolver.h +++ b/src/Functions/CastOverloadResolver.h @@ -33,10 +33,10 @@ public: ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - explicit CastOverloadResolverImpl(std::optional diagnostic_, bool keep_nullable_, bool cast_ipv4_ipv6_default_on_conversion_error_, const DataTypeValidationSettings & data_type_validation_settings_) - : diagnostic(std::move(diagnostic_)) + explicit CastOverloadResolverImpl(ContextPtr context_, std::optional diagnostic_, bool keep_nullable_, const DataTypeValidationSettings & data_type_validation_settings_) + : context(context_) + , diagnostic(std::move(diagnostic_)) , keep_nullable(keep_nullable_) - , cast_ipv4_ipv6_default_on_conversion_error(cast_ipv4_ipv6_default_on_conversion_error_) , data_type_validation_settings(data_type_validation_settings_) { } @@ -46,15 +46,21 @@ 
public: const auto & settings_ref = context->getSettingsRef(); if constexpr (internal) - return createImpl({}, false /*keep_nullable*/, settings_ref.cast_ipv4_ipv6_default_on_conversion_error); + return createImpl(context, {}, false /*keep_nullable*/); - return createImpl({}, settings_ref.cast_keep_nullable, settings_ref.cast_ipv4_ipv6_default_on_conversion_error, DataTypeValidationSettings(settings_ref)); + return createImpl(context, {}, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref)); } - static FunctionOverloadResolverPtr createImpl(std::optional diagnostic = {}, bool keep_nullable = false, bool cast_ipv4_ipv6_default_on_conversion_error = false, const DataTypeValidationSettings & data_type_validation_settings = {}) + static FunctionOverloadResolverPtr createImpl(ContextPtr context, std::optional diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {}) { assert(!internal || !keep_nullable); - return std::make_unique(std::move(diagnostic), keep_nullable, cast_ipv4_ipv6_default_on_conversion_error, data_type_validation_settings); + return std::make_unique(context, std::move(diagnostic), keep_nullable, data_type_validation_settings); + } + + static FunctionOverloadResolverPtr createImpl(std::optional diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {}) + { + assert(!internal || !keep_nullable); + return std::make_unique(ContextPtr(), std::move(diagnostic), keep_nullable, data_type_validation_settings); } protected: @@ -67,7 +73,7 @@ protected: data_types[i] = arguments[i].type; auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get()); - return std::make_unique>(name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type, cast_ipv4_ipv6_default_on_conversion_error); + return std::make_unique>(context, name, std::move(monotonicity), data_types, 
return_type, diagnostic, cast_type); } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override @@ -104,9 +110,9 @@ protected: bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } private: + ContextPtr context; std::optional diagnostic; bool keep_nullable; - bool cast_ipv4_ipv6_default_on_conversion_error; DataTypeValidationSettings data_type_validation_settings; }; diff --git a/src/Functions/FunctionsBinaryRepresentation.cpp b/src/Functions/FunctionsBinaryRepresentation.cpp index b0bdbc2130c..a93f7c27ff4 100644 --- a/src/Functions/FunctionsBinaryRepresentation.cpp +++ b/src/Functions/FunctionsBinaryRepresentation.cpp @@ -257,6 +257,8 @@ public: !which.isFloat() && !which.isDecimal() && !which.isUUID() && + !which.isIPv4() && + !which.isIPv6() && !which.isAggregateFunction()) throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -297,7 +299,9 @@ public: tryExecuteDecimal(column, res_column) || tryExecuteDecimal(column, res_column) || tryExecuteDecimal(column, res_column) || - tryExecuteUUID(column, res_column)) + tryExecuteUUID(column, res_column) || + tryExecuteIPv4(column, res_column) || + tryExecuteIPv6(column, res_column)) return res_column; throw Exception("Illegal column " + arguments[0].column->getName() @@ -519,6 +523,88 @@ public: return false; } } + + bool tryExecuteIPv6(const IColumn * col, ColumnPtr & col_res) const + { + const ColumnIPv6 * col_vec = checkAndGetColumn(col); + + static constexpr size_t MAX_LENGTH = sizeof(IPv6) * word_size + 1; /// Including trailing zero byte. 
+ + if (!col_vec) + return false; + + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const typename ColumnIPv6::Container & in_vec = col_vec->getData(); + const IPv6* ip = in_vec.data(); + + size_t size = in_vec.size(); + out_offsets.resize(size); + out_vec.resize(size * (word_size+1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte. + + size_t pos = 0; + for (size_t i = 0; i < size; ++i) + { + /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it). + if (pos + MAX_LENGTH > out_vec.size()) + out_vec.resize(out_vec.size() * word_size + MAX_LENGTH); + + char * begin = reinterpret_cast(&out_vec[pos]); + char * end = begin; + + Impl::executeOneString(reinterpret_cast(&ip[i].toUnderType().items[0]), reinterpret_cast(&ip[i].toUnderType().items[2]), end); + + pos += end - begin; + out_offsets[i] = pos; + } + out_vec.resize(pos); + + col_res = std::move(col_str); + return true; + } + + bool tryExecuteIPv4(const IColumn * col, ColumnPtr & col_res) const + { + const ColumnIPv4 * col_vec = checkAndGetColumn(col); + + static constexpr size_t MAX_LENGTH = sizeof(IPv4) * word_size + 1; /// Including trailing zero byte. + + if (!col_vec) + return false; + + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const typename ColumnIPv4::Container & in_vec = col_vec->getData(); + const IPv4* ip = in_vec.data(); + + size_t size = in_vec.size(); + out_offsets.resize(size); + out_vec.resize(size * (word_size+1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte. + + size_t pos = 0; + for (size_t i = 0; i < size; ++i) + { + /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it). 
+ if (pos + MAX_LENGTH > out_vec.size()) + out_vec.resize(out_vec.size() * word_size + MAX_LENGTH); + + char * begin = reinterpret_cast(&out_vec[pos]); + char * end = begin; + + Impl::executeOneUIntOrInt(ip[i].toUnderType(), end); + + pos += end - begin; + out_offsets[i] = pos; + } + out_vec.resize(pos); + + col_res = std::move(col_str); + return true; + } }; /// Decode number or string from string with binary or hexadecimal representation diff --git a/src/Functions/FunctionsCodingIP.cpp b/src/Functions/FunctionsCodingIP.cpp index 3fea5e9d898..75b3d76745f 100644 --- a/src/Functions/FunctionsCodingIP.cpp +++ b/src/Functions/FunctionsCodingIP.cpp @@ -1,3 +1,4 @@ +#include #ifdef HAS_RESERVED_IDENTIFIER #pragma clang diagnostic ignored "-Wreserved-identifier" #endif @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -44,8 +46,8 @@ namespace ErrorCodes /** Encoding functions for network addresses: * - * IPv4NumToString (num) - See below. - * IPv4StringToNum(string) - Convert, for example, '192.168.0.1' to 3232235521 and vice versa. + * IPv6NumToString (num) - See below. + * IPv6StringToNum(string) - Convert, for example, '::1' to 1 and vice versa. 
*/ class FunctionIPv6NumToString : public IFunction { @@ -61,59 +63,64 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - const auto * ptr = checkAndGetDataType(arguments[0].get()); - if (!ptr || ptr->getN() != IPV6_BINARY_LENGTH) - throw Exception("Illegal type " + arguments[0]->getName() + - " of argument of function " + getName() + - ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + const auto * arg_string = checkAndGetDataType(arguments[0].get()); + const auto * arg_ipv6 = checkAndGetDataType(arguments[0].get()); + if (!arg_ipv6 && !(arg_string && arg_string->getN() == IPV6_BINARY_LENGTH)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}, expected IPv6 or FixedString({})", + arguments[0]->getName(), getName(), IPV6_BINARY_LENGTH + ); return std::make_shared(); } bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto & col_type_name = arguments[0]; - const ColumnPtr & column = col_type_name.column; + const ColumnPtr & column = arguments[0].column; + const auto * col_ipv6 = checkAndGetColumn(column.get()); + const auto * col_string = checkAndGetColumn(column.get()); + if (!col_ipv6 && !(col_string && col_string->getN() == IPV6_BINARY_LENGTH)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal column {} of argument of function {}, expected IPv6 or FixedString({})", + arguments[0].name, getName(), IPV6_BINARY_LENGTH + ); - if (const auto * col_in = checkAndGetColumn(column.get())) + auto col_res = ColumnString::create(); + ColumnString::Chars & vec_res = col_res->getChars(); + ColumnString::Offsets & 
offsets_res = col_res->getOffsets(); + vec_res.resize(input_rows_count * (IPV6_MAX_TEXT_LENGTH + 1)); + offsets_res.resize(input_rows_count); + + auto * begin = reinterpret_cast(vec_res.data()); + auto * pos = begin; + + if (col_ipv6) { - if (col_in->getN() != IPV6_BINARY_LENGTH) - throw Exception("Illegal type " + col_type_name.type->getName() + - " of column " + col_in->getName() + - " argument of function " + getName() + - ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + const auto & vec_in = col_ipv6->getData(); - const auto size = col_in->size(); - const auto & vec_in = col_in->getChars(); - - auto col_res = ColumnString::create(); - - ColumnString::Chars & vec_res = col_res->getChars(); - ColumnString::Offsets & offsets_res = col_res->getOffsets(); - vec_res.resize(size * (IPV6_MAX_TEXT_LENGTH + 1)); - offsets_res.resize(size); - - auto * begin = reinterpret_cast(vec_res.data()); - auto * pos = begin; - - for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += IPV6_BINARY_LENGTH, ++i) + for (size_t i = 0; i < input_rows_count; ++i) { - formatIPv6(reinterpret_cast(&vec_in[offset]), pos); + formatIPv6(reinterpret_cast(&vec_in[i]), pos); offsets_res[i] = pos - begin; } - - vec_res.resize(pos - begin); - - return col_res; } else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + { + const auto & vec_in = col_string->getChars(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + formatIPv6(reinterpret_cast(&vec_in[i * IPV6_BINARY_LENGTH]), pos); + offsets_res[i] = pos - begin; + } + } + + vec_res.resize(pos - begin); + return col_res; } }; @@ -302,14 +309,14 @@ public: { if (cast_ipv4_ipv6_default_on_conversion_error) { - auto result = convertToIPv6(column, null_map); + auto result = convertToIPv6(column, null_map); if (null_map && !result->isNullable()) return ColumnNullable::create(result, 
null_map_column); return result; } } - auto result = convertToIPv6(column, null_map); + auto result = convertToIPv6(column, null_map); if (null_map && !result->isNullable()) return ColumnNullable::create(IColumn::mutate(result), IColumn::mutate(null_map_column)); return result; @@ -325,6 +332,43 @@ private: template class FunctionIPv4NumToString : public IFunction { +private: + template + ColumnPtr executeTyped(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const + { + using ColumnType = ColumnVector; + + const ColumnPtr & column = arguments[0].column; + + if (const ColumnType * col = typeid_cast(column.get())) + { + const typename ColumnType::Container & vec_in = col->getData(); + + auto col_res = ColumnString::create(); + + ColumnString::Chars & vec_res = col_res->getChars(); + ColumnString::Offsets & offsets_res = col_res->getOffsets(); + + vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0 + offsets_res.resize(vec_in.size()); + char * begin = reinterpret_cast(vec_res.data()); + char * pos = begin; + + for (size_t i = 0; i < vec_in.size(); ++i) + { + DB::formatIPv4(reinterpret_cast(&vec_in[i]), sizeof(ArgType), pos, mask_tail_octets, "xxx"); + offsets_res[i] = pos - begin; + } + + vec_res.resize(pos - begin); + + return col_res; + } + else + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } public: static constexpr auto name = Name::name; static FunctionPtr create(ContextPtr) { return std::make_shared>(); } @@ -340,47 +384,36 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (!WhichDataType(arguments[0]).isUInt32()) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName() + ", expected UInt32", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + WhichDataType arg_type(arguments[0]); + if 
(!(arg_type.isIPv4() || arg_type.isUInt8() || arg_type.isUInt16() || arg_type.isUInt32())) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected IPv4 or UInt8 or UInt16 or UInt32", + arguments[0]->getName(), getName() + ); return std::make_shared(); } bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & ret_type, size_t input_rows_count) const override { - const ColumnPtr & column = arguments[0].column; - if (const ColumnUInt32 * col = typeid_cast(column.get())) + switch (arguments[0].type->getTypeId()) { - const ColumnUInt32::Container & vec_in = col->getData(); - - auto col_res = ColumnString::create(); - - ColumnString::Chars & vec_res = col_res->getChars(); - ColumnString::Offsets & offsets_res = col_res->getOffsets(); - - vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0 - offsets_res.resize(vec_in.size()); - char * begin = reinterpret_cast(vec_res.data()); - char * pos = begin; - - for (size_t i = 0; i < vec_in.size(); ++i) - { - DB::formatIPv4(reinterpret_cast(&vec_in[i]), pos, mask_tail_octets, "xxx"); - offsets_res[i] = pos - begin; - } - - vec_res.resize(pos - begin); - - return col_res; + case TypeIndex::IPv4: return executeTyped(arguments, ret_type, input_rows_count); + case TypeIndex::UInt8: return executeTyped(arguments, ret_type, input_rows_count); + case TypeIndex::UInt16: return executeTyped(arguments, ret_type, input_rows_count); + case TypeIndex::UInt32: return executeTyped(arguments, ret_type, input_rows_count); + default: break; } - else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + + throw 
Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of argument of function {}, expected IPv4 or UInt8 or UInt16 or UInt32", + arguments[0].column->getName(), getName() + ); } }; @@ -444,14 +477,14 @@ public: { if (cast_ipv4_ipv6_default_on_conversion_error) { - auto result = convertToIPv4(column, null_map); + auto result = convertToIPv4(column, null_map); if (null_map && !result->isNullable()) return ColumnNullable::create(result, null_map_column); return result; } } - auto result = convertToIPv4(column, null_map); + auto result = convertToIPv4(column, null_map); if (null_map && !result->isNullable()) return ColumnNullable::create(IColumn::mutate(result), IColumn::mutate(null_map_column)); return result; @@ -474,13 +507,21 @@ public: bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + /// for backward compatibility IPv4ToIPv6 is overloaded, and result type depends on type of argument - + /// if it is UInt32 (presenting IPv4) then result is FixedString(16), if IPv4 - result is IPv6 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (!checkAndGetDataType(arguments[0].get())) - throw Exception("Illegal type " + arguments[0]->getName() + - " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + const auto * dt_uint32 = checkAndGetDataType(arguments[0].get()); + const auto * dt_ipv4 = checkAndGetDataType(arguments[0].get()); + if (!dt_uint32 && !dt_ipv4) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}", arguments[0]->getName(), getName() + ); - return std::make_shared(16); + if (dt_uint32) + return std::make_shared(16); + return std::make_shared(); } bool useDefaultImplementationForConstants() const override { return true; } @@ -490,7 +531,22 @@ public: const auto & col_type_name = 
arguments[0]; const ColumnPtr & column = col_type_name.column; - if (const auto * col_in = typeid_cast(column.get())) + if (const auto * col_in = checkAndGetColumn(*column)) + { + auto col_res = ColumnIPv6::create(); + + auto & vec_res = col_res->getData(); + vec_res.resize(col_in->size()); + + const auto & vec_in = col_in->getData(); + + for (size_t i = 0; i < vec_res.size(); ++i) + mapIPv4ToIPv6(vec_in[i], reinterpret_cast(&vec_res[i].toUnderType())); + + return col_res; + } + + if (const auto * col_in = checkAndGetColumn(*column)) { auto col_res = ColumnFixedString::create(IPV6_BINARY_LENGTH); @@ -504,10 +560,11 @@ public: return col_res; } - else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName() + ); } private: @@ -522,82 +579,6 @@ private: } }; -template -class FunctionToIPv4 : public FunctionIPv4StringToNum -{ -public: - using Base = FunctionIPv4StringToNum; - - static constexpr auto name = exception_mode == IPStringToNumExceptionMode::Throw - ? "toIPv4" - : (exception_mode == IPStringToNumExceptionMode::Default ? 
"toIPv4OrDefault" : "toIPv4OrNull"); - - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - - explicit FunctionToIPv4(ContextPtr context) : Base(context) { } - - String getName() const override { return name; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - size_t getNumberOfArguments() const override { return 1; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isString(removeNullable(arguments[0]))) - { - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); - } - - auto result_type = DataTypeFactory::instance().get("IPv4"); - - if constexpr (exception_mode == IPStringToNumExceptionMode::Null) - { - return makeNullable(result_type); - } - - return arguments[0]->isNullable() ? makeNullable(result_type) : result_type; - } -}; - -template -class FunctionToIPv6 : public FunctionIPv6StringToNum -{ -public: - using Base = FunctionIPv6StringToNum; - - static constexpr auto name = exception_mode == IPStringToNumExceptionMode::Throw - ? "toIPv6" - : (exception_mode == IPStringToNumExceptionMode::Default ? 
"toIPv6OrDefault" : "toIPv6OrNull"); - - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - - explicit FunctionToIPv6(ContextPtr context) : Base(context) { } - - String getName() const override { return name; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isStringOrFixedString(removeNullable(arguments[0]))) - { - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); - } - - auto result_type = DataTypeFactory::instance().get("IPv6"); - - if constexpr (exception_mode == IPStringToNumExceptionMode::Null) - { - return makeNullable(result_type); - } - - return arguments[0]->isNullable() ? makeNullable(result_type) : result_type; - } -}; - class FunctionMACNumToString : public IFunction { public: @@ -800,7 +781,7 @@ private: #include - static inline void applyCIDRMask(const UInt8 * __restrict src, UInt8 * __restrict dst_lower, UInt8 * __restrict dst_upper, UInt8 bits_to_keep) + static inline void applyCIDRMask(const char * __restrict src, char * __restrict dst_lower, char * __restrict dst_upper, UInt8 bits_to_keep) { __m128i mask = _mm_loadu_si128(reinterpret_cast(getCIDRMaskIPv6(bits_to_keep).data())); __m128i lower = _mm_and_si128(_mm_loadu_si128(reinterpret_cast(src)), mask); @@ -814,7 +795,7 @@ private: #else /// NOTE IPv6 is stored in memory in big endian format that makes some difficulties. 
- static void applyCIDRMask(const UInt8 * __restrict src, UInt8 * __restrict dst_lower, UInt8 * __restrict dst_upper, UInt8 bits_to_keep) + static void applyCIDRMask(const char * __restrict src, char * __restrict dst_lower, char * __restrict dst_upper, UInt8 bits_to_keep) { const auto & mask = getCIDRMaskIPv6(bits_to_keep); @@ -837,20 +818,24 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - const auto * first_argument = checkAndGetDataType(arguments[0].get()); - if (!first_argument || first_argument->getN() != IPV6_BINARY_LENGTH) - throw Exception("Illegal type " + arguments[0]->getName() + - " of first argument of function " + getName() + - ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + const auto * ipv6 = checkAndGetDataType(arguments[0].get()); + const auto * str = checkAndGetDataType(arguments[0].get()); + if (!ipv6 && !(str && str->getN() == IPV6_BINARY_LENGTH)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected IPv6 or FixedString({})", + arguments[0]->getName(), getName(), IPV6_BINARY_LENGTH + ); const DataTypePtr & second_argument = arguments[1]; if (!isUInt8(second_argument)) - throw Exception{"Illegal type " + second_argument->getName() - + " of second argument of function " + getName() - + ", expected UInt8", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {}, expected UInt8", + second_argument->getName(), getName() + ); - DataTypePtr element = DataTypeFactory::instance().get("IPv6"); + DataTypePtr element = std::make_shared(); return std::make_shared(DataTypes{element, element}); } @@ -859,25 +844,6 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto & col_type_name_ip = arguments[0]; - 
const ColumnPtr & column_ip = col_type_name_ip.column; - - const auto * col_const_ip_in = checkAndGetColumnConst(column_ip.get()); - const auto * col_ip_in = checkAndGetColumn(column_ip.get()); - - if (!col_ip_in && !col_const_ip_in) - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - - if ((col_const_ip_in && col_const_ip_in->getValue().size() != IPV6_BINARY_LENGTH) || - (col_ip_in && col_ip_in->getN() != IPV6_BINARY_LENGTH)) - throw Exception("Illegal type " + col_type_name_ip.type->getName() + - " of column " + column_ip->getName() + - " argument of function " + getName() + - ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - const auto & col_type_name_cidr = arguments[1]; const ColumnPtr & column_cidr = col_type_name_cidr.column; @@ -885,39 +851,57 @@ public: const auto * col_cidr_in = checkAndGetColumn(column_cidr.get()); if (!col_const_cidr_in && !col_cidr_in) - throw Exception("Illegal column " + arguments[1].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of argument of function {}", + arguments[1].column->getName(), getName() + ); - auto col_res_lower_range = ColumnFixedString::create(IPV6_BINARY_LENGTH); - auto col_res_upper_range = ColumnFixedString::create(IPV6_BINARY_LENGTH); + const auto & col_type_name_ip = arguments[0]; + const ColumnPtr & column_ip = col_type_name_ip.column; - ColumnString::Chars & vec_res_lower_range = col_res_lower_range->getChars(); - vec_res_lower_range.resize(input_rows_count * IPV6_BINARY_LENGTH); + const auto * col_const_ip_in = checkAndGetColumnConst(column_ip.get()); + const auto * col_ip_in = checkAndGetColumn(column_ip.get()); - ColumnString::Chars & vec_res_upper_range = col_res_upper_range->getChars(); - vec_res_upper_range.resize(input_rows_count * 
IPV6_BINARY_LENGTH); + const auto * col_const_str_in = checkAndGetColumnConst(column_ip.get()); + const auto * col_str_in = checkAndGetColumn(column_ip.get()); + + std::function get_ip_data; + if (col_const_ip_in) + get_ip_data = [col_const_ip_in](size_t) { return col_const_ip_in->getDataAt(0).data; }; + else if (col_const_str_in) + get_ip_data = [col_const_str_in](size_t) { return col_const_str_in->getDataAt(0).data; }; + else if (col_ip_in) + get_ip_data = [col_ip_in](size_t i) { return reinterpret_cast(&col_ip_in->getData()[i]); }; + else if (col_str_in) + get_ip_data = [col_str_in](size_t i) { return reinterpret_cast(&col_str_in->getChars().data()[i * IPV6_BINARY_LENGTH]); }; + else + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName() + ); + + auto col_res_lower_range = ColumnIPv6::create(); + auto col_res_upper_range = ColumnIPv6::create(); + + auto & vec_res_lower_range = col_res_lower_range->getData(); + vec_res_lower_range.resize(input_rows_count); + + auto & vec_res_upper_range = col_res_upper_range->getData(); + vec_res_upper_range.resize(input_rows_count); static constexpr UInt8 max_cidr_mask = IPV6_BINARY_LENGTH * 8; - const String col_const_ip_str = col_const_ip_in ? col_const_ip_in->getValue() : ""; - const UInt8 * col_const_ip_value = col_const_ip_in ? reinterpret_cast(col_const_ip_str.c_str()) : nullptr; - - for (size_t offset = 0; offset < input_rows_count; ++offset) + for (size_t i = 0; i < input_rows_count; ++i) { - const size_t offset_ipv6 = offset * IPV6_BINARY_LENGTH; - - const UInt8 * ip = col_const_ip_in - ? col_const_ip_value - : &col_ip_in->getChars()[offset_ipv6]; - UInt8 cidr = col_const_cidr_in - ? col_const_cidr_in->getValue() - : col_cidr_in->getData()[offset]; + ? 
col_const_cidr_in->getValue() + : col_cidr_in->getData()[i]; cidr = std::min(cidr, max_cidr_mask); - applyCIDRMask(ip, &vec_res_lower_range[offset_ipv6], &vec_res_upper_range[offset_ipv6], cidr); + applyCIDRMask(get_ip_data(i), reinterpret_cast(&vec_res_lower_range[i]), reinterpret_cast(&vec_res_upper_range[i]), cidr); } return ColumnTuple::create(Columns{std::move(col_res_lower_range), std::move(col_res_upper_range)}); @@ -942,47 +926,15 @@ private: return { lower, upper }; } -public: - static constexpr auto name = "IPv4CIDRToRange"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 2; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!WhichDataType(arguments[0]).isUInt32()) - throw Exception("Illegal type " + arguments[0]->getName() + - " of first argument of function " + getName() + - ", expected UInt32", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - - const DataTypePtr & second_argument = arguments[1]; - if (!isUInt8(second_argument)) - throw Exception{"Illegal type " + second_argument->getName() - + " of second argument of function " + getName() - + ", expected UInt8", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - - DataTypePtr element = DataTypeFactory::instance().get("IPv4"); - return std::make_shared(DataTypes{element, element}); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + template + ColumnPtr executeTyped(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { + using ColumnType = ColumnVector; const auto & col_type_name_ip = arguments[0]; const ColumnPtr & 
column_ip = col_type_name_ip.column; - const auto * col_const_ip_in = checkAndGetColumnConst(column_ip.get()); - const auto * col_ip_in = checkAndGetColumn(column_ip.get()); - if (!col_const_ip_in && !col_ip_in) - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + const auto * col_const_ip_in = checkAndGetColumnConst(column_ip.get()); + const auto * col_ip_in = checkAndGetColumn(column_ip.get()); const auto & col_type_name_cidr = arguments[1]; const ColumnPtr & column_cidr = col_type_name_cidr.column; @@ -990,13 +942,8 @@ public: const auto * col_const_cidr_in = checkAndGetColumnConst(column_cidr.get()); const auto * col_cidr_in = checkAndGetColumn(column_cidr.get()); - if (!col_const_cidr_in && !col_cidr_in) - throw Exception("Illegal column " + arguments[1].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - - auto col_res_lower_range = ColumnUInt32::create(); - auto col_res_upper_range = ColumnUInt32::create(); + auto col_res_lower_range = ColumnIPv4::create(); + auto col_res_upper_range = ColumnIPv4::create(); auto & vec_res_lower_range = col_res_lower_range->getData(); vec_res_lower_range.resize(input_rows_count); @@ -1006,8 +953,8 @@ public: for (size_t i = 0; i < input_rows_count; ++i) { - UInt32 ip = col_const_ip_in - ? col_const_ip_in->getValue() + ArgType ip = col_const_ip_in + ? 
col_const_ip_in->template getValue() : col_ip_in->getData()[i]; UInt8 cidr = col_const_cidr_in @@ -1019,6 +966,64 @@ public: return ColumnTuple::create(Columns{std::move(col_res_lower_range), std::move(col_res_upper_range)}); } + +public: + static constexpr auto name = "IPv4CIDRToRange"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + WhichDataType arg_type(arguments[0]); + if (!(arg_type.isIPv4() || arg_type.isUInt8() || arg_type.isUInt16() || arg_type.isUInt32())) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected IPv4 or UInt8 or UInt16 or UInt32", + arguments[0]->getName(), getName() + ); + + + const DataTypePtr & second_argument = arguments[1]; + if (!isUInt8(second_argument)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {}, expected UInt8", + second_argument->getName(), getName() + ); + + DataTypePtr element = DataTypeFactory::instance().get("IPv4"); + return std::make_shared(DataTypes{element, element}); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & ret_type, size_t input_rows_count) const override + { + if (arguments[1].type->getTypeId() != TypeIndex::UInt8) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {}, expected UInt8", arguments[1].type->getName(), getName() + ); + + switch (arguments[0].type->getTypeId()) + { + case TypeIndex::IPv4: return executeTyped(arguments, ret_type, 
input_rows_count); + case TypeIndex::UInt8: return executeTyped(arguments, ret_type, input_rows_count); + case TypeIndex::UInt16: return executeTyped(arguments, ret_type, input_rows_count); + case TypeIndex::UInt32: return executeTyped(arguments, ret_type, input_rows_count); + default: break; + } + + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of argument of function {}, expected IPv4 or UInt8 or UInt16 or UInt32", + arguments[0].column->getName(), getName() + ); + } }; class FunctionIsIPv4String : public IFunction @@ -1066,7 +1071,7 @@ public: for (size_t i = 0; i < vec_res.size(); ++i) { - vec_res[i] = DB::parseIPv4(reinterpret_cast(&vec_src[prev_offset]), reinterpret_cast(&result)); + vec_res[i] = DB::parseIPv4whole(reinterpret_cast(&vec_src[prev_offset]), reinterpret_cast(&result)); prev_offset = offsets_src[i]; } @@ -1121,7 +1126,7 @@ public: for (size_t i = 0; i < vec_res.size(); ++i) { - vec_res[i] = DB::parseIPv6(reinterpret_cast(&vec_src[prev_offset]), reinterpret_cast(buffer)); + vec_res[i] = DB::parseIPv6whole(reinterpret_cast(&vec_src[prev_offset]), reinterpret_cast(buffer)); prev_offset = offsets_src[i]; } @@ -1150,18 +1155,11 @@ REGISTER_FUNCTION(Coding) factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); factory.registerFunction(); factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - /// MySQL compatibility aliases: factory.registerAlias("INET_ATON", FunctionIPv4StringToNum::name, FunctionFactory::CaseInsensitive); diff --git a/src/Functions/FunctionsCodingIP.h b/src/Functions/FunctionsCodingIP.h index f25b25b12f0..39ee734e6bc 100644 --- a/src/Functions/FunctionsCodingIP.h +++ b/src/Functions/FunctionsCodingIP.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ 
-12,7 +13,8 @@ namespace DB namespace ErrorCodes { - extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; + extern const int CANNOT_PARSE_IPV4; + extern const int CANNOT_PARSE_IPV6; extern const int ILLEGAL_COLUMN; } @@ -25,14 +27,18 @@ enum class IPStringToNumExceptionMode : uint8_t static inline bool tryParseIPv4(const char * pos, UInt32 & result_value) { - return parseIPv4(pos, reinterpret_cast(&result_value)); + return parseIPv4whole(pos, reinterpret_cast(&result_value)); } namespace detail { - template + template ColumnPtr convertToIPv6(const StringColumnType & string_column, const PaddedPODArray * null_map = nullptr) { + if constexpr (!std::is_same_v && !std::is_same_v) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal return column type {}. Expected IPv6 or FixedString", TypeName); + + size_t column_size = string_column.size(); ColumnUInt8::MutablePtr col_null_map_to; @@ -44,10 +50,73 @@ namespace detail vec_null_map_to = &col_null_map_to->getData(); } - auto col_res = ColumnFixedString::create(IPV6_BINARY_LENGTH); + /// This is a special treatment for source column of type FixedString(16) + /// to preserve previous behavior when IPv6 was a domain type of FixedString(16) + if constexpr (std::is_same_v) + { + if (string_column.getN() == IPV6_BINARY_LENGTH) + { + if constexpr (std::is_same_v) + { + auto col_res = ColumnFixedString::create(string_column); - auto & vec_res = col_res->getChars(); - vec_res.resize(column_size * IPV6_BINARY_LENGTH); + if constexpr (exception_mode == IPStringToNumExceptionMode::Null) + { + col_null_map_to = ColumnUInt8::create(column_size, false); + if (null_map) + memcpy(col_null_map_to->getData().data(), null_map->data(), column_size); + return ColumnNullable::create(std::move(col_res), std::move(col_null_map_to)); + } + + return col_res; + } + else + { + auto col_res = ColumnIPv6::create(); + auto & vec_res = col_res->getData(); + + vec_res.resize(column_size); + memcpy(vec_res.data(), string_column.getChars().data(), 
column_size * IPV6_BINARY_LENGTH); + + if constexpr (exception_mode == IPStringToNumExceptionMode::Null) + { + col_null_map_to = ColumnUInt8::create(column_size, false); + if (null_map) + memcpy(col_null_map_to->getData().data(), null_map->data(), column_size); + return ColumnNullable::create(std::move(col_res), std::move(col_null_map_to)); + } + + return col_res; + } + } + } + + auto column_create = []() -> typename ToColumn::MutablePtr + { + if constexpr (std::is_same_v) + return ColumnFixedString::create(IPV6_BINARY_LENGTH); + else + return ColumnIPv6::create(); + }; + + auto get_vector = [](auto & col_res, size_t col_size) -> decltype(auto) + { + if constexpr (std::is_same_v) + { + auto & vec_res = col_res->getChars(); + vec_res.resize(col_size * IPV6_BINARY_LENGTH); + return (vec_res); + } + else + { + auto & vec_res = col_res->getData(); + vec_res.resize(col_size); + return (vec_res); + } + }; + + auto col_res = column_create(); + auto & vec_res = get_vector(col_res, column_size); using Chars = typename StringColumnType::Chars; const Chars & vec_src = string_column.getChars(); @@ -56,6 +125,7 @@ namespace detail char src_ipv4_buf[sizeof("::ffff:") + IPV4_MAX_TEXT_LENGTH + 1] = "::ffff:"; /// ColumnFixedString contains not null terminated strings. But functions parseIPv6, parseIPv4 expect null terminated string. 
+ /// TODO fix this - now parseIPv6/parseIPv4 accept end iterator, so can be parsed in-place std::string fixed_string_buffer; if constexpr (std::is_same_v) @@ -63,7 +133,11 @@ namespace detail fixed_string_buffer.resize(string_column.getN()); } - for (size_t out_offset = 0, i = 0; out_offset < vec_res.size(); out_offset += IPV6_BINARY_LENGTH, ++i) + int offset_inc = 1; + if constexpr (std::is_same_v) + offset_inc = IPV6_BINARY_LENGTH; + + for (size_t out_offset = 0, i = 0; i < column_size; out_offset += offset_inc, ++i) { size_t src_next_offset = src_offset; @@ -87,7 +161,7 @@ namespace detail if (null_map && (*null_map)[i]) { - std::fill_n(&vec_res[i], IPV6_BINARY_LENGTH, 0); + std::fill_n(&vec_res[out_offset], offset_inc, 0); src_offset = src_next_offset; if constexpr (exception_mode == IPStringToNumExceptionMode::Null) (*vec_null_map_to)[i] = true; @@ -107,17 +181,17 @@ namespace detail src_ipv4_buf + std::strlen("::ffff:"), src_value, std::min(src_next_offset - src_offset, IPV4_MAX_TEXT_LENGTH + 1)); - parse_result = parseIPv6(src_ipv4_buf, res_value); + parse_result = parseIPv6whole(src_ipv4_buf, res_value); } else { - parse_result = parseIPv6(src_value, res_value); + parse_result = parseIPv6whole(src_value, res_value); } if (!parse_result) { if constexpr (exception_mode == IPStringToNumExceptionMode::Throw) - throw Exception("Invalid IPv6 value", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING); + throw Exception("Invalid IPv6 value", ErrorCodes::CANNOT_PARSE_IPV6); else if constexpr (exception_mode == IPStringToNumExceptionMode::Default) vec_res[i] = 0; else if constexpr (exception_mode == IPStringToNumExceptionMode::Null) @@ -134,23 +208,16 @@ namespace detail } } -template +template ColumnPtr convertToIPv6(ColumnPtr column, const PaddedPODArray * null_map = nullptr) { - size_t column_size = column->size(); - - auto col_res = ColumnFixedString::create(IPV6_BINARY_LENGTH); - - auto & vec_res = col_res->getChars(); - vec_res.resize(column_size * 
IPV6_BINARY_LENGTH); - if (const auto * column_input_string = checkAndGetColumn(column.get())) { - return detail::convertToIPv6(*column_input_string, null_map); + return detail::convertToIPv6(*column_input_string, null_map); } else if (const auto * column_input_fixed_string = checkAndGetColumn(column.get())) { - return detail::convertToIPv6(*column_input_fixed_string, null_map); + return detail::convertToIPv6(*column_input_fixed_string, null_map); } else { @@ -158,7 +225,7 @@ ColumnPtr convertToIPv6(ColumnPtr column, const PaddedPODArray * null_map } } -template +template ColumnPtr convertToIPv4(ColumnPtr column, const PaddedPODArray * null_map = nullptr) { const ColumnString * column_string = checkAndGetColumn(column.get()); @@ -179,9 +246,9 @@ ColumnPtr convertToIPv4(ColumnPtr column, const PaddedPODArray * null_map vec_null_map_to = &col_null_map_to->getData(); } - auto col_res = ColumnUInt32::create(); + auto col_res = ToColumn::create(); - ColumnUInt32::Container & vec_res = col_res->getData(); + auto & vec_res = col_res->getData(); vec_res.resize(column_size); const ColumnString::Chars & vec_src = column_string->getChars(); @@ -205,7 +272,7 @@ ColumnPtr convertToIPv4(ColumnPtr column, const PaddedPODArray * null_map { if constexpr (exception_mode == IPStringToNumExceptionMode::Throw) { - throw Exception("Invalid IPv4 value", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING); + throw Exception("Invalid IPv4 value", ErrorCodes::CANNOT_PARSE_IPV4); } else if constexpr (exception_mode == IPStringToNumExceptionMode::Default) { diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 2129201f2eb..208b31b65f7 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -1154,6 +1154,8 @@ public: /// You can compare the date, datetime, or datatime64 and an enumeration with a constant string. 
|| ((left.isDate() || left.isDate32() || left.isDateTime() || left.isDateTime64()) && (right.isDate() || right.isDate32() || right.isDateTime() || right.isDateTime64()) && left.idx == right.idx) /// only date vs date, or datetime vs datetime || (left.isUUID() && right.isUUID()) + || (left.isIPv4() && right.isIPv4()) + || (left.isIPv6() && right.isIPv6()) || (left.isEnum() && right.isEnum() && arguments[0]->getName() == arguments[1]->getName()) /// only equivalent enum type values can be compared against || (left_tuple && right_tuple && left_tuple->getElements().size() == right_tuple->getElements().size()) || (arguments[0]->equals(*arguments[1])))) @@ -1245,6 +1247,15 @@ public: const bool left_is_float = which_left.isFloat(); const bool right_is_float = which_right.isFloat(); + const bool left_is_ipv6 = which_left.isIPv6(); + const bool right_is_ipv6 = which_right.isIPv6(); + const bool left_is_fixed_string = which_left.isFixedString(); + const bool right_is_fixed_string = which_right.isFixedString(); + size_t fixed_string_size = + left_is_fixed_string ? + assert_cast(*left_type).getN() : + (right_is_fixed_string ? assert_cast(*right_type).getN() : 0); + bool date_and_datetime = (which_left.idx != which_right.idx) && (which_left.isDate() || which_left.isDate32() || which_left.isDateTime() || which_left.isDateTime64()) && (which_right.isDate() || which_right.isDate32() || which_right.isDateTime() || which_right.isDateTime64()); @@ -1287,6 +1298,17 @@ public: { return res; } + else if (((left_is_ipv6 && right_is_fixed_string) || (right_is_ipv6 && left_is_fixed_string)) && fixed_string_size == IPV6_BINARY_LENGTH) + { + /// Special treatment for FixedString(16) as a binary representation of IPv6 - + /// CAST is customized for this case + ColumnPtr left_column = left_is_ipv6 ? + col_with_type_and_name_left.column : castColumn(col_with_type_and_name_left, right_type); + ColumnPtr right_column = right_is_ipv6 ? 
+ col_with_type_and_name_right.column : castColumn(col_with_type_and_name_right, left_type); + + return executeGenericIdenticalTypes(left_column.get(), right_column.get()); + } else if ((isColumnedAsDecimal(left_type) || isColumnedAsDecimal(right_type))) { // Comparing Date/Date32 and DateTime64 requires implicit conversion, diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index d607af54fcd..01e057e19a1 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -39,6 +39,8 @@ REGISTER_FUNCTION(Conversion) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); @@ -68,6 +70,8 @@ REGISTER_FUNCTION(Conversion) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); @@ -94,6 +98,8 @@ REGISTER_FUNCTION(Conversion) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 8847621661d..f846005d7a7 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -55,6 +55,7 @@ #include #include #include +#include #include @@ -73,6 +74,8 @@ namespace ErrorCodes extern const int CANNOT_PARSE_DATETIME; extern const int CANNOT_PARSE_TEXT; extern const int CANNOT_PARSE_UUID; + extern const int CANNOT_PARSE_IPV4; + extern const int CANNOT_PARSE_IPV6; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int LOGICAL_ERROR; extern const int TYPE_MISMATCH; @@ -120,6 +123,7 @@ struct AccurateOrNullConvertStrategyAdditions struct ConvertDefaultBehaviorTag {}; struct ConvertReturnNullOnErrorTag {}; +struct 
ConvertReturnZeroOnErrorTag {}; /** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment. * (Date is represented internally as number of days from some day; DateTime - as unix timestamp) @@ -208,6 +212,17 @@ struct ConvertImpl { throw Exception("Conversion between numeric types and UUID is not supported. Probably the passed UUID is unquoted", ErrorCodes::NOT_IMPLEMENTED); } + else if constexpr ( + (std::is_same_v != std::is_same_v) + && !(is_any_of || is_any_of) + ) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion from {} to {} is not supported", TypeName, TypeName); + } + else if constexpr (std::is_same_v != std::is_same_v) + { + throw Exception("Conversion between numeric types and IPv6 is not supported. Probably the passed IPv6 is unquoted", ErrorCodes::NOT_IMPLEMENTED); + } else { if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) @@ -993,6 +1008,22 @@ inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb x = tmp.toUnderType(); } +template <> +inline void parseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +{ + IPv4 tmp; + readIPv4Text(tmp, rb); + x = tmp.toUnderType(); +} + +template <> +inline void parseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +{ + IPv6 tmp; + readIPv6Text(tmp, rb); + x = tmp; +} + template bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) { @@ -1043,6 +1074,28 @@ inline bool tryParseImpl(DataTypeUUID::FieldType & x, ReadBuffer & return true; } +template <> +inline bool tryParseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +{ + IPv4 tmp; + if (!tryReadIPv4Text(tmp, rb)) + return false; + + x = tmp.toUnderType(); + return true; +} + +template <> +inline bool tryParseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +{ + IPv6 tmp; + if (!tryReadIPv6Text(tmp, rb)) + return false; + + x = tmp; + 
return true; +} + /** Throw exception with verbose message when string value is not parsed completely. */ @@ -1262,7 +1315,20 @@ struct ConvertThroughParsing } else { - parseImpl(vec_to[i], read_buffer, local_time_zone); + /// we want to utilize constexpr condition here, which is not mixable with value comparison + do + { + if constexpr (std::is_same_v && std::is_same_v) + { + if (fixed_string_size == IPV6_BINARY_LENGTH) + { + readBinary(vec_to[i], read_buffer); + break; + } + } + + parseImpl(vec_to[i], read_buffer, local_time_zone); + } while (false); } } @@ -1317,7 +1383,23 @@ struct ConvertThroughParsing vec_to[i], read_buffer, ToDataType::maxPrecision(), col_to->getScale()); } else - parsed = tryParseImpl(vec_to[i], read_buffer, local_time_zone); + { + /// we want to utilize constexpr condition here, which is not mixable with value comparison + do + { + if constexpr (std::is_same_v && std::is_same_v) + { + if (fixed_string_size == IPV6_BINARY_LENGTH) + { + readBinary(vec_to[i], read_buffer); + parsed = true; + break; + } + } + + parsed = tryParseImpl(vec_to[i], read_buffer, local_time_zone); + } while (false); + } } if (!isAllRead(read_buffer)) @@ -1370,6 +1452,11 @@ requires (!std::is_same_v) struct ConvertImpl : ConvertThroughParsing {}; +template +requires (is_any_of && is_any_of) +struct ConvertImpl + : ConvertThroughParsing {}; + /// Generic conversion of any type from String. Used for complex types: Array and Tuple or types with custom serialization. 
template struct ConvertImplGenericFromString @@ -1597,9 +1684,12 @@ public: std::is_same_v || std::is_same_v; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } static FunctionPtr create() { return std::make_shared(); } + FunctionConvert() = default; + explicit FunctionConvert(ContextPtr context_) : context(context_) {} + String getName() const override { return name; @@ -1763,7 +1853,9 @@ public: || e.code() == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE || e.code() == ErrorCodes::CANNOT_PARSE_DATE || e.code() == ErrorCodes::CANNOT_PARSE_DATETIME - || e.code() == ErrorCodes::CANNOT_PARSE_UUID) + || e.code() == ErrorCodes::CANNOT_PARSE_UUID + || e.code() == ErrorCodes::CANNOT_PARSE_IPV4 + || e.code() == ErrorCodes::CANNOT_PARSE_IPV6) { e.addMessage("Cannot parse " + result_type->getName() + " from " @@ -1785,6 +1877,7 @@ public: } private: + ContextPtr context; mutable bool checked_return_type = false; mutable bool to_nullable = false; @@ -1884,19 +1977,27 @@ private: return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); } - bool done; + bool done = false; if constexpr (to_string_or_fixed_string) { done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); } else { - /// We should use ConvertFromStringExceptionMode::Null mode when converting from String (or FixedString) - /// to Nullable type, to avoid 'value is too short' error on attempt to parse empty string from NULL values. 
- if (to_nullable && WhichDataType(from_type).isStringOrFixedString()) - done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertReturnNullOnErrorTag{}); - else - done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); + bool cast_ipv4_ipv6_default_on_conversion_error = false; + if constexpr (is_any_of) + if (context && (cast_ipv4_ipv6_default_on_conversion_error = context->getSettingsRef().cast_ipv4_ipv6_default_on_conversion_error)) + done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertReturnZeroOnErrorTag{}); + + if (!cast_ipv4_ipv6_default_on_conversion_error) + { + /// We should use ConvertFromStringExceptionMode::Null mode when converting from String (or FixedString) + /// to Nullable type, to avoid 'value is too short' error on attempt to parse empty string from NULL values. + if (to_nullable && WhichDataType(from_type).isStringOrFixedString()) + done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertReturnNullOnErrorTag{}); + else + done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); + } } if (!done) @@ -2357,6 +2458,8 @@ struct NameToInt256 { static constexpr auto name = "toInt256"; }; struct NameToFloat32 { static constexpr auto name = "toFloat32"; }; struct NameToFloat64 { static constexpr auto name = "toFloat64"; }; struct NameToUUID { static constexpr auto name = "toUUID"; }; +struct NameToIPv4 { static constexpr auto name = "toIPv4"; }; +struct NameToIPv6 { static constexpr auto name = "toIPv6"; }; using FunctionToUInt8 = FunctionConvert>; using FunctionToUInt16 = FunctionConvert>; @@ -2378,6 +2481,8 @@ using FunctionToDateTime = FunctionConvert; using FunctionToDateTime64 = FunctionConvert; using FunctionToUUID = FunctionConvert>; +using FunctionToIPv4 = FunctionConvert>; +using FunctionToIPv6 = FunctionConvert>; using FunctionToString = FunctionConvert; using FunctionToUnixTimestamp = FunctionConvert>; using FunctionToDecimal32 = 
FunctionConvert, NameToDecimal32, UnknownMonotonicity>; @@ -2407,6 +2512,8 @@ template <> struct FunctionTo { using Type = FunctionToDate32; } template <> struct FunctionTo { using Type = FunctionToDateTime; }; template <> struct FunctionTo { using Type = FunctionToDateTime64; }; template <> struct FunctionTo { using Type = FunctionToUUID; }; +template <> struct FunctionTo { using Type = FunctionToIPv4; }; +template <> struct FunctionTo { using Type = FunctionToIPv6; }; template <> struct FunctionTo { using Type = FunctionToString; }; template <> struct FunctionTo { using Type = FunctionToFixedString; }; template <> struct FunctionTo> { using Type = FunctionToDecimal32; }; @@ -2442,6 +2549,8 @@ struct NameToDecimal64OrZero { static constexpr auto name = "toDecimal64OrZero"; struct NameToDecimal128OrZero { static constexpr auto name = "toDecimal128OrZero"; }; struct NameToDecimal256OrZero { static constexpr auto name = "toDecimal256OrZero"; }; struct NameToUUIDOrZero { static constexpr auto name = "toUUIDOrZero"; }; +struct NameToIPv4OrZero { static constexpr auto name = "toIPv4OrZero"; }; +struct NameToIPv6OrZero { static constexpr auto name = "toIPv6OrZero"; }; using FunctionToUInt8OrZero = FunctionConvertFromString; using FunctionToUInt16OrZero = FunctionConvertFromString; @@ -2466,6 +2575,8 @@ using FunctionToDecimal64OrZero = FunctionConvertFromString, NameToDecimal128OrZero, ConvertFromStringExceptionMode::Zero>; using FunctionToDecimal256OrZero = FunctionConvertFromString, NameToDecimal256OrZero, ConvertFromStringExceptionMode::Zero>; using FunctionToUUIDOrZero = FunctionConvertFromString; +using FunctionToIPv4OrZero = FunctionConvertFromString; +using FunctionToIPv6OrZero = FunctionConvertFromString; struct NameToUInt8OrNull { static constexpr auto name = "toUInt8OrNull"; }; struct NameToUInt16OrNull { static constexpr auto name = "toUInt16OrNull"; }; @@ -2490,6 +2601,8 @@ struct NameToDecimal64OrNull { static constexpr auto name = "toDecimal64OrNull"; 
struct NameToDecimal128OrNull { static constexpr auto name = "toDecimal128OrNull"; }; struct NameToDecimal256OrNull { static constexpr auto name = "toDecimal256OrNull"; }; struct NameToUUIDOrNull { static constexpr auto name = "toUUIDOrNull"; }; +struct NameToIPv4OrNull { static constexpr auto name = "toIPv4OrNull"; }; +struct NameToIPv6OrNull { static constexpr auto name = "toIPv6OrNull"; }; using FunctionToUInt8OrNull = FunctionConvertFromString; using FunctionToUInt16OrNull = FunctionConvertFromString; @@ -2514,6 +2627,8 @@ using FunctionToDecimal64OrNull = FunctionConvertFromString, NameToDecimal128OrNull, ConvertFromStringExceptionMode::Null>; using FunctionToDecimal256OrNull = FunctionConvertFromString, NameToDecimal256OrNull, ConvertFromStringExceptionMode::Null>; using FunctionToUUIDOrNull = FunctionConvertFromString; +using FunctionToIPv4OrNull = FunctionConvertFromString; +using FunctionToIPv6OrNull = FunctionConvertFromString; struct NameParseDateTimeBestEffort { static constexpr auto name = "parseDateTimeBestEffort"; }; struct NameParseDateTimeBestEffortOrZero { static constexpr auto name = "parseDateTimeBestEffortOrZero"; }; @@ -2642,17 +2757,17 @@ class FunctionCast final : public FunctionCastBase public: using WrapperType = std::function; - FunctionCast(const char * cast_name_ + FunctionCast(ContextPtr context_ + , const char * cast_name_ , MonotonicityForRange && monotonicity_for_range_ , const DataTypes & argument_types_ , const DataTypePtr & return_type_ , std::optional diagnostic_ - , CastType cast_type_ - , bool cast_ipv4_ipv6_default_on_conversion_error_) + , CastType cast_type_) : cast_name(cast_name_), monotonicity_for_range(std::move(monotonicity_for_range_)) , argument_types(argument_types_), return_type(return_type_), diagnostic(std::move(diagnostic_)) , cast_type(cast_type_) - , cast_ipv4_ipv6_default_on_conversion_error(cast_ipv4_ipv6_default_on_conversion_error_) + , context(context_) { } @@ -2699,7 +2814,7 @@ private: std::optional 
diagnostic; CastType cast_type; - bool cast_ipv4_ipv6_default_on_conversion_error; + ContextPtr context; static WrapperType createFunctionAdaptor(FunctionPtr function, const DataTypePtr & from_type) { @@ -2740,7 +2855,7 @@ private: } else if (!can_apply_accurate_cast) { - FunctionPtr function = FunctionTo::Type::create(); + FunctionPtr function = FunctionTo::Type::create(context); return createFunctionAdaptor(function, from_type); } @@ -3767,7 +3882,9 @@ private: std::is_same_v || std::is_same_v || std::is_same_v || - std::is_same_v) + std::is_same_v || + std::is_same_v || + std::is_same_v) { ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); return true; @@ -3801,9 +3918,11 @@ private: return false; }; - bool cast_ipv4_ipv6_default_on_conversion_error_value = cast_ipv4_ipv6_default_on_conversion_error; + bool cast_ipv4_ipv6_default_on_conversion_error_value = context && context->getSettingsRef().cast_ipv4_ipv6_default_on_conversion_error; + bool input_format_ipv4_default_on_conversion_error_value = context && context->getSettingsRef().input_format_ipv4_default_on_conversion_error; + bool input_format_ipv6_default_on_conversion_error_value = context && context->getSettingsRef().input_format_ipv6_default_on_conversion_error; - auto make_custom_serialization_wrapper = [&, cast_ipv4_ipv6_default_on_conversion_error_value](const auto & types) -> bool + auto make_custom_serialization_wrapper = [&, cast_ipv4_ipv6_default_on_conversion_error_value, input_format_ipv4_default_on_conversion_error_value, input_format_ipv6_default_on_conversion_error_value](const auto & types) -> bool { using Types = std::decay_t; using ToDataType = typename Types::RightType; @@ -3811,47 +3930,47 @@ private: if constexpr (WhichDataType(FromDataType::type_id).isStringOrFixedString()) { + if constexpr (std::is_same_v) + { + ret = [cast_ipv4_ipv6_default_on_conversion_error_value, input_format_ipv4_default_on_conversion_error_value, 
requested_result_is_nullable]( + ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t) + -> ColumnPtr + { + if (!WhichDataType(result_type).isIPv4()) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName()); + + const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; + if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv4_default_on_conversion_error_value || requested_result_is_nullable) + return convertToIPv4(arguments[0].column, null_map); + else + return convertToIPv4(arguments[0].column, null_map); + }; + + return true; + } + + if constexpr (std::is_same_v) + { + ret = [cast_ipv4_ipv6_default_on_conversion_error_value, input_format_ipv6_default_on_conversion_error_value, requested_result_is_nullable]( + ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t) + -> ColumnPtr + { + if (!WhichDataType(result_type).isIPv6()) + throw Exception( + ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv6", result_type->getName()); + + const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; + if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv6_default_on_conversion_error_value || requested_result_is_nullable) + return convertToIPv6(arguments[0].column, null_map); + else + return convertToIPv6(arguments[0].column, null_map); + }; + + return true; + } + if (to_type->getCustomSerialization() && to_type->getCustomName()) { - if (to_type->getCustomName()->getName() == "IPv4") - { - ret = [cast_ipv4_ipv6_default_on_conversion_error_value]( - ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t) - -> ColumnPtr - { - if (!WhichDataType(result_type).isUInt32()) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. 
Expected UInt32", result_type->getName()); - - const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - if (cast_ipv4_ipv6_default_on_conversion_error_value) - return convertToIPv4(arguments[0].column, null_map); - else - return convertToIPv4(arguments[0].column, null_map); - }; - - return true; - } - - if (to_type->getCustomName()->getName() == "IPv6") - { - ret = [cast_ipv4_ipv6_default_on_conversion_error_value]( - ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t) - -> ColumnPtr - { - if (!WhichDataType(result_type).isFixedString()) - throw Exception( - ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected FixedString", result_type->getName()); - - const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - if (cast_ipv4_ipv6_default_on_conversion_error_value) - return convertToIPv6(arguments[0].column, null_map); - else - return convertToIPv6(arguments[0].column, null_map); - }; - - return true; - } - ret = &ConvertImplGenericFromString::execute; return true; } diff --git a/src/Functions/FunctionsExternalDictionaries.cpp b/src/Functions/FunctionsExternalDictionaries.cpp index 371b838635b..cdc229de99b 100644 --- a/src/Functions/FunctionsExternalDictionaries.cpp +++ b/src/Functions/FunctionsExternalDictionaries.cpp @@ -7,42 +7,113 @@ namespace DB REGISTER_FUNCTION(ExternalDictionaries) { - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - 
factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction(); + const std::string dict_get_description { R"( +Retrieves values from a dictionary. + +Accepts 3 parameters: +-- name of the dictionary; +-- name of the column of the dictionary or tuple of column names; +-- key value - expression returning dictionary key-type value or tuple-type value - depending on the dictionary configuration; + +Returned value: value of the dictionary attribute parsed in the {} if key is found, otherwise element specified in the dictionary configuration. + +Throws an exception if cannot parse the value of the attribute or the value does not match the attribute data type. +)" }; + + const std::string dict_get_or_default_description { R"( +Retrieves values from a dictionary. + +Accepts 4 parameters: +-- name of the dictionary; +-- name of the column of the dictionary or tuple of column names; +-- key value - expression returning dictionary key-type value or tuple-type value - depending on the dictionary configuration; +-- default values returned if the dictionary does not contain a row with the key value; + +Returned value: value of the dictionary attribute parsed in the {} if key is found, otherwise default value. + +Throws an exception if cannot parse the value of the attribute or the value does not match the attribute data type. +)" }; + + const std::string dict_get_or_null_description { R"( +Retrieves values from a dictionary. 
+ +Accepts 3 parameters: +-- name of the dictionary; +-- name of the column of the dictionary or tuple of column names; +-- key value - expression returning dictionary key-type value or tuple-type value - depending on the dictionary configuration; + +Returned value: value of the dictionary attribute parsed in the attribute’s data type if key is found, otherwise NULL. + +Throws an exception if cannot parse the value of the attribute or the value does not match the attribute data type. +)" }; + + factory.registerFunction>(Documentation{ fmt::format(fmt::runtime(dict_get_description), "attribute’s data type") }); + factory.registerFunction>(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "attribute’s data type") }); + factory.registerFunction(Documentation{ dict_get_or_null_description }); + + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "UInt8") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "UInt16") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "UInt32") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "UInt64") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "Int8") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "Int16") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "Int32") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "Int64") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "Float32") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "Float64") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "Date") }); + 
factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "DateTime") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "UUID") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "IPv4") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "IPv6") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_description), "String") }); + + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "UInt8") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "UInt16") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "UInt32") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "UInt64") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "Int8") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "Int16") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "Int32") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "Int64") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "Float32") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "Float64") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "Date") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "DateTime") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), 
"UUID") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "IPv4") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "IPv6") }); + factory.registerFunction(Documentation{ fmt::format(fmt::runtime(dict_get_or_default_description), "String") }); + + factory.registerFunction(Documentation{ R"( +Checks whether a key is present in a dictionary. +Accepts 2 parameters: name of the dictionary, key value - expression returning dictionary key-type value or tuple-type value - depending on the dictionary configuration. +Returned value: 0 if there is no key, 1 if there is a key, type of UInt8 +)"}); + + factory.registerFunction(Documentation{ R"( +Creates an array, containing all the parents of a key in the hierarchical dictionary. +Accepts 2 parameters: name of the dictionary, key value - expression returning a UInt64-type value. +Returned value: parents for the key, type of Array(UInt64) +)"}); + + factory.registerFunction(Documentation{ R"( +Checks the ancestor of a key through the whole hierarchical chain in the dictionary. +Accepts 3 parameters: name of the dictionary, key to be checked - expression returning a UInt64-type value, alleged ancestor of the key - expression returning a UInt64-type. +Returned value: 0 if key is not a child of the ancestor, 1 if key is a child of the ancestor or if key is the ancestor, type of UInt8 +)"}); + + factory.registerFunction(Documentation{ R"( +Returns first-level children as an array of indexes. It is the inverse transformation for dictGetHierarchy. +Accepts 2 parameters: name of the dictionary, key value - expression returning a UInt64-type value. +Returned value: first-level descendants for the key, type of Array(UInt64) +)"}); + + factory.registerFunction(Documentation{ R"( +Returns all descendants as if dictGetChildren function was applied level times recursively. 
+Accepts 3 parameters: name of the dictionary, key value - expression returning a UInt64-type value, level — hierarchy level - If level = 0 returns all descendants to the end - UInt8 +Returned value: descendants for the key, type of Array(UInt64) +)"}); } } diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 4bc7a17cf4c..4a2fd50553b 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -716,6 +717,8 @@ struct NameDictGetFloat64 { static constexpr auto name = "dictGetFloat64"; }; struct NameDictGetDate { static constexpr auto name = "dictGetDate"; }; struct NameDictGetDateTime { static constexpr auto name = "dictGetDateTime"; }; struct NameDictGetUUID { static constexpr auto name = "dictGetUUID"; }; +struct NameDictGetIPv4 { static constexpr auto name = "dictGetIPv4"; }; +struct NameDictGetIPv6 { static constexpr auto name = "dictGetIPv6"; }; struct NameDictGetDecimal32 { static constexpr auto name = "dictGetDecimal32"; }; struct NameDictGetDecimal64 { static constexpr auto name = "dictGetDecimal64"; }; struct NameDictGetDecimal128 { static constexpr auto name = "dictGetDecimal128"; }; @@ -734,6 +737,8 @@ using FunctionDictGetFloat64 = FunctionDictGet; using FunctionDictGetDateTime = FunctionDictGet; using FunctionDictGetUUID = FunctionDictGet; +using FunctionDictGetIPv4 = FunctionDictGet; +using FunctionDictGetIPv6 = FunctionDictGet; using FunctionDictGetDecimal32 = FunctionDictGet, NameDictGetDecimal32>; using FunctionDictGetDecimal64 = FunctionDictGet, NameDictGetDecimal64>; using FunctionDictGetDecimal128 = FunctionDictGet, NameDictGetDecimal128>; @@ -755,6 +760,8 @@ struct NameDictGetFloat64OrDefault { static constexpr auto name = "dictGetFloat6 struct NameDictGetDateOrDefault { static constexpr auto name = "dictGetDateOrDefault"; }; struct NameDictGetDateTimeOrDefault { 
static constexpr auto name = "dictGetDateTimeOrDefault"; }; struct NameDictGetUUIDOrDefault { static constexpr auto name = "dictGetUUIDOrDefault"; }; +struct NameDictGetIPv4OrDefault { static constexpr auto name = "dictGetIPv4OrDefault"; }; +struct NameDictGetIPv6OrDefault { static constexpr auto name = "dictGetIPv6OrDefault"; }; struct NameDictGetDecimal32OrDefault { static constexpr auto name = "dictGetDecimal32OrDefault"; }; struct NameDictGetDecimal64OrDefault { static constexpr auto name = "dictGetDecimal64OrDefault"; }; struct NameDictGetDecimal128OrDefault { static constexpr auto name = "dictGetDecimal128OrDefault"; }; @@ -773,6 +780,8 @@ using FunctionDictGetFloat64OrDefault = FunctionDictGetOrDefault; using FunctionDictGetDateTimeOrDefault = FunctionDictGetOrDefault; using FunctionDictGetUUIDOrDefault = FunctionDictGetOrDefault; +using FunctionDictGetIPv4OrDefault = FunctionDictGetOrDefault; +using FunctionDictGetIPv6OrDefault = FunctionDictGetOrDefault; using FunctionDictGetDecimal32OrDefault = FunctionDictGetOrDefault, NameDictGetDecimal32OrDefault>; using FunctionDictGetDecimal64OrDefault = FunctionDictGetOrDefault, NameDictGetDecimal64OrDefault>; using FunctionDictGetDecimal128OrDefault = FunctionDictGetOrDefault, NameDictGetDecimal128OrDefault>; diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 2d1f0ea0c39..cadff178098 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -1136,6 +1136,8 @@ private: else if (which.isInt128()) executeBigIntType(icolumn, vec_to); else if (which.isInt256()) executeBigIntType(icolumn, vec_to); else if (which.isUUID()) executeBigIntType(icolumn, vec_to); + else if (which.isIPv4()) executeBigIntType(icolumn, vec_to); + else if (which.isIPv6()) executeBigIntType(icolumn, vec_to); else if (which.isEnum8()) executeIntType(icolumn, vec_to); else if (which.isEnum16()) executeIntType(icolumn, vec_to); else if (which.isDate()) executeIntType(icolumn, vec_to); 
diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp index 59224096d3c..3e43d8d34ee 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -927,7 +927,9 @@ static bool castColumnNumeric(const IColumn * column, F && f) ColumnVector, ColumnVector, ColumnVector, - ColumnVector + ColumnVector, + ColumnVector, + ColumnVector >(column, std::forward(f)); } diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 1f527b7ec23..f71675dcbad 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -334,6 +335,8 @@ struct NameToDecimal64OrDefault { static constexpr auto name = "toDecimal64OrDef struct NameToDecimal128OrDefault { static constexpr auto name = "toDecimal128OrDefault"; }; struct NameToDecimal256OrDefault { static constexpr auto name = "toDecimal256OrDefault"; }; struct NameToUUIDOrDefault { static constexpr auto name = "toUUIDOrDefault"; }; +struct NameToIPv4OrDefault { static constexpr auto name = "toIPv4OrDefault"; }; +struct NameToIPv6OrDefault { static constexpr auto name = "toIPv6OrDefault"; }; using FunctionToUInt8OrDefault = FunctionCastOrDefaultTyped; using FunctionToUInt16OrDefault = FunctionCastOrDefaultTyped; @@ -362,6 +365,8 @@ using FunctionToDecimal128OrDefault = FunctionCastOrDefaultTyped, NameToDecimal256OrDefault>; using FunctionToUUIDOrDefault = FunctionCastOrDefaultTyped; +using FunctionToIPv4OrDefault = FunctionCastOrDefaultTyped; +using FunctionToIPv6OrDefault = FunctionCastOrDefaultTyped; REGISTER_FUNCTION(CastOrDefault) { @@ -394,6 +399,8 @@ REGISTER_FUNCTION(CastOrDefault) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/isIPAddressContainedIn.cpp b/src/Functions/isIPAddressContainedIn.cpp index 15dcdc584d8..23cac4dbef0 100644 --- 
a/src/Functions/isIPAddressContainedIn.cpp +++ b/src/Functions/isIPAddressContainedIn.cpp @@ -29,22 +29,17 @@ public: explicit IPAddressVariant(std::string_view address_str) { - /// IP address parser functions require that the input is - /// NULL-terminated so we need to copy it. - const auto address_str_copy = std::string(address_str); - UInt32 v4; - if (DB::parseIPv4(address_str_copy.c_str(), reinterpret_cast(&v4))) + if (DB::parseIPv4whole(address_str.begin(), address_str.end(), reinterpret_cast(&v4))) { addr = v4; } else { addr = IPv6AddrType(); - bool success = DB::parseIPv6(address_str_copy.c_str(), std::get(addr).data()); + bool success = DB::parseIPv6whole(address_str.begin(), address_str.end(), std::get(addr).data()); if (!success) - throw DB::Exception("Neither IPv4 nor IPv6 address: '" + address_str_copy + "'", - DB::ErrorCodes::CANNOT_PARSE_TEXT); + throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Neither IPv4 nor IPv6 address: '{}'", address_str); } } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 5b13f52e277..f69e3d5c7b3 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -11,6 +11,7 @@ #include +#include #include #include #include @@ -21,6 +22,7 @@ #include #include #include +#include #include #include @@ -54,6 +56,8 @@ namespace ErrorCodes extern const int CANNOT_PARSE_BOOL; extern const int CANNOT_PARSE_DATETIME; extern const int CANNOT_PARSE_UUID; + extern const int CANNOT_PARSE_IPV4; + extern const int CANNOT_PARSE_IPV6; extern const int CANNOT_READ_ARRAY_FROM_TEXT; extern const int CANNOT_PARSE_NUMBER; extern const int INCORRECT_DATA; @@ -844,6 +848,49 @@ inline bool tryReadUUIDText(UUID & uuid, ReadBuffer & buf) return readUUIDTextImpl(uuid, buf); } +template +inline ReturnType readIPv4TextImpl(IPv4 & ip, ReadBuffer & buf) +{ + if (parseIPv4(buf.position(), [&buf](){ return buf.eof(); }, reinterpret_cast(&ip.toUnderType()))) + return ReturnType(true); + + if constexpr (std::is_same_v) + throw 
ParsingException(std::string("Cannot parse IPv4 ").append(buf.position(), buf.available()), ErrorCodes::CANNOT_PARSE_IPV4); + else + return ReturnType(false); +} + +inline void readIPv4Text(IPv4 & ip, ReadBuffer & buf) +{ + return readIPv4TextImpl(ip, buf); +} + +inline bool tryReadIPv4Text(IPv4 & ip, ReadBuffer & buf) +{ + return readIPv4TextImpl(ip, buf); +} + +template +inline ReturnType readIPv6TextImpl(IPv6 & ip, ReadBuffer & buf) +{ + if (parseIPv6orIPv4(buf.position(), [&buf](){ return buf.eof(); }, reinterpret_cast(ip.toUnderType().items))) + return ReturnType(true); + + if constexpr (std::is_same_v) + throw ParsingException(std::string("Cannot parse IPv6 ").append(buf.position(), buf.available()), ErrorCodes::CANNOT_PARSE_IPV6); + else + return ReturnType(false); +} + +inline void readIPv6Text(IPv6 & ip, ReadBuffer & buf) +{ + return readIPv6TextImpl(ip, buf); +} + +inline bool tryReadIPv6Text(IPv6 & ip, ReadBuffer & buf) +{ + return readIPv6TextImpl(ip, buf); +} template inline T parse(const char * data, size_t size); @@ -1054,8 +1101,10 @@ inline void readBinary(bool & x, ReadBuffer & buf) } inline void readBinary(String & x, ReadBuffer & buf) { readStringBinary(x, buf); } +inline void readBinary(Int32 & x, ReadBuffer & buf) { readPODBinary(x, buf); } inline void readBinary(Int128 & x, ReadBuffer & buf) { readPODBinary(x, buf); } inline void readBinary(Int256 & x, ReadBuffer & buf) { readPODBinary(x, buf); } +inline void readBinary(UInt32 & x, ReadBuffer & buf) { readPODBinary(x, buf); } inline void readBinary(UInt128 & x, ReadBuffer & buf) { readPODBinary(x, buf); } inline void readBinary(UInt256 & x, ReadBuffer & buf) { readPODBinary(x, buf); } inline void readBinary(Decimal32 & x, ReadBuffer & buf) { readPODBinary(x, buf); } @@ -1111,6 +1160,8 @@ inline bool tryReadText(is_integer auto & x, ReadBuffer & buf) } inline bool tryReadText(UUID & x, ReadBuffer & buf) { return tryReadUUIDText(x, buf); } +inline bool tryReadText(IPv4 & x, ReadBuffer & buf) { 
return tryReadIPv4Text(x, buf); } +inline bool tryReadText(IPv6 & x, ReadBuffer & buf) { return tryReadIPv6Text(x, buf); } inline void readText(is_floating_point auto & x, ReadBuffer & buf) { readFloatText(x, buf); } @@ -1119,6 +1170,8 @@ inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); } inline void readText(DayNum & x, ReadBuffer & buf) { readDateText(x, buf); } inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); } inline void readText(UUID & x, ReadBuffer & buf) { readUUIDText(x, buf); } +inline void readText(IPv4 & x, ReadBuffer & buf) { readIPv4Text(x, buf); } +inline void readText(IPv6 & x, ReadBuffer & buf) { readIPv6Text(x, buf); } /// Generic methods to read value in text format, /// possibly in single quotes (only for data types that use quotes in VALUES format of INSERT statement in SQL). @@ -1149,6 +1202,19 @@ inline void readQuoted(UUID & x, ReadBuffer & buf) assertChar('\'', buf); } +inline void readQuoted(IPv4 & x, ReadBuffer & buf) +{ + assertChar('\'', buf); + readIPv4Text(x, buf); + assertChar('\'', buf); +} + +inline void readQuoted(IPv6 & x, ReadBuffer & buf) +{ + assertChar('\'', buf); + readIPv6Text(x, buf); + assertChar('\'', buf); +} /// Same as above, but in double quotes. 
template @@ -1201,6 +1267,8 @@ inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(DayNum & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(LocalDateTime & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(UUID & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline void readCSV(IPv4 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline void readCSV(IPv6 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(UInt128 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(Int128 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(UInt256 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } diff --git a/src/IO/WriteHelpers.cpp b/src/IO/WriteHelpers.cpp index a9788505995..caeea0a82a3 100644 --- a/src/IO/WriteHelpers.cpp +++ b/src/IO/WriteHelpers.cpp @@ -1,5 +1,7 @@ #include #include +#include +#include #include @@ -34,6 +36,29 @@ void formatUUID(std::reverse_iterator src16, UInt8 * dst36) formatHex(src16 + 2, &dst36[24], 6); } +void writeIPv4Text(const IPv4 & ip, WriteBuffer & buf) +{ + size_t idx = (ip >> 24); + buf.write(one_byte_to_string_lookup_table[idx].first, one_byte_to_string_lookup_table[idx].second); + buf.write('.'); + idx = (ip >> 16) & 0xFF; + buf.write(one_byte_to_string_lookup_table[idx].first, one_byte_to_string_lookup_table[idx].second); + buf.write('.'); + idx = (ip >> 8) & 0xFF; + buf.write(one_byte_to_string_lookup_table[idx].first, one_byte_to_string_lookup_table[idx].second); + buf.write('.'); + idx = ip & 0xFF; + buf.write(one_byte_to_string_lookup_table[idx].first, one_byte_to_string_lookup_table[idx].second); +} + +void writeIPv6Text(const IPv6 & ip, WriteBuffer & buf) +{ + char addr[IPV6_MAX_TEXT_LENGTH + 1] {}; + char * paddr = addr; + + formatIPv6(reinterpret_cast(&ip), paddr); + buf.write(addr, paddr - addr - 1); +} void writeException(const Exception & e, WriteBuffer & buf, bool with_stack_trace) { diff 
--git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 8dbfe63be7e..a16381f7fe4 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -633,6 +634,9 @@ inline void writeUUIDText(const UUID & uuid, WriteBuffer & buf) buf.write(s, sizeof(s)); } +void writeIPv4Text(const IPv4 & ip, WriteBuffer & buf); +void writeIPv6Text(const IPv6 & ip, WriteBuffer & buf); + template inline void writeDateTime64FractionalText(typename DecimalType::NativeType fractional, UInt32 scale, WriteBuffer & buf) { @@ -858,6 +862,8 @@ inline void writeBinary(const Decimal256 & x, WriteBuffer & buf) { writePODBinar inline void writeBinary(const LocalDate & x, WriteBuffer & buf) { writePODBinary(x, buf); } inline void writeBinary(const LocalDateTime & x, WriteBuffer & buf) { writePODBinary(x, buf); } inline void writeBinary(const UUID & x, WriteBuffer & buf) { writePODBinary(x, buf); } +inline void writeBinary(const IPv4 & x, WriteBuffer & buf) { writePODBinary(x, buf); } +inline void writeBinary(const IPv6 & x, WriteBuffer & buf) { writePODBinary(x, buf); } /// Methods for outputting the value in text form for a tab-separated format. 
@@ -881,6 +887,8 @@ inline void writeText(const DayNum & x, WriteBuffer & buf) { writeDateText(Local inline void writeText(const LocalDate & x, WriteBuffer & buf) { writeDateText(x, buf); } inline void writeText(const LocalDateTime & x, WriteBuffer & buf) { writeDateTimeText(x, buf); } inline void writeText(const UUID & x, WriteBuffer & buf) { writeUUIDText(x, buf); } +inline void writeText(const IPv4 & x, WriteBuffer & buf) { writeIPv4Text(x, buf); } +inline void writeText(const IPv6 & x, WriteBuffer & buf) { writeIPv6Text(x, buf); } template void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros) @@ -999,6 +1007,19 @@ inline void writeQuoted(const UUID & x, WriteBuffer & buf) writeChar('\'', buf); } +inline void writeQuoted(const IPv4 & x, WriteBuffer & buf) +{ + writeChar('\'', buf); + writeText(x, buf); + writeChar('\'', buf); +} + +inline void writeQuoted(const IPv6 & x, WriteBuffer & buf) +{ + writeChar('\'', buf); + writeText(x, buf); + writeChar('\'', buf); +} /// String, date, datetime are in double quotes with C-style escaping. Numbers - without. template @@ -1032,6 +1053,19 @@ inline void writeDoubleQuoted(const UUID & x, WriteBuffer & buf) writeChar('"', buf); } +inline void writeDoubleQuoted(const IPv4 & x, WriteBuffer & buf) +{ + writeChar('"', buf); + writeText(x, buf); + writeChar('"', buf); +} + +inline void writeDoubleQuoted(const IPv6 & x, WriteBuffer & buf) +{ + writeChar('"', buf); + writeText(x, buf); + writeChar('"', buf); +} /// String - in double quotes and with CSV-escaping; date, datetime - in double quotes. Numbers - without. 
template @@ -1042,6 +1076,8 @@ inline void writeCSV(const String & x, WriteBuffer & buf) { writeCSVString<>(x, inline void writeCSV(const LocalDate & x, WriteBuffer & buf) { writeDoubleQuoted(x, buf); } inline void writeCSV(const LocalDateTime & x, WriteBuffer & buf) { writeDoubleQuoted(x, buf); } inline void writeCSV(const UUID & x, WriteBuffer & buf) { writeDoubleQuoted(x, buf); } +inline void writeCSV(const IPv4 & x, WriteBuffer & buf) { writeDoubleQuoted(x, buf); } +inline void writeCSV(const IPv6 & x, WriteBuffer & buf) { writeDoubleQuoted(x, buf); } template void writeBinary(const std::vector & x, WriteBuffer & buf) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 3b4d2dd1dd4..9968c43cc9d 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -33,6 +33,35 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ + +std::pair getFunctionArguments(const ActionsDAG::NodeRawConstPtrs & children) +{ + size_t num_arguments = children.size(); + + bool all_const = true; + ColumnsWithTypeAndName arguments(num_arguments); + + for (size_t i = 0; i < num_arguments; ++i) + { + const auto & child = *children[i]; + + ColumnWithTypeAndName argument; + argument.column = child.column; + argument.type = child.result_type; + argument.name = child.result_name; + + if (!argument.column || !isColumnConst(*argument.column)) + all_const = false; + + arguments[i] = std::move(argument); + } + return { std::move(arguments), all_const }; +} + +} + void ActionsDAG::Node::toTree(JSONBuilder::JSONMap & map) const { map.add("Node Type", magic_enum::enum_name(type)); @@ -161,85 +190,38 @@ const ActionsDAG::Node & ActionsDAG::addFunction( NodeRawConstPtrs children, std::string result_name) { - size_t num_arguments = children.size(); + auto [arguments, all_const] = getFunctionArguments(children); - Node node; - node.type = ActionType::FUNCTION; - node.children = std::move(children); - - bool all_const = true; - 
ColumnsWithTypeAndName arguments(num_arguments); - - for (size_t i = 0; i < num_arguments; ++i) - { - const auto & child = *node.children[i]; - - ColumnWithTypeAndName argument; - argument.column = child.column; - argument.type = child.result_type; - argument.name = child.result_name; - - if (!argument.column || !isColumnConst(*argument.column)) - all_const = false; - - arguments[i] = std::move(argument); - } - - node.function_base = function->build(arguments); - node.result_type = node.function_base->getResultType(); - node.function = node.function_base->prepare(arguments); - node.is_deterministic = node.function_base->isDeterministic(); - - /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. - if (node.function_base->isSuitableForConstantFolding()) - { - ColumnPtr column; - - if (all_const) - { - size_t num_rows = arguments.empty() ? 0 : arguments.front().column->size(); - column = node.function->execute(arguments, node.result_type, num_rows, true); - } - else - { - column = node.function_base->getConstantResultForNonConstArguments(arguments, node.result_type); - } - - /// If the result is not a constant, just in case, we will consider the result as unknown. - if (column && isColumnConst(*column)) - { - /// All constant (literal) columns in block are added with size 1. - /// But if there was no columns in block before executing a function, the result has size 0. - /// Change the size to 1. 
- - if (column->empty()) - column = column->cloneResized(1); - - node.column = std::move(column); - } - } - - if (result_name.empty()) - { - result_name = function->getName() + "("; - for (size_t i = 0; i < num_arguments; ++i) - { - if (i) - result_name += ", "; - result_name += node.children[i]->result_name; - } - result_name += ")"; - } - - node.result_name = std::move(result_name); - - return addNode(std::move(node)); + auto function_base = function->build(arguments); + return addFunctionImpl( + function_base, + std::move(children), + std::move(arguments), + std::move(result_name), + all_const); } const ActionsDAG::Node & ActionsDAG::addFunction( const FunctionBasePtr & function_base, NodeRawConstPtrs children, std::string result_name) +{ + auto [arguments, all_const] = getFunctionArguments(children); + + return addFunctionImpl( + function_base, + std::move(children), + std::move(arguments), + std::move(result_name), + all_const); +} + +const ActionsDAG::Node & ActionsDAG::addFunctionImpl( + const FunctionBasePtr & function_base, + NodeRawConstPtrs children, + ColumnsWithTypeAndName arguments, + std::string result_name, + bool all_const) { size_t num_arguments = children.size(); @@ -247,24 +229,6 @@ const ActionsDAG::Node & ActionsDAG::addFunction( node.type = ActionType::FUNCTION; node.children = std::move(children); - bool all_const = true; - ColumnsWithTypeAndName arguments(num_arguments); - - for (size_t i = 0; i < num_arguments; ++i) - { - const auto & child = *node.children[i]; - - ColumnWithTypeAndName argument; - argument.column = child.column; - argument.type = child.result_type; - argument.name = child.result_name; - - if (!argument.column || !isColumnConst(*argument.column)) - all_const = false; - - arguments[i] = std::move(argument); - } - node.function_base = function_base; node.result_type = node.function_base->getResultType(); node.function = node.function_base->prepare(arguments); @@ -640,9 +604,15 @@ Block ActionsDAG::updateHeader(Block header) 
const arguments[i] = node_to_column[node->children[i]]; if (!arguments[i].column) throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, - "Not found column {} in block", node->children[i]->result_name); + "Not found column {} in block {}", node->children[i]->result_name, + header.dumpStructure()); } + if (node->type == ActionsDAG::ActionType::INPUT) + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, + "Not found column {} in block {}", + node->result_name, header.dumpStructure()); + node_to_column[node] = executeActionForHeader(node, std::move(arguments)); } } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index f574757abac..a26694e00f5 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -350,6 +351,13 @@ private: Node & addNode(Node node); + const Node & addFunctionImpl( + const FunctionBasePtr & function_base, + NodeRawConstPtrs children, + ColumnsWithTypeAndName arguments, + std::string result_name, + bool all_const); + #if USE_EMBEDDED_COMPILER void compileFunctions(size_t min_count_to_compile_expression, const std::unordered_set & lazy_executed_nodes = {}); #endif diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index e57016d969a..c67de14a189 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -249,6 +249,25 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID /// Already in needed type. return src; } + else if (which_type.isIPv4() && src.getType() == Field::Types::IPv4) + { + /// Already in needed type. + return src; + } + else if (which_type.isIPv6()) + { + /// Already in needed type. 
+ if (src.getType() == Field::Types::IPv6) + return src; + /// Treat FixedString(16) as a binary representation of IPv6 + if (which_from_type.isFixedString() && assert_cast(from_type_hint)->getN() == IPV6_BINARY_LENGTH) + { + const auto col = type.createColumn(); + ReadBufferFromString in_buffer(src.get()); + type.getDefaultSerialization()->deserializeBinary(*col, in_buffer, {}); + return (*col)[0]; + } + } else if (which_type.isStringOrFixedString()) { if (src.getType() == Field::Types::String) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 6b2de30722c..a0e8c4687c6 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -350,7 +350,7 @@ void Planner::buildQueryPlanIfNeeded() auto function_node = std::make_shared("and"); auto and_function = FunctionFactory::instance().get("and", query_context); function_node->getArguments().getNodes() = {query_node.getPrewhere(), query_node.getWhere()}; - function_node->resolveAsFunction(and_function->build(function_node->getArgumentTypes())); + function_node->resolveAsFunction(and_function->build(function_node->getArgumentColumns())); query_node.getWhere() = std::move(function_node); query_node.getPrewhere() = {}; } diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index 6f153019df5..f7374111a30 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -26,6 +26,8 @@ namespace ErrorCodes extern const int ARGUMENT_OUT_OF_BOUND; extern const int INCORRECT_DATA; extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; + extern const int CANNOT_PARSE_IPV4; + extern const int CANNOT_PARSE_IPV6; } @@ -44,7 +46,9 @@ bool isParseError(int code) || code == ErrorCodes::TOO_LARGE_STRING_SIZE || code == ErrorCodes::ARGUMENT_OUT_OF_BOUND /// For Decimals || code == ErrorCodes::INCORRECT_DATA /// For some ReadHelpers - || code == ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; + || code == 
ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING + || code == ErrorCodes::CANNOT_PARSE_IPV4 + || code == ErrorCodes::CANNOT_PARSE_IPV6; } IRowInputFormat::IRowInputFormat(Block header, ReadBuffer & in_, Params params_) diff --git a/src/Processors/Transforms/AddingDefaultsTransform.cpp b/src/Processors/Transforms/AddingDefaultsTransform.cpp index f55d4d88ae8..64bdf663d0f 100644 --- a/src/Processors/Transforms/AddingDefaultsTransform.cpp +++ b/src/Processors/Transforms/AddingDefaultsTransform.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 7856659c6a7..46abfa2a59a 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -63,6 +63,9 @@ namespace ErrorCodes extern const int CANNOT_PARSE_DATE; extern const int CANNOT_PARSE_DATETIME; extern const int CANNOT_PARSE_NUMBER; + extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; + extern const int CANNOT_PARSE_IPV4; + extern const int CANNOT_PARSE_IPV6; extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; extern const int CANNOT_OPEN_FILE; extern const int CANNOT_COMPILE_REGEXP; @@ -188,6 +191,9 @@ static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int excepti exception_code == ErrorCodes::CANNOT_PARSE_DATE || exception_code == ErrorCodes::CANNOT_PARSE_DATETIME || exception_code == ErrorCodes::CANNOT_PARSE_NUMBER || + exception_code == ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING || + exception_code == ErrorCodes::CANNOT_PARSE_IPV4 || + exception_code == ErrorCodes::CANNOT_PARSE_IPV6 || exception_code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || exception_code == ErrorCodes::UNKNOWN_ELEMENT_IN_AST || exception_code == ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE || diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 5d0c3fc3cad..2340bdd99b2 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ 
b/src/Storages/MergeTree/KeyCondition.cpp @@ -613,7 +613,7 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown( } } - const auto & func = inverted_dag.addFunction(FunctionFactory::instance().get(node.function_base->getName(), context), children, ""); + const auto & func = inverted_dag.addFunction(node.function_base, children, ""); to_inverted[&node] = &func; return func; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 30d0570ff11..d842afc4151 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5728,6 +5728,15 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( bool need_primary_key_max_column = false; const auto & primary_key_max_column_name = metadata_snapshot->minmax_count_projection->primary_key_max_column_name; NameSet required_columns_set(required_columns.begin(), required_columns.end()); + + if (required_columns_set.contains("_partition_value") && !typeid_cast(getPartitionValueType().get())) + { + throw Exception( + ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, + "Missing column `_partition_value` because there is no partition column in table {}", + getStorageID().getTableName()); + } + if (!primary_key_max_column_name.empty()) need_primary_key_max_column = required_columns_set.contains(primary_key_max_column_name); diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index e7fdf1617f0..0fd081a8425 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -85,6 +85,18 @@ namespace { operator()(x.toUnderType()); } + void operator() (const IPv4 & x) const + { + UInt8 type = Field::Types::IPv4; + hash.update(type); + hash.update(x); + } + void operator() (const IPv6 & x) const + { + UInt8 type = Field::Types::IPv6; + hash.update(type); + hash.update(x); + } void operator() (const Float64 & x) const { UInt8 type = Field::Types::Float64; 
diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 028d6dd8caa..e096811e5b9 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -44,7 +44,6 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; extern const int ILLEGAL_PREWHERE; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -653,7 +652,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. - convertingSourceStream(header, storage_snapshot->metadata, aliases, modified_context, modified_query_info.query, *builder, processed_stage); + convertingSourceStream(header, storage_snapshot->metadata, aliases, modified_context, *builder); } return builder; @@ -829,9 +828,7 @@ void ReadFromMerge::convertingSourceStream( const StorageMetadataPtr & metadata_snapshot, const Aliases & aliases, ContextPtr local_context, - ASTPtr & query, - QueryPipelineBuilder & builder, - QueryProcessingStage::Enum processed_stage) + QueryPipelineBuilder & builder) { Block before_block_header = builder.getHeader(); @@ -868,39 +865,6 @@ void ReadFromMerge::convertingSourceStream( return std::make_shared(stream_header, actions); }); } - - auto where_expression = query->as()->where(); - - if (!where_expression) - return; - - if (processed_stage > QueryProcessingStage::FetchColumns) - { - for (size_t column_index : collections::range(0, header.columns())) - { - ColumnWithTypeAndName header_column = header.getByPosition(column_index); - ColumnWithTypeAndName before_column = before_block_header.getByName(header_column.name); - /// If the processed_stage greater than FetchColumns and the block structure between streams is different. - /// the where expression maybe invalid because of ConvertingTransform. 
- /// So we need to throw exception. - if (!header_column.type->equals(*before_column.type.get())) - { - NamesAndTypesList source_columns = metadata_snapshot->getSampleBlock().getNamesAndTypesList(); - auto virtual_column = *storage_merge->getVirtuals().tryGetByName("_table"); - source_columns.emplace_back(NameAndTypePair{virtual_column.name, virtual_column.type}); - auto syntax_result = TreeRewriter(local_context).analyze(where_expression, source_columns); - ExpressionActionsPtr actions = ExpressionAnalyzer{where_expression, syntax_result, local_context}.getActions(false, false); - Names required_columns = actions->getRequiredColumns(); - - for (const auto & required_column : required_columns) - { - if (required_column == header_column.name) - throw Exception("Block structure mismatch in Merge Storage: different types:\n" + before_block_header.dumpStructure() - + "\n" + header.dumpStructure(), ErrorCodes::LOGICAL_ERROR); - } - } - } - } } IStorage::ColumnSizeByName StorageMerge::getColumnSizes() const diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 54f3999514d..d296eeea421 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -193,10 +193,10 @@ private: size_t streams_num, bool concat_streams = false); - void convertingSourceStream( + static void convertingSourceStream( const Block & header, const StorageMetadataPtr & metadata_snapshot, const Aliases & aliases, - ContextPtr context, ASTPtr & query, - QueryPipelineBuilder & builder, QueryProcessingStage::Enum processed_stage); + ContextPtr context, + QueryPipelineBuilder & builder); }; } diff --git a/tests/ci/tests/docker_images.json b/tests/ci/tests/docker_images.json index 8c13c760168..30fa1416f9e 100644 --- a/tests/ci/tests/docker_images.json +++ b/tests/ci/tests/docker_images.json @@ -65,10 +65,6 @@ "name": "clickhouse/integration-tests-runner", "dependent": [] }, - "docker/test/testflows/runner": { - "name": "clickhouse/testflows-runner", - "dependent": [] - 
}, "docker/test/fasttest": { "name": "clickhouse/fasttest", "dependent": [] diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 5a9f8b20254..7b53e033c9d 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3225,6 +3225,40 @@ class ClickHouseInstance: database=database, ) + def query_and_get_error_with_retry( + self, + sql, + stdin=None, + timeout=None, + settings=None, + user=None, + password=None, + database=None, + retry_count=20, + sleep_time=0.5, + ): + logging.debug(f"Executing query {sql} on {self.name}") + result = None + for i in range(retry_count): + try: + result = self.client.query_and_get_error( + sql, + stdin=stdin, + timeout=timeout, + settings=settings, + user=user, + password=password, + database=database, + ) + time.sleep(sleep_time) + except QueryRuntimeException as ex: + logging.debug("Retry {} got exception {}".format(i + 1, ex)) + time.sleep(sleep_time) + + if result is not None: + return result + raise Exception("Query {sql} did not fail".format(sql=sql)) + # The same as query_and_get_error but ignores successful query. def query_and_get_answer_with_error( self, diff --git a/tests/integration/helpers/mock_servers.py b/tests/integration/helpers/mock_servers.py new file mode 100644 index 00000000000..ea1f4e07a9f --- /dev/null +++ b/tests/integration/helpers/mock_servers.py @@ -0,0 +1,67 @@ +import logging +import os +import time + + +# Starts simple HTTP servers written in Python. +# Parameters: +# `script_dir` contains a path to the directory containing server scripts.
+# `mocks` is a list of tuples (server_script, container_name, port), where +# `server_script` is a name of a python file inside `script_dir`, +# `container_name` is usually "resolver" (see docker/test/integration/resolver) +def start_mock_servers(cluster, script_dir, mocks, timeout=100): + server_names = [mock[0] for mock in mocks] + server_names_with_desc = ( + f"{'server' if len(server_names) == 1 else 'servers'} {','.join(server_names)}" + ) + logging.info(f"Starting mock {server_names_with_desc}") + + start_time = time.time() + mocks_to_check = {} + + for server_name, container, port in mocks: + filepath = os.path.join(script_dir, server_name) + container_id = cluster.get_container_id(container) + mocks_to_check[server_name] = (container_id, port) + + cluster.copy_file_to_container( + container_id, + filepath, + server_name, + ) + + cluster.exec_in_container( + container_id, + ["python", server_name, str(port)], + detach=True, + ) + + # Wait for the server to start. + attempt = 1 + while mocks_to_check: + for server_name in list(mocks_to_check.keys()): + container_id, port = mocks_to_check[server_name] + + ping_response = cluster.exec_in_container( + container_id, + ["curl", "-s", f"http://localhost:{port}/"], + nothrow=True, + ) + + if ping_response == "OK": + logging.debug( + f"{server_name} answered {ping_response} on attempt {attempt}" + ) + del mocks_to_check[server_name] + elif time.time() - start_time > timeout: + assert ( + ping_response == "OK" + ), 'Expected "OK", but got "{}" from {}'.format( + ping_response, server_name + ) + + if mocks_to_check: + time.sleep(1) + attempt += 1 + + logging.info(f"Mock {server_names_with_desc} started") diff --git a/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py b/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py new file mode 100644 index 00000000000..bb40dff27ac --- /dev/null +++ 
b/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py @@ -0,0 +1,41 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +# Version 22.6 is used as the old release: IPv4/IPv6 data written by it must stay readable after upgrading to the latest version. +node_22_6 = cluster.add_instance( + "node_22_6", + image="clickhouse/clickhouse-server", + tag="22.6", + stay_alive=True, + with_installed_binary=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_ip_types_binary_compatibility(start_cluster): + node_22_6.query( + "create table tab (ipv4 IPv4, ipv6 IPv6) engine = MergeTree order by tuple()" + ) + node_22_6.query( + "insert into tab values ('123.231.213.132', '0123:4567:89ab:cdef:fedc:ba98:7654:3210')" + ) + res_22_6 = node_22_6.query("select * from tab") + + node_22_6.restart_with_latest_version() + + res_latest = node_22_6.query("select * from tab") + + assert res_22_6 == res_latest + + node_22_6.query("drop table tab") diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 8c7e958bbd9..a1bc0d42a46 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -362,44 +362,46 @@ def test_progress(): "SELECT number, sleep(0.31) FROM numbers(8) SETTINGS max_block_size=2, interactive_delay=100000", stream_output=True, ) + results = list(results) for result in results: result.time_zone = "" result.query_id = "" # print(results) - assert ( - str(results) - == """[output_format: "TabSeparated" -progress { - read_rows: 2 - read_bytes: 16 - total_rows_to_read: 8 -} -, output: "0\\t0\\n1\\t0" -, progress { - read_rows: 2 - read_bytes: 16 -} -, output: "\\n2\\t0\\n3\\t0" -, progress { - read_rows: 2 - read_bytes: 16 -} -, output: "\\n4\\t0\\n5\\t0" -, progress { - read_rows: 2 - read_bytes: 16 -} -, output:
"\\n6\\t0\\n7\\t0" -, output: "\\n" -stats { - rows: 8 - blocks: 4 - allocated_bytes: 1092 - applied_limit: true - rows_before_limit: 8 -} -]""" - ) + + # Note: We can't convert those messages to string like `results = str(results)` and then compare it as a string + # because str() can serialize a protobuf message with any order of fields. + expected_results = [ + clickhouse_grpc_pb2.Result( + output_format="TabSeparated", + progress=clickhouse_grpc_pb2.Progress( + read_rows=2, read_bytes=16, total_rows_to_read=8 + ), + ), + clickhouse_grpc_pb2.Result(output=b"0\t0\n1\t0\n"), + clickhouse_grpc_pb2.Result( + progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) + ), + clickhouse_grpc_pb2.Result(output=b"2\t0\n3\t0\n"), + clickhouse_grpc_pb2.Result( + progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) + ), + clickhouse_grpc_pb2.Result(output=b"4\t0\n5\t0\n"), + clickhouse_grpc_pb2.Result( + progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) + ), + clickhouse_grpc_pb2.Result(output=b"6\t0\n7\t0\n"), + clickhouse_grpc_pb2.Result( + stats=clickhouse_grpc_pb2.Stats( + rows=8, + blocks=4, + allocated_bytes=1092, + applied_limit=True, + rows_before_limit=8, + ) + ), + ] + + assert results == expected_results def test_session_settings(): diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index ec59d6f9cb2..f0f81100320 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -4,6 +4,7 @@ import os import pytest from helpers.cluster import ClickHouseCluster +from helpers.mock_servers import start_mock_servers from helpers.utility import generate_values, replace_config, SafeThread from helpers.wait_for_helpers import wait_for_delete_inactive_parts from helpers.wait_for_helpers import wait_for_delete_empty_parts @@ -79,46 +80,15 @@ def create_table(node, table_name, **additional_settings): def run_s3_mocks(cluster): - 
logging.info("Starting s3 mocks") - mocks = ( - ("unstable_proxy.py", "resolver", "8081"), - ("no_delete_objects.py", "resolver", "8082"), + script_dir = os.path.join(os.path.dirname(__file__), "s3_mocks") + start_mock_servers( + cluster, + script_dir, + [ + ("unstable_proxy.py", "resolver", "8081"), + ("no_delete_objects.py", "resolver", "8082"), + ], ) - for mock_filename, container, port in mocks: - container_id = cluster.get_container_id(container) - current_dir = os.path.dirname(__file__) - cluster.copy_file_to_container( - container_id, - os.path.join(current_dir, "s3_mocks", mock_filename), - mock_filename, - ) - cluster.exec_in_container( - container_id, ["python", mock_filename, port], detach=True - ) - - # Wait for S3 mocks to start - for mock_filename, container, port in mocks: - num_attempts = 100 - for attempt in range(num_attempts): - ping_response = cluster.exec_in_container( - cluster.get_container_id(container), - ["curl", "-s", f"http://localhost:{port}/"], - nothrow=True, - ) - if ping_response != "OK": - if attempt == num_attempts - 1: - assert ( - ping_response == "OK" - ), f'Expected "OK", but got "{ping_response}"' - else: - time.sleep(1) - else: - logging.debug( - f"mock {mock_filename} ({port}) answered {ping_response} on attempt {attempt}" - ) - break - - logging.info("S3 mocks started") def wait_for_delete_s3_objects(cluster, expected, timeout=30): diff --git a/tests/integration/test_replicated_users/test.py b/tests/integration/test_replicated_users/test.py index 4fb2408c46f..1c73fc19c01 100644 --- a/tests/integration/test_replicated_users/test.py +++ b/tests/integration/test_replicated_users/test.py @@ -58,7 +58,7 @@ def test_create_replicated(started_cluster, entity): node1.query(f"CREATE {entity.keyword} {entity.name} {entity.options}") assert ( f"cannot insert because {entity.keyword.lower()} `{entity.name}{entity.options}` already exists in replicated" - in node2.query_and_get_error( + in node2.query_and_get_error_with_retry( 
f"CREATE {entity.keyword} {entity.name} {entity.options}" ) ) @@ -68,7 +68,7 @@ def test_create_replicated(started_cluster, entity): @pytest.mark.parametrize("entity", entities, ids=get_entity_id) def test_create_and_delete_replicated(started_cluster, entity): node1.query(f"CREATE {entity.keyword} {entity.name} {entity.options}") - node2.query(f"DROP {entity.keyword} {entity.name} {entity.options}") + node2.query_with_retry(f"DROP {entity.keyword} {entity.name} {entity.options}") @pytest.mark.parametrize("entity", entities, ids=get_entity_id) @@ -93,7 +93,7 @@ def test_create_replicated_if_not_exists_on_cluster(started_cluster, entity): @pytest.mark.parametrize("entity", entities, ids=get_entity_id) def test_rename_replicated(started_cluster, entity): node1.query(f"CREATE {entity.keyword} {entity.name} {entity.options}") - node2.query( + node2.query_with_retry( f"ALTER {entity.keyword} {entity.name} {entity.options} RENAME TO {entity.name}2" ) node1.query(f"DROP {entity.keyword} {entity.name}2 {entity.options}") diff --git a/tests/integration/test_s3_ec2_metadata/ec2_metadata_server/request_response_server.py b/tests/integration/test_s3_ec2_metadata/ec2_metadata_server/request_response_server.py deleted file mode 100644 index f347866be58..00000000000 --- a/tests/integration/test_s3_ec2_metadata/ec2_metadata_server/request_response_server.py +++ /dev/null @@ -1,36 +0,0 @@ -import http.server -import sys - - -class RequestHandler(http.server.BaseHTTPRequestHandler): - def get_response(self): - if self.path == "/": - return "OK" - elif self.path == "/latest/meta-data/iam/security-credentials": - return "myrole" - elif self.path == "/latest/meta-data/iam/security-credentials/myrole": - return '{ "Code" : "Success", "Type" : "AWS-HMAC", "AccessKeyId" : "minio", "SecretAccessKey" : "minio123" }' - else: - return None - - def do_HEAD(self): - response = self.get_response() - if response: - self.send_response(200) - self.send_header("Content-Type", "text/plain") - 
self.send_header("Content-Length", len(response.encode())) - self.end_headers() - else: - self.send_response(404) - self.send_header("Content-Type", "text/plain") - self.end_headers() - - def do_GET(self): - self.do_HEAD() - response = self.get_response() - if response: - self.wfile.write(response.encode()) - - -httpd = http.server.HTTPServer(("0.0.0.0", int(sys.argv[1])), RequestHandler) -httpd.serve_forever() diff --git a/tests/integration/test_s3_ec2_metadata/test.py b/tests/integration/test_s3_ec2_metadata/test.py deleted file mode 100644 index 982656df009..00000000000 --- a/tests/integration/test_s3_ec2_metadata/test.py +++ /dev/null @@ -1,94 +0,0 @@ -import pytest -from helpers.cluster import ClickHouseCluster -import logging -import os -import time - -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - -EC2_METADATA_SERVER_HOSTNAME = "resolver" -EC2_METADATA_SERVER_PORT = 8080 - -cluster = ClickHouseCluster(__file__) - -node = cluster.add_instance( - "node", - with_minio=True, - main_configs=["configs/use_environment_credentials.xml"], - env_variables={ - "AWS_EC2_METADATA_SERVICE_ENDPOINT": f"{EC2_METADATA_SERVER_HOSTNAME}:{EC2_METADATA_SERVER_PORT}", - }, -) - - -def start_ec2_metadata_server(): - logging.info("Starting EC2 metadata server") - container_id = cluster.get_container_id("resolver") - - cluster.copy_file_to_container( - container_id, - os.path.join(SCRIPT_DIR, "ec2_metadata_server/request_response_server.py"), - "request_response_server.py", - ) - - cluster.exec_in_container( - container_id, - ["python", "request_response_server.py", str(EC2_METADATA_SERVER_PORT)], - detach=True, - ) - - # Wait for the server to start. 
- num_attempts = 100 - for attempt in range(num_attempts): - ping_response = cluster.exec_in_container( - container_id, - ["curl", "-s", f"http://localhost:{EC2_METADATA_SERVER_PORT}/"], - nothrow=True, - ) - if ping_response != "OK": - if attempt == num_attempts - 1: - assert ping_response == "OK", 'Expected "OK", but got "{}"'.format( - ping_response - ) - else: - time.sleep(1) - else: - logging.debug( - f"request_response_server.py answered {ping_response} on attempt {attempt}" - ) - break - - logging.info("EC2 metadata server started") - - -@pytest.fixture(scope="module", autouse=True) -def start_cluster(): - try: - cluster.start() - start_ec2_metadata_server() - yield - finally: - cluster.shutdown() - - -def test_credentials_from_ec2_metadata(): - node.query( - f"INSERT INTO FUNCTION s3('http://{cluster.minio_host}:{cluster.minio_port}/{cluster.minio_bucket}/test1.jsonl') SELECT * FROM numbers(100)" - ) - - assert ( - "100" - == node.query( - f"SELECT count() FROM s3('http://{cluster.minio_host}:{cluster.minio_port}/{cluster.minio_bucket}/test1.jsonl')" - ).strip() - ) - - expected_logs = [ - "Getting default credentials for ec2 instance from resolver:8080", - "Calling EC2MetadataService resource, /latest/meta-data/iam/security-credentials returned credential string myrole", - "Calling EC2MetadataService resource /latest/meta-data/iam/security-credentials/myrole", - "Successfully pulled credentials from EC2MetadataService with access key", - ] - - for expected_msg in expected_logs: - node.contains_in_log("AWSEC2InstanceProfileConfigLoader: " + expected_msg) diff --git a/tests/integration/test_s3_ec2_metadata/__init__.py b/tests/integration/test_s3_imds/__init__.py similarity index 100% rename from tests/integration/test_s3_ec2_metadata/__init__.py rename to tests/integration/test_s3_imds/__init__.py diff --git a/tests/integration/test_s3_ec2_metadata/configs/use_environment_credentials.xml 
b/tests/integration/test_s3_imds/configs/use_environment_credentials.xml similarity index 100% rename from tests/integration/test_s3_ec2_metadata/configs/use_environment_credentials.xml rename to tests/integration/test_s3_imds/configs/use_environment_credentials.xml diff --git a/tests/integration/test_s3_imds/metadata_servers/server_with_session_tokens.py b/tests/integration/test_s3_imds/metadata_servers/server_with_session_tokens.py new file mode 100644 index 00000000000..5dcb27d6f86 --- /dev/null +++ b/tests/integration/test_s3_imds/metadata_servers/server_with_session_tokens.py @@ -0,0 +1,64 @@ +import http.server +import sys +import uuid + + +# Session tokens for IMDS sessions. +tokens = set() + + +def new_token(): + token = str(uuid.uuid4()) + global tokens + tokens.add(token) + return token + + +def token_exists(token): + global tokens + return token in tokens + + +class RequestHandler(http.server.BaseHTTPRequestHandler): + def get_response(self): + if self.path == "/": + return "OK", 200 + + if self.path == "/latest/api/token": + return new_token(), 200 + + if self.path == "/latest/meta-data/iam/security-credentials": + if token_exists(self.headers.get("x-aws-ec2-metadata-token")): + return "myrole", 200 + + if self.path == "/latest/meta-data/iam/security-credentials/myrole": + if token_exists(self.headers.get("x-aws-ec2-metadata-token")): + return ( + '{ "Code" : "Success", "Type" : "AWS-HMAC", "AccessKeyId" : "minio", "SecretAccessKey" : "minio123" }', + 200, + ) + + if self.path.startswith("/latest/meta-data/iam/security-credentials"): + return "", 401 # Unknown token or not specified. + + # Resource not found. 
+ return "", 404 + + def do_HEAD(self): + response, code = self.get_response() + self.send_response(code) + self.send_header("Content-Type", "text/plain") + self.send_header("Content-Length", len(response.encode())) + self.end_headers() + return response, code + + def do_GET(self): + response, _ = self.do_HEAD() + self.wfile.write(response.encode()) + + def do_PUT(self): + self.do_GET() + + +httpd = http.server.HTTPServer(("0.0.0.0", int(sys.argv[1])), RequestHandler) +httpd.serve_forever() diff --git a/tests/integration/test_s3_imds/metadata_servers/simple_server.py b/tests/integration/test_s3_imds/metadata_servers/simple_server.py new file mode 100644 index 00000000000..eaa24eb3561 --- /dev/null +++ b/tests/integration/test_s3_imds/metadata_servers/simple_server.py @@ -0,0 +1,36 @@ +import http.server +import sys + + +class RequestHandler(http.server.BaseHTTPRequestHandler): + def get_response(self): + if self.path == "/": + return "OK", 200 + + if self.path == "/latest/meta-data/iam/security-credentials": + return "myrole", 200 + + if self.path == "/latest/meta-data/iam/security-credentials/myrole": + return ( + '{ "Code" : "Success", "Type" : "AWS-HMAC", "AccessKeyId" : "minio", "SecretAccessKey" : "minio123" }', + 200, + ) + + # Resource not found. 
+ return "", 404 # empty body and status code, since do_HEAD unpacks a (response, code) pair + + def do_HEAD(self): + response, code = self.get_response() + self.send_response(code) + self.send_header("Content-Type", "text/plain") + self.send_header("Content-Length", len(response.encode())) + self.end_headers() + return response, code + + def do_GET(self): + response, _ = self.do_HEAD() + self.wfile.write(response.encode()) + + +httpd = http.server.HTTPServer(("0.0.0.0", int(sys.argv[1])), RequestHandler) +httpd.serve_forever() diff --git a/tests/integration/test_s3_imds/test_session_token.py b/tests/integration/test_s3_imds/test_session_token.py new file mode 100644 index 00000000000..681dc997760 --- /dev/null +++ b/tests/integration/test_s3_imds/test_session_token.py @@ -0,0 +1,69 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.mock_servers import start_mock_servers +import os + +METADATA_SERVER_HOSTNAME = "resolver" +METADATA_SERVER_PORT = 8080 + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + with_minio=True, + main_configs=["configs/use_environment_credentials.xml"], + env_variables={ + "AWS_EC2_METADATA_SERVICE_ENDPOINT": f"{METADATA_SERVER_HOSTNAME}:{METADATA_SERVER_PORT}", + }, +) + + +def start_metadata_server(): + script_dir = os.path.join(os.path.dirname(__file__), "metadata_servers") + start_mock_servers( + cluster, + script_dir, + [ + ( + "server_with_session_tokens.py", + METADATA_SERVER_HOSTNAME, + METADATA_SERVER_PORT, + ) + ], + ) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + start_metadata_server() + yield + finally: + cluster.shutdown() + + +def test_credentials_from_metadata(): + node.query( + f"INSERT INTO FUNCTION s3('http://{cluster.minio_host}:{cluster.minio_port}/{cluster.minio_bucket}/test1.jsonl') SELECT * FROM numbers(100)" + ) + + assert ( + "100" + == node.query( + f"SELECT count() FROM s3('http://{cluster.minio_host}:{cluster.minio_port}/{cluster.minio_bucket}/test1.jsonl')" + ).strip() + ) + + 
expected_logs = [ + "Calling EC2MetadataService to get token", + "Calling EC2MetadataService resource, /latest/meta-data/iam/security-credentials with token returned profile string myrole", + "Calling EC2MetadataService resource resolver:8080/latest/meta-data/iam/security-credentials/myrole with token", + "Successfully pulled credentials from EC2MetadataService with access key", + ] + + node.query("SYSTEM FLUSH LOGS") + for expected_msg in expected_logs: + assert node.contains_in_log( + "AWSEC2InstanceProfileConfigLoader: " + expected_msg + ) diff --git a/tests/integration/test_s3_imds/test_simple.py b/tests/integration/test_s3_imds/test_simple.py new file mode 100644 index 00000000000..0dacac2b0b9 --- /dev/null +++ b/tests/integration/test_s3_imds/test_simple.py @@ -0,0 +1,70 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.mock_servers import start_mock_servers +import os + +METADATA_SERVER_HOSTNAME = "resolver" +METADATA_SERVER_PORT = 8080 + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + with_minio=True, + main_configs=["configs/use_environment_credentials.xml"], + env_variables={ + "AWS_EC2_METADATA_SERVICE_ENDPOINT": f"{METADATA_SERVER_HOSTNAME}:{METADATA_SERVER_PORT}", + }, +) + + +def start_metadata_server(): + script_dir = os.path.join(os.path.dirname(__file__), "metadata_servers") + start_mock_servers( + cluster, + script_dir, + [ + ( + "simple_server.py", + METADATA_SERVER_HOSTNAME, + METADATA_SERVER_PORT, + ) + ], + ) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + start_metadata_server() + yield + finally: + cluster.shutdown() + + +def test_credentials_from_metadata(): + node.query( + f"INSERT INTO FUNCTION s3('http://{cluster.minio_host}:{cluster.minio_port}/{cluster.minio_bucket}/test1.jsonl') SELECT * FROM numbers(100)" + ) + + assert ( + "100" + == node.query( + f"SELECT count() FROM 
s3('http://{cluster.minio_host}:{cluster.minio_port}/{cluster.minio_bucket}/test1.jsonl')" + ).strip() + ) + + expected_logs = [ + "Calling EC2MetadataService to get token failed, falling back to less secure way", + "Getting default credentials for ec2 instance from resolver:8080", + "Calling EC2MetadataService resource, /latest/meta-data/iam/security-credentials returned credential string myrole", + "Calling EC2MetadataService resource /latest/meta-data/iam/security-credentials/myrole", + "Successfully pulled credentials from EC2MetadataService with access key", + ] + + node.query("SYSTEM FLUSH LOGS") + for expected_msg in expected_logs: + assert node.contains_in_log( + "AWSEC2InstanceProfileConfigLoader: " + expected_msg + ) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 2fa499eb78e..2e959a4d0ed 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -11,6 +11,7 @@ import helpers.client import pytest from helpers.cluster import ClickHouseCluster, ClickHouseInstance from helpers.network import PartitionManager +from helpers.mock_servers import start_mock_servers from helpers.test_tools import exec_query_with_retry MINIO_INTERNAL_PORT = 9001 @@ -694,47 +695,16 @@ def test_s3_glob_scheherazade(started_cluster): def run_s3_mocks(started_cluster): - logging.info("Starting s3 mocks") - mocks = ( - ("mock_s3.py", "resolver", "8080"), - ("unstable_server.py", "resolver", "8081"), - ("echo.py", "resolver", "8082"), + script_dir = os.path.join(os.path.dirname(__file__), "s3_mocks") + start_mock_servers( + started_cluster, + script_dir, + [ + ("mock_s3.py", "resolver", "8080"), + ("unstable_server.py", "resolver", "8081"), + ("echo.py", "resolver", "8082"), + ], ) - for mock_filename, container, port in mocks: - container_id = started_cluster.get_container_id(container) - current_dir = os.path.dirname(__file__) - started_cluster.copy_file_to_container( - container_id, 
- os.path.join(current_dir, "s3_mocks", mock_filename), - mock_filename, - ) - started_cluster.exec_in_container( - container_id, ["python", mock_filename, port], detach=True - ) - - # Wait for S3 mocks to start - for mock_filename, container, port in mocks: - num_attempts = 100 - for attempt in range(num_attempts): - ping_response = started_cluster.exec_in_container( - started_cluster.get_container_id(container), - ["curl", "-s", f"http://localhost:{port}/"], - nothrow=True, - ) - if ping_response != "OK": - if attempt == num_attempts - 1: - assert ping_response == "OK", 'Expected "OK", but got "{}"'.format( - ping_response - ) - else: - time.sleep(1) - else: - logging.debug( - f"mock {mock_filename} ({port}) answered {ping_response} on attempt {attempt}" - ) - break - - logging.info("S3 mocks started") def replace_config(path, old, new): diff --git a/tests/queries/0_stateless/00937_ipv4_cidr_range.reference b/tests/queries/0_stateless/00937_ipv4_cidr_range.reference index 01f85dc6447..3824c190627 100644 --- a/tests/queries/0_stateless/00937_ipv4_cidr_range.reference +++ b/tests/queries/0_stateless/00937_ipv4_cidr_range.reference @@ -1,4 +1,3 @@ -check invalid params tests 4 3 diff --git a/tests/queries/0_stateless/00937_ipv4_cidr_range.sql b/tests/queries/0_stateless/00937_ipv4_cidr_range.sql index badefe22383..2cc33eb9f58 100644 --- a/tests/queries/0_stateless/00937_ipv4_cidr_range.sql +++ b/tests/queries/0_stateless/00937_ipv4_cidr_range.sql @@ -1,7 +1,3 @@ -SELECT 'check invalid params'; -SELECT IPv4CIDRToRange(1, 1); -- { serverError 43 } -SELECT IPv4CIDRToRange(toUInt32(1), 512); -- { serverError 43 } - SELECT 'tests'; DROP TABLE IF EXISTS ipv4_range; diff --git a/tests/queries/0_stateless/01848_partition_value_column.sql b/tests/queries/0_stateless/01848_partition_value_column.sql index 0fc4fbe2802..d13e4508789 100644 --- a/tests/queries/0_stateless/01848_partition_value_column.sql +++ b/tests/queries/0_stateless/01848_partition_value_column.sql @@ -13,6 
+13,7 @@ select count() from tbl where _partition_value.3 = 4 settings max_rows_to_read = create table tbl2(i int) engine MergeTree order by i; insert into tbl2 values (1); select _partition_value from tbl2; -- { serverError 16 } +select _partition_value from tbl2 group by 1; -- { serverError 16 } drop table tbl; drop table tbl2; diff --git a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql index 85db40f1104..6012d5904f4 100644 --- a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql +++ b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql @@ -1,4 +1,4 @@ -- Tags: no-replicated-database -SET max_memory_usage = '75M'; +SET max_memory_usage = '50M'; SELECT cityHash64(rand() % 1000) as n, groupBitmapState(number) FROM numbers_mt(2000000000) GROUP BY n FORMAT Null; -- { serverError 241 } diff --git a/tests/queries/0_stateless/02007_ipv4_and_ipv6_to_and_from_string.sql b/tests/queries/0_stateless/02007_ipv4_and_ipv6_to_and_from_string.sql index b303d580e72..f392d0125d9 100644 --- a/tests/queries/0_stateless/02007_ipv4_and_ipv6_to_and_from_string.sql +++ b/tests/queries/0_stateless/02007_ipv4_and_ipv6_to_and_from_string.sql @@ -7,7 +7,7 @@ SELECT CAST(toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001') as String) as v, t SELECT toIPv4OrDefault('hello') as v, toTypeName(v); SELECT toIPv6OrDefault('hello') as v, toTypeName(v); -SELECT CAST('hello' as IPv4) as v, toTypeName(v); -- { serverError CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING } -SELECT CAST('hello' as IPv6) as v, toTypeName(v); -- { serverError CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING } +SELECT CAST('hello' as IPv4) as v, toTypeName(v); -- { serverError CANNOT_PARSE_IPV4 } +SELECT CAST('hello' as IPv6) as v, toTypeName(v); -- { serverError CANNOT_PARSE_IPV6 } SELECT CAST('1.1.1.1' as IPv6) as v, toTypeName(v); diff --git a/tests/queries/0_stateless/02184_ipv6_select_parsing.reference 
b/tests/queries/0_stateless/02184_ipv6_select_parsing.reference new file mode 100644 index 00000000000..734f9c07b3d --- /dev/null +++ b/tests/queries/0_stateless/02184_ipv6_select_parsing.reference @@ -0,0 +1,4 @@ +::ffff:127.0.0.1 +::ffff:127.0.0.1 +::ffff:127.0.0.1 +::ffff:127.0.0.1 diff --git a/tests/queries/0_stateless/02184_ipv6_select_parsing.sql b/tests/queries/0_stateless/02184_ipv6_select_parsing.sql new file mode 100644 index 00000000000..2892de309c4 --- /dev/null +++ b/tests/queries/0_stateless/02184_ipv6_select_parsing.sql @@ -0,0 +1,10 @@ +drop table if exists ips_v6; +create table ips_v6(i IPv6) Engine=Memory; + +INSERT INTO ips_v6 SELECT toIPv6('::ffff:127.0.0.1'); +INSERT INTO ips_v6 values ('::ffff:127.0.0.1'); +INSERT INTO ips_v6 FORMAT TSV ::ffff:127.0.0.1 +INSERT INTO ips_v6 SELECT ('::ffff:127.0.0.1'); + +SELECT * FROM ips_v6; +drop table ips_v6; diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.reference b/tests/queries/0_stateless/02234_cast_to_ip_address.reference index 3a4c40a07cf..96aae2a978c 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.reference +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.reference @@ -41,5 +41,6 @@ IPv6 functions :: :: -- -::ffff:127.0.0.1 ::ffff:127.0.0.1 ::ffff:127.0.0.1 ::1\0\0 ::1 ::1 +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 :: +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 :: diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.sql b/tests/queries/0_stateless/02234_cast_to_ip_address.sql index d436c93b9db..436f232e441 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.sql +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.sql @@ -1,6 +1,6 @@ SELECT 'IPv4 functions'; -SELECT IPv4StringToNum('test'); --{serverError 441} +SELECT IPv4StringToNum('test'); --{serverError CANNOT_PARSE_IPV4} SELECT IPv4StringToNumOrDefault('test'); SELECT IPv4StringToNumOrNull('test'); @@ -10,7 +10,7 @@ SELECT IPv4StringToNumOrNull('127.0.0.1'); SELECT '--'; -SELECT toIPv4('test'); 
--{serverError 441} +SELECT toIPv4('test'); --{serverError CANNOT_PARSE_IPV4} SELECT toIPv4OrDefault('test'); SELECT toIPv4OrNull('test'); @@ -20,7 +20,7 @@ SELECT toIPv4OrNull('127.0.0.1'); SELECT '--'; -SELECT cast('test' , 'IPv4'); --{serverError 441} +SELECT cast('test' , 'IPv4'); --{serverError CANNOT_PARSE_IPV4} SELECT cast('127.0.0.1' , 'IPv4'); SELECT '--'; @@ -38,7 +38,7 @@ SET cast_ipv4_ipv6_default_on_conversion_error = 0; SELECT 'IPv6 functions'; -SELECT IPv6StringToNum('test'); --{serverError 441} +SELECT IPv6StringToNum('test'); --{serverError CANNOT_PARSE_IPV6} SELECT IPv6StringToNumOrDefault('test'); SELECT IPv6StringToNumOrNull('test'); @@ -48,7 +48,7 @@ SELECT IPv6StringToNumOrNull('::ffff:127.0.0.1'); SELECT '--'; -SELECT toIPv6('test'); --{serverError 441} +SELECT toIPv6('test'); --{serverError CANNOT_PARSE_IPV6} SELECT toIPv6OrDefault('test'); SELECT toIPv6OrNull('test'); @@ -58,7 +58,7 @@ SELECT toIPv6OrNull('::ffff:127.0.0.1'); SELECT '--'; -SELECT cast('test' , 'IPv6'); --{serverError 441} +SELECT cast('test' , 'IPv6'); --{serverError CANNOT_PARSE_IPV6} SELECT cast('::ffff:127.0.0.1', 'IPv6'); SELECT '--'; @@ -76,7 +76,6 @@ SELECT '--'; SET cast_ipv4_ipv6_default_on_conversion_error = 0; -SELECT toFixedString('::ffff:127.0.0.1', 16) as value, cast(value, 'IPv6'), toIPv6(value); SELECT toFixedString('::1', 5) as value, cast(value, 'IPv6'), toIPv6(value); -SELECT toFixedString('', 16) as value, cast(value, 'IPv6'); --{serverError 441} -SELECT toFixedString('', 16) as value, toIPv6(value); --{serverError 441} +SELECT toFixedString('', 16) as value, cast(value, 'IPv6'); +SELECT toFixedString('', 16) as value, toIPv6(value); diff --git a/tests/queries/0_stateless/02244_ip_address_invalid_insert.reference b/tests/queries/0_stateless/02244_ip_address_invalid_insert.reference index 60e6a5da083..a7b2fcc11b4 100644 --- a/tests/queries/0_stateless/02244_ip_address_invalid_insert.reference +++ 
b/tests/queries/0_stateless/02244_ip_address_invalid_insert.reference @@ -3,7 +3,7 @@ 1.1.1.1 1.1.1.1 0.0.0.0 fe80::9801:43ff:fe1f:7690 fe80::9801:43ff:fe1f:7690 -1.1.1.1 :: +1.1.1.1 ::ffff:1.1.1.1 :: fe80::9801:43ff:fe1f:7690 fe80::9801:43ff:fe1f:7690 1.1.1.1 ::ffff:1.1.1.1 diff --git a/tests/queries/0_stateless/02244_ip_address_invalid_insert.sql b/tests/queries/0_stateless/02244_ip_address_invalid_insert.sql index 4057b9b2d98..27e2ab219d4 100644 --- a/tests/queries/0_stateless/02244_ip_address_invalid_insert.sql +++ b/tests/queries/0_stateless/02244_ip_address_invalid_insert.sql @@ -5,7 +5,7 @@ CREATE TABLE test_table_ipv4 ipv4 IPv4 ) ENGINE = TinyLog; -INSERT INTO test_table_ipv4 VALUES ('1.1.1.1', '1.1.1.1'), ('', ''); --{clientError 441} +INSERT INTO test_table_ipv4 VALUES ('1.1.1.1', '1.1.1.1'), ('', ''); --{clientError CANNOT_PARSE_IPV4} SET input_format_ipv4_default_on_conversion_error = 1; @@ -23,11 +23,11 @@ CREATE TABLE test_table_ipv4_materialized ipv6 IPv4 MATERIALIZED toIPv4(ip) ) ENGINE = TinyLog; -INSERT INTO test_table_ipv4_materialized(ip) VALUES ('1.1.1.1'), (''); --{serverError 441} +INSERT INTO test_table_ipv4_materialized(ip) VALUES ('1.1.1.1'), (''); --{serverError CANNOT_PARSE_IPV4} SET input_format_ipv4_default_on_conversion_error = 1; -INSERT INTO test_table_ipv4_materialized(ip) VALUES ('1.1.1.1'), (''); --{serverError 441} +INSERT INTO test_table_ipv4_materialized(ip) VALUES ('1.1.1.1'), (''); --{serverError CANNOT_PARSE_IPV4} SET cast_ipv4_ipv6_default_on_conversion_error = 1; @@ -46,7 +46,7 @@ CREATE TABLE test_table_ipv6 ipv6 IPv6 ) ENGINE = TinyLog; -INSERT INTO test_table_ipv6 VALUES ('fe80::9801:43ff:fe1f:7690', 'fe80::9801:43ff:fe1f:7690'), ('1.1.1.1', '1.1.1.1'), ('', ''); --{clientError 441} +INSERT INTO test_table_ipv6 VALUES ('fe80::9801:43ff:fe1f:7690', 'fe80::9801:43ff:fe1f:7690'), ('1.1.1.1', '1.1.1.1'), ('', ''); --{clientError CANNOT_PARSE_IPV6} SET input_format_ipv6_default_on_conversion_error = 1; @@ -64,11 +64,11 @@ 
CREATE TABLE test_table_ipv6_materialized ipv6 IPv6 MATERIALIZED toIPv6(ip) ) ENGINE = TinyLog; -INSERT INTO test_table_ipv6_materialized(ip) VALUES ('fe80::9801:43ff:fe1f:7690'), ('1.1.1.1'), (''); --{serverError 441} +INSERT INTO test_table_ipv6_materialized(ip) VALUES ('fe80::9801:43ff:fe1f:7690'), ('1.1.1.1'), (''); --{serverError CANNOT_PARSE_IPV6} SET input_format_ipv6_default_on_conversion_error = 1; -INSERT INTO test_table_ipv6_materialized(ip) VALUES ('fe80::9801:43ff:fe1f:7690'), ('1.1.1.1'), (''); --{serverError 441} +INSERT INTO test_table_ipv6_materialized(ip) VALUES ('fe80::9801:43ff:fe1f:7690'), ('1.1.1.1'), (''); --{serverError CANNOT_PARSE_IPV6} SET cast_ipv4_ipv6_default_on_conversion_error = 1; diff --git a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql index 1a0d9a4c830..1cc5140f339 100644 --- a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql +++ b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql @@ -12,6 +12,7 @@ select CAST(number % 2 ? 'true' : NULL, 'Nullable(Bool)') from numbers(2); select CAST(number % 2 ? '0.0.0.0' : NULL, 'Nullable(IPv4)') from numbers(2); select CAST(number % 2 ? '0000:0000:0000:0000:0000:0000:0000:0000' : NULL, 'Nullable(IPv6)') from numbers(2); +set cast_keep_nullable = 1; select toBool(number % 2 ? 'true' : NULL) from numbers(2); select toIPv4(number % 2 ? '0.0.0.0' : NULL) from numbers(2); select toIPv4OrDefault(number % 2 ? 
'' : NULL) from numbers(2); diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 0996ad37e6e..9d747f9c572 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -240,42 +240,6 @@ detectCharset detectLanguageUnknown detectProgrammingLanguage detectTonality -dictGet -dictGetChildren -dictGetDate -dictGetDateOrDefault -dictGetDateTime -dictGetDateTimeOrDefault -dictGetDescendants -dictGetFloat32 -dictGetFloat32OrDefault -dictGetFloat64 -dictGetFloat64OrDefault -dictGetHierarchy -dictGetInt16 -dictGetInt16OrDefault -dictGetInt32 -dictGetInt32OrDefault -dictGetInt64 -dictGetInt64OrDefault -dictGetInt8 -dictGetInt8OrDefault -dictGetOrDefault -dictGetOrNull -dictGetString -dictGetStringOrDefault -dictGetUInt16 -dictGetUInt16OrDefault -dictGetUInt32 -dictGetUInt32OrDefault -dictGetUInt64 -dictGetUInt64OrDefault -dictGetUInt8 -dictGetUInt8OrDefault -dictGetUUID -dictGetUUIDOrDefault -dictHas -dictIsIn divide dotProduct dumpColumnStructure @@ -773,9 +737,11 @@ toHour toIPv4 toIPv4OrDefault toIPv4OrNull +toIPv4OrZero toIPv6 toIPv6OrDefault toIPv6OrNull +toIPv6OrZero toISOWeek toISOYear toInt128 diff --git a/tests/queries/0_stateless/02521_cannot-find-column-in-projection.reference b/tests/queries/0_stateless/02521_cannot-find-column-in-projection.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02521_cannot-find-column-in-projection.sql b/tests/queries/0_stateless/02521_cannot-find-column-in-projection.sql new file mode 100644 index 00000000000..31602c5bae2 --- /dev/null +++ b/tests/queries/0_stateless/02521_cannot-find-column-in-projection.sql @@ -0,0 +1,3 @@ +create table test(day Date, id UInt32) engine=MergeTree partition by day order by tuple(); +insert into test select 
toDate('2023-01-05') AS day, number from numbers(10); +with toUInt64(id) as id_with select day, count(id_with) from test where day >= '2023-01-01' group by day limit 1000; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK } diff --git a/tests/queries/0_stateless/02522_different_types_in_storage_merge.reference b/tests/queries/0_stateless/02522_different_types_in_storage_merge.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02522_different_types_in_storage_merge.sql b/tests/queries/0_stateless/02522_different_types_in_storage_merge.sql new file mode 100644 index 00000000000..db0a498fd82 --- /dev/null +++ b/tests/queries/0_stateless/02522_different_types_in_storage_merge.sql @@ -0,0 +1,6 @@ +CREATE TABLE test_s64_local (date Date, value Int64) ENGINE = MergeTree order by tuple(); +CREATE TABLE test_u64_local (date Date, value UInt64) ENGINE = MergeTree order by tuple(); +CREATE TABLE test_s64_distributed AS test_s64_local ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_s64_local, rand()); +CREATE TABLE test_u64_distributed AS test_u64_local ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_u64_local, rand()); + +SELECT * FROM merge(currentDatabase(), '') WHERE value = 1048575; diff --git a/utils/check-style/check-style b/utils/check-style/check-style index ad34f5e82e1..b5e1a4748a5 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -244,7 +244,7 @@ find $ROOT_PATH/{src,base,programs} -name '*.h' -or -name '*.cpp' 2>/dev/null | find $ROOT_PATH -not -path $ROOT_PATH'/tests/ci*' -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name Dockerfile -type f 2>/dev/null | xargs --no-run-if-empty -n1 echo "Please move Dockerfile to docker directory:" # There shouldn't be any docker compose files outside docker directory -#find $ROOT_PATH -not -path $ROOT_PATH'/tests/testflows*' -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name 
'*compose*.yml' -type f 2>/dev/null | xargs --no-run-if-empty grep -l "version:" | xargs --no-run-if-empty -n1 echo "Please move docker compose to docker directory:" +find $ROOT_PATH -name '*compose*.yml' -type f -not -path $ROOT_PATH'/docker' -not -path $ROOT_PATH'/tests/integration*' -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' 2>/dev/null | grep -vP $EXCLUDE_DIRS | xargs --no-run-if-empty grep -l "version:" | xargs --no-run-if-empty -n1 echo "Please move docker compose to the 'docker' or 'tests' directory:" # Check that every header file has #pragma once in first line find $ROOT_PATH/{src,programs,utils} -name '*.h' | diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index d4c5924d6f3..4f98e9a1ef6 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v22.12.2.25-stable 2023-01-06 v22.12.1.1752-stable 2022-12-15 v22.11.2.30-stable 2022-12-02 v22.11.1.1360-stable 2022-11-17