diff --git a/.clang-tidy b/.clang-tidy index f8622039f29..7f78143ec3d 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -110,6 +110,7 @@ Checks: '*, -misc-const-correctness, -misc-no-recursion, -misc-non-private-member-variables-in-classes, + -misc-confusable-identifiers, # useful but slooow -modernize-avoid-c-arrays, -modernize-concat-nested-namespaces, @@ -148,19 +149,6 @@ Checks: '*, -readability-use-anyofallof, -zirkon-*, - - -misc-*, # temporarily disabled due to being too slow - # also disable checks in other categories which are aliases of checks in misc-*: - # https://releases.llvm.org/15.0.0/tools/clang/tools/extra/docs/clang-tidy/checks/list.html - -cert-dcl54-cpp, # alias of misc-new-delete-overloads - -hicpp-new-delete-operators, # alias of misc-new-delete-overloads - -cert-fio38-c, # alias of misc-non-copyable-objects - -cert-dcl03-c, # alias of misc-static-assert - -hicpp-static-assert, # alias of misc-static-assert - -cert-err09-cpp, # alias of misc-throw-by-value-catch-by-reference - -cert-err61-cpp, # alias of misc-throw-by-value-catch-by-reference - -cppcoreguidelines-c-copy-assignment-signature, # alias of misc-unconventional-assign-operator - -cppcoreguidelines-non-private-member-variables-in-classes, # alias of misc-non-private-member-variables-in-classes ' WarningsAsErrors: '*' diff --git a/base/base/find_symbols.h b/base/base/find_symbols.h index 01efe7046bc..fe5d3bbadab 100644 --- a/base/base/find_symbols.h +++ b/base/base/find_symbols.h @@ -36,7 +36,7 @@ namespace detail { -template constexpr bool is_in(char x) { return ((x == chars) || ...); } +template constexpr bool is_in(char x) { return ((x == chars) || ...); } // NOLINT(misc-redundant-expression) #if defined(__SSE2__) template diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index e9ca87916a0..e14ba5699e4 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ 
b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -8,11 +8,18 @@ sidebar_label: Data Replication :::note In ClickHouse Cloud replication is managed for you. Please create your tables without adding arguments. For example, in the text below you would replace: + +```sql +ENGINE = ReplicatedReplacingMergeTree( + '/clickhouse/tables/{shard}/table_name', + '{replica}', + ver +) ``` -ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver) -``` + with: -``` + +```sql ENGINE = ReplicatedReplacingMergeTree ``` ::: diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 68ff1c50b19..03f0201e3e0 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1235,8 +1235,8 @@ For output it uses the following correspondence between ClickHouse types and BSO | ClickHouse type | BSON Type | |-----------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------| | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean | -| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | -| [Int16UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `\x10` int32 | +| [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `\x10` int32 | | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 | | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 | @@ -1255,30 +1255,30 @@ For output it uses the following correspondence between ClickHouse types and BSO | 
[Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array | | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array | | [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document | -| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document | +| [Map](/docs/en/sql-reference/data-types/map.md) | `\x03` document | | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `\x10` int32 | | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `\x05` binary, `\x00` binary subtype | For input it uses the following correspondence between BSON types and ClickHouse types: -| BSON Type | ClickHouse Type | -|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | -| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | -| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) | -| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | -| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | -| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | -| `\x07` ObjectId | 
[String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) | -| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | -| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) | -| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | -| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | +| BSON Type | ClickHouse Type | +|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | +| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | +| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) | +| `\x05` binary, `\x00` binary subtype | 
[String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | +| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | +| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | +| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) | +| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | +| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) | +| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md)/[Enum8/Enum16](/docs/en/sql-reference/data-types/enum.md) | +| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8). Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from BSON Binary value with `\x00` binary subtype. 
In this case this format will validate that the size of binary data equals the size of expected value. @@ -1610,29 +1610,34 @@ See also [Format Schema](#formatschema). The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. -| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) | -|----------------------------------|------------------------------------------------------------------------------------------------------------------------|------------------------------| -| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | -| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | -| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` | -| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | -| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | -| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | -| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | -| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` | -| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` | -| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` | -| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` | -| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` | -| `ENUM` | [Enum(8\ |16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` | -| `LIST` | 
[Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | -| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | -| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` | -| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` | +| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) | +|------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------| +| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | +| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | +| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` | +| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | +| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | +| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md), [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `INT32` | +| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | +| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md), [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `INT64` | +| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` | +| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` | +| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` | +| `union(T, Void), union(Void, T)` | 
[Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` | +| `ENUM` | [Enum(8/16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` | +| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | +| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` | +| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` | +| `DATA` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `DATA` | +| `DATA` | [Decimal128/Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DATA` | +| `STRUCT(entries LIST(STRUCT(key Key, value Value)))` | [Map](/docs/en/sql-reference/data-types/map.md) | `STRUCT(entries LIST(STRUCT(key Key, value Value)))` | + +Integer types can be converted into each other during input/output. For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting. -Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` type also can be nested. +Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. 
### Inserting and Selecting Data {#inserting-and-selecting-data-capnproto} diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileApprox.md b/docs/en/sql-reference/aggregate-functions/reference/quantileApprox.md new file mode 100644 index 00000000000..21b9a3500c4 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileApprox.md @@ -0,0 +1,76 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/quantileApprox +sidebar_position: 204 +--- + +# quantileApprox + +Computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [Greenwald-Khanna](http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf) algorithm. The Greenwald-Khanna algorithm is an algorithm used to compute quantiles on a stream of data in a highly efficient manner. It was introduced by Michael Greenwald and Sanjeev Khanna in 2001. It is widely used in databases and big data systems where computing accurate quantiles on a large stream of data in real-time is necessary. The algorithm is highly efficient, taking only O(log n) space and O(log log n) time per item (where n is the size of the input). It is also highly accurate, providing an approximate quantile value with high probability. + +`quantileApprox` is different from other quantile functions in ClickHouse, because it enables the user to control the accuracy of the approximate quantile result. + +**Syntax** + +``` sql +quantileApprox(accuracy, level)(expr) +``` + +Alias: `medianApprox`. + +**Arguments** + +- `accuracy` — Accuracy of quantile. Constant positive integer. Larger accuracy value means less error. For example, if the accuracy argument is set to 100, the computed quantile will have an error no greater than 1% with high probability. There is a trade-off between the accuracy of the computed quantiles and the computational complexity of the algorithm.
A larger accuracy requires more memory and computational resources to compute the quantile accurately, while a smaller accuracy argument allows for a faster and more memory-efficient computation but with a slightly lower accuracy. + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). + +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). + + +**Returned value** + +- Quantile of the specified level and accuracy. + + +Type: + +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. 
+ +**Example** + +``` sql +SELECT quantileApprox(1, 0.25)(number + 1) +FROM numbers(1000) + +┌─quantileApprox(1, 0.25)(plus(number, 1))─┐ +│ 1 │ +└──────────────────────────────────────────┘ + +SELECT quantileApprox(10, 0.25)(number + 1) +FROM numbers(1000) + +┌─quantileApprox(10, 0.25)(plus(number, 1))─┐ +│ 156 │ +└───────────────────────────────────────────┘ + +SELECT quantileApprox(100, 0.25)(number + 1) +FROM numbers(1000) + +┌─quantileApprox(100, 0.25)(plus(number, 1))─┐ +│ 251 │ +└────────────────────────────────────────────┘ + +SELECT quantileApprox(1000, 0.25)(number + 1) +FROM numbers(1000) + +┌─quantileApprox(1000, 0.25)(plus(number, 1))─┐ +│ 249 │ +└─────────────────────────────────────────────┘ +``` + + +**See Also** + +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index 57151915336..fe22ad7b048 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -114,3 +114,59 @@ Result: │ [249.75,499.5,749.25,899.1,949.05,989.01,998.001] │ └─────────────────────────────────────────────────────────────────────┘ ``` + +## quantilesApprox + +`quantilesApprox` works similarly to `quantileApprox` but allows us to calculate quantiles at different levels simultaneously and returns an array. + +**Syntax** + +``` sql +quantilesApprox(accuracy, level1, level2, ...)(expr) +``` + +**Returned value** + +- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels. + +Type of array values: + +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. + +**Example** + +Query: + + +``` sql +SELECT quantilesApprox(1, 0.25, 0.5, 0.75)(number + 1) +FROM numbers(1000) + +┌─quantilesApprox(1, 0.25, 0.5, 0.75)(plus(number, 1))─┐ +│ [1,1,1] │ +└──────────────────────────────────────────────────────┘ + +SELECT quantilesApprox(10, 0.25, 0.5, 0.75)(number + 1) +FROM numbers(1000) + +┌─quantilesApprox(10, 0.25, 0.5, 0.75)(plus(number, 1))─┐ +│ [156,413,659] │ +└───────────────────────────────────────────────────────┘ + + +SELECT quantilesApprox(100, 0.25, 0.5, 0.75)(number + 1) +FROM numbers(1000) + +┌─quantilesApprox(100, 0.25, 0.5, 0.75)(plus(number, 1))─┐ +│ [251,498,741] │ +└────────────────────────────────────────────────────────┘ + +SELECT quantilesApprox(1000, 0.25, 0.5, 0.75)(number + 1) +FROM numbers(1000) + +┌─quantilesApprox(1000, 0.25, 0.5, 0.75)(plus(number, 1))─┐ +│ [249,499,749] │ +└─────────────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 71b7fa07f18..11036d804dc 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1276,16 +1276,16 @@ Using replacement fields, you can define a pattern for the resulting string. 
“ | %k | hour in 24h format (00-23) | 22 | | %l | hour in 12h format (01-12) | 09 | | %m | month as an integer number (01-12) | 01 | -| %M | minute (00-59) | 33 | +| %M | full month name (January-December) | January | | %n | new-line character (‘’) | | | %p | AM or PM designation | PM | | %Q | Quarter (1-4) | 1 | -| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%M %p | 10:30 PM | -| %R | 24-hour HH:MM time, equivalent to %H:%M | 22:33 | +| %r | 12-hour HH:MM AM/PM time, equivalent to %h:%i %p | 10:30 PM | +| %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 | | %s | second (00-59) | 44 | | %S | second (00-59) | 44 | | %t | horizontal-tab character (’) | | -| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44 | +| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%i:%S | 22:33:44 | | %u | ISO 8601 weekday as number with Monday as 1 (1-7) | 2 | | %V | ISO 8601 week number (01-53) | 01 | | %w | weekday as a integer number with Sunday as 0 (0-6) | 2 | diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 50e15f70f5d..d4c7c451af2 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -13,17 +13,18 @@ Functions for [searching](../../sql-reference/functions/string-search-functions. ## replaceOne(haystack, pattern, replacement) Replaces the first occurrence of the substring ‘pattern’ (if it exists) in ‘haystack’ by the ‘replacement’ string. -‘pattern’ and ‘replacement’ must be constants. ## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement) Replaces all occurrences of the substring ‘pattern’ in ‘haystack’ by the ‘replacement’ string. +Alias: `replace`. + ## replaceRegexpOne(haystack, pattern, replacement) Replaces the first occurrence of the substring matching the regular expression ‘pattern’ in ‘haystack‘ by the ‘replacement‘ string.
-‘pattern‘ must be a constant [re2 regular expression](https://github.com/google/re2/wiki/Syntax). -‘replacement’ must be a plain constant string or a constant string containing substitutions `\0-\9`. +‘pattern‘ must be a [re2 regular expression](https://github.com/google/re2/wiki/Syntax). +‘replacement’ must be a plain string or a string containing substitutions `\0-\9`. Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match. To use a verbatim `\` character in the ‘pattern‘ or ‘replacement‘ string, escape it using `\`. Also keep in mind that string literals require an extra escaping. @@ -88,6 +89,8 @@ SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res └─────────────────────┘ ``` +Alias: `REGEXP_REPLACE`. + ## regexpQuoteMeta(s) The function adds a backslash before some predefined characters in the string. diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.h b/src/AggregateFunctions/AggregateFunctionQuantile.h index 85eaf6ceba6..93cff6326ba 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -26,9 +26,11 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; } template class QuantileTiming; +template class QuantileApprox; /** Generic aggregate function for calculation of quantiles. @@ -60,6 +62,7 @@ private: using ColVecType = ColumnVectorOrDecimal; static constexpr bool returns_float = !(std::is_same_v); + static constexpr bool is_quantile_approx = std::is_same_v>; static_assert(!is_decimal || !returns_float); QuantileLevels levels; @@ -67,22 +70,57 @@ private: /// Used when there are single level to get. 
Float64 level = 0.5; + /// Used for the approximate version of the algorithm (Greenwald-Khanna) + ssize_t accuracy = 10000; + DataTypePtr & argument_type; public: AggregateFunctionQuantile(const DataTypes & argument_types_, const Array & params) : IAggregateFunctionDataHelper>( argument_types_, params, createResultType(argument_types_)) - , levels(params, returns_many) + , levels(is_quantile_approx && !params.empty() ? Array(params.begin() + 1, params.end()) : params, returns_many) , level(levels.levels[0]) , argument_type(this->argument_types[0]) { if (!returns_many && levels.size() > 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require one parameter or less", getName()); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires one level parameter or less", getName()); + + if constexpr (is_quantile_approx) + { + if (params.empty()) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one param", getName()); + + const auto & accuracy_field = params[0]; + if (!isInt64OrUInt64FieldType(accuracy_field.getType())) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires accuracy parameter with integer type", getName()); + + if (accuracy_field.getType() == Field::Types::Int64) + accuracy = accuracy_field.get(); + else + accuracy = accuracy_field.get(); + + if (accuracy <= 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Aggregate function {} requires accuracy parameter with positive value but is {}", + getName(), + accuracy); + } } String getName() const override { return Name::name; } + void create(AggregateDataPtr __restrict place) const override /// NOLINT + { + if constexpr (is_quantile_approx) + new (place) Data(accuracy); + else + new (place) Data; + } + static DataTypePtr createResultType(const DataTypes & argument_types_) { DataTypePtr res; @@ -257,4 +295,7 @@ struct 
NameQuantilesBFloat16 { static constexpr auto name = "quantilesBFloat16"; struct NameQuantileBFloat16Weighted { static constexpr auto name = "quantileBFloat16Weighted"; }; struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBFloat16Weighted"; }; +struct NameQuantileApprox { static constexpr auto name = "quantileApprox"; }; +struct NameQuantilesApprox { static constexpr auto name = "quantilesApprox"; }; + } diff --git a/src/AggregateFunctions/AggregateFunctionQuantileApprox.cpp b/src/AggregateFunctions/AggregateFunctionQuantileApprox.cpp new file mode 100644 index 00000000000..2a913fdc956 --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionQuantileApprox.cpp @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +template using FuncQuantileApprox = AggregateFunctionQuantile, NameQuantileApprox, false, void, false>; +template using FuncQuantilesApprox = AggregateFunctionQuantile, NameQuantilesApprox, false, void, true>; + +template