diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index c2a893a8e99..91dcb6a4968 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -106,7 +106,8 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} # stage for jobs that do not prohibit merge Tests_3: - needs: [RunConfig, Builds_1] + # Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there. + needs: [RunConfig, Builds_1, Builds_2] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} uses: ./.github/workflows/reusable_test_stage.yml with: diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 7d22554473e..e4deaf9f35e 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -135,7 +135,7 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} # stage for jobs that do not prohibit merge Tests_3: - needs: [RunConfig, Tests_1, Tests_2] + needs: [RunConfig, Builds_1, Tests_1, Builds_2, Tests_2] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} uses: ./.github/workflows/reusable_test_stage.yml with: diff --git a/CMakeLists.txt b/CMakeLists.txt index 601cbe7201c..455adc24182 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -122,7 +122,7 @@ add_library(global-libs INTERFACE) include (cmake/sanitize.cmake) -include (cmake/instrument.cmake) +include (cmake/xray_instrumentation.cmake) option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON) diff --git a/SECURITY.md b/SECURITY.md index f8511fb42d6..8635951dc0e 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -19,7 +19,10 @@ The following versions of ClickHouse server are currently supported with securit | 24.3 | ✔️ | | 24.2 | ❌ | | 24.1 | ❌ | -| 23.* | ❌ | +| 23.12 | ❌ | +| 23.11 | ❌ | +| 23.10 | ❌ | +| 23.9 | ❌ | | 23.8 | ✔️ | | 23.7 | ❌ | | 23.6 | ❌ | diff --git a/cmake/instrument.cmake b/cmake/xray_instrumentation.cmake similarity index 91% rename from cmake/instrument.cmake rename to cmake/xray_instrumentation.cmake index bd2fb4d45fc..661c0575e54 100644 --- a/cmake/instrument.cmake +++ b/cmake/xray_instrumentation.cmake @@ -7,7 +7,7 @@ if (NOT ENABLE_XRAY) return() endif() -if (NOT (ARCH_AMD64 AND (OS_LINUX OR OS_FREEBSD))) +if (NOT (ARCH_AMD64 AND OS_LINUX)) message (STATUS "Not using LLVM XRay, only amd64 Linux or FreeBSD are supported") return() endif() diff --git a/docs/en/sql-reference/data-types/boolean.md b/docs/en/sql-reference/data-types/boolean.md index 4c59bd947de..6fcbc218c5d 100644 --- a/docs/en/sql-reference/data-types/boolean.md +++ b/docs/en/sql-reference/data-types/boolean.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/data-types/boolean sidebar_position: 22 -sidebar_label: Boolean +sidebar_label: Bool --- # Bool diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md index 18c7816f811..9f82c2f093a 100644 --- a/docs/en/sql-reference/data-types/map.md +++ b/docs/en/sql-reference/data-types/map.md @@ -6,101 +6,106 @@ sidebar_label: Map(K, V) # Map(K, V) -`Map(K, V)` data type stores `key:value` pairs. -The Map datatype is implemented as `Array(Tuple(key T1, value T2))`, which means that the order of keys in each map does not change, i.e., this data type maintains insertion order. +Data type `Map(K, V)` stores key-value pairs. + +Unlike other databases, maps are not unique in ClickHouse, i.e. 
a map can contain two elements with the same key.
+(The reason is that maps are internally implemented as `Array(Tuple(K, V))`.)
+
+You can use syntax `m[k]` to obtain the value for key `k` in map `m`.
+Also, `m[k]` scans the map, i.e. the runtime of the operation is linear in the size of the map.
 
 **Parameters**
 
-- `key` — The key part of the pair. Arbitrary type, except [Nullable](../../sql-reference/data-types/nullable.md) and [LowCardinality](../../sql-reference/data-types/lowcardinality.md) nested with [Nullable](../../sql-reference/data-types/nullable.md) types.
-- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md).
-
-To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax. This lookup works now with a linear complexity.
+- `K` — The type of the Map keys. Arbitrary type except [Nullable](../../sql-reference/data-types/nullable.md) and [LowCardinality](../../sql-reference/data-types/lowcardinality.md) nested with [Nullable](../../sql-reference/data-types/nullable.md) types.
+- `V` — The type of the Map values. Arbitrary type.
 
 **Examples**
 
-Consider the table:
+Create a table with a column of type map:
 
 ``` sql
-CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
-INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30});
+CREATE TABLE tab (m Map(String, UInt64)) ENGINE=Memory;
+INSERT INTO tab VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30});
 ```
 
-Select all `key2` values:
+To select `key2` values:
 
 ```sql
-SELECT a['key2'] FROM table_map;
+SELECT m['key2'] FROM tab;
 ```
+
 Result:
 
 ```text
-┌─arrayElement(a, 'key2')─┐
+┌─arrayElement(m, 'key2')─┐
 │                      10 │
 │                      20 │
 │                      30 │
 └─────────────────────────┘
 ```
 
-If there's no such `key` in the `Map()` column, the query returns zeros for numerical values, empty strings or empty arrays.
+If the requested key `k` is not contained in the map, `m[k]` returns the value type's default value, e.g. `0` for integer types and `''` for string types.
+To check whether a key exists in a map, you can use function [mapContains](../../sql-reference/functions/tuple-map-functions.md#mapcontains).
 
 ```sql
-INSERT INTO table_map VALUES ({'key3':100}), ({});
-SELECT a['key3'] FROM table_map;
+CREATE TABLE tab (m Map(String, UInt64)) ENGINE=Memory;
+INSERT INTO tab VALUES ({'key1':100}), ({});
+SELECT m['key1'] FROM tab;
 ```
 
 Result:
 
 ```text
-┌─arrayElement(a, 'key3')─┐
+┌─arrayElement(m, 'key1')─┐
 │                     100 │
 │                       0 │
 └─────────────────────────┘
-┌─arrayElement(a, 'key3')─┐
-│                       0 │
-│                       0 │
-│                       0 │
-└─────────────────────────┘
 ```
 
-## Convert Tuple to Map Type
+## Converting Tuple to Map
 
-You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function:
+Values of type `Tuple()` can be cast to values of type `Map()` using function [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast):
+
+**Example**
+
+Query:
 
 ``` sql
 SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
 ```
 
+Result:
+
 ``` text
 ┌─map───────────────────────────┐
 │ {1:'Ready',2:'Steady',3:'Go'} │
 └───────────────────────────────┘
 ```
 
-## Map.keys and Map.values Subcolumns
+## Reading subcolumns of Map
 
-To optimize `Map` column processing, in some cases you can use the `keys` and `values` subcolumns instead of reading the whole column.
+To avoid reading the entire map, you can use subcolumns `keys` and `values` in some cases. **Example** Query: ``` sql -CREATE TABLE t_map (`a` Map(String, UInt64)) ENGINE = Memory; +CREATE TABLE tab (m Map(String, UInt64)) ENGINE = Memory; +INSERT INTO tab VALUES (map('key1', 1, 'key2', 2, 'key3', 3)); -INSERT INTO t_map VALUES (map('key1', 1, 'key2', 2, 'key3', 3)); - -SELECT a.keys FROM t_map; - -SELECT a.values FROM t_map; +SELECT m.keys FROM tab; -- same as mapKeys(m) +SELECT m.values FROM tab; -- same as mapValues(m) ``` Result: ``` text -┌─a.keys─────────────────┐ +┌─m.keys─────────────────┐ │ ['key1','key2','key3'] │ └────────────────────────┘ -┌─a.values─┐ +┌─m.values─┐ │ [1,2,3] │ └──────────┘ ``` diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index d9c18e2a0a2..ad40725d680 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -6,7 +6,7 @@ sidebar_label: Maps ## map -Arranges `key:value` pairs into [Map(key, value)](../data-types/map.md) data type. +Creates a value of type [Map(key, value)](../data-types/map.md) from key-value pairs. **Syntax** @@ -16,12 +16,12 @@ map(key1, value1[, key2, value2, ...]) **Arguments** -- `key` — The key part of the pair. Arbitrary type, except [Nullable](../data-types/nullable.md) and [LowCardinality](../data-types/lowcardinality.md) nested with [Nullable](../data-types/nullable.md). -- `value` — The value part of the pair. Arbitrary type, including [Map](../data-types/map.md) and [Array](../data-types/array.md). +- `key_n` — The keys of the map entries. Any type supported as key type of [Map](../data-types/map.md). +- `value_n` — The values of the map entries. Any type supported as value type of [Map](../data-types/map.md). **Returned value** -- Data structure as `key:value` pairs. [Map(key, value)](../data-types/map.md). +- A map containing `key:value` pairs. [Map(key, value)](../data-types/map.md). **Examples** @@ -41,35 +41,16 @@ Result: └──────────────────────────────────────────────────┘ ``` -Query: - -```sql -CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a; -INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); -SELECT a['key2'] FROM table_map; -``` - -Result: - -```text -┌─arrayElement(a, 'key2')─┐ -│ 0 │ -│ 2 │ -│ 4 │ -└─────────────────────────┘ -``` - -**See Also** - -- [Map(key, value)](../data-types/map.md) data type - ## mapFromArrays -Merges an [Array](../data-types/array.md) of keys and an [Array](../data-types/array.md) of values into a [Map(key, value)](../data-types/map.md). Notice that the second argument could also be a [Map](../data-types/map.md), thus it is casted to an Array when executing. +Creates a map from an array of keys and an array of values. +The function is a convenient alternative to syntax `CAST([...], 'Map(key_type, value_type)')`. +For example, instead of writing +- `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, or +- `CAST([('aa',4), ('bb',5)], 'Map(String, UInt32)')` -The function is a more convenient alternative to `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`. - +you can write `mapFromArrays(['aa', 'bb'], [4, 5])`. 
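
As a quick illustration of the equivalence described above, the following hypothetical query (assuming any recent ClickHouse server; it is not part of the patch) shows both forms side by side:

```sql
SELECT
    mapFromArrays(['aa', 'bb'], [4, 5]) AS via_function,
    CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)') AS via_cast;
-- both columns contain {'aa':4,'bb':5}
```

Note that `mapFromArrays` expects the two arrays to contain the same number of elements; mismatched lengths make the query fail.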
 
 **Syntax**
 
@@ -81,12 +62,12 @@ Alias: `MAP_FROM_ARRAYS(keys, values)`
 
 **Arguments**
 
-- `keys` — Given key array to create a map from. The nested type of array must be: [String](../data-types/string.md), [Integer](../data-types/int-uint.md), [LowCardinality](../data-types/lowcardinality.md), [FixedString](../data-types/fixedstring.md), [UUID](../data-types/uuid.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [Date32](../data-types/date32.md), [Enum](../data-types/enum.md)
-- `values` - Given value array or map to create a map from.
+- `keys` — Array of keys to create the map from. [Array(T)](../data-types/array.md) where `T` can be any type supported by [Map](../data-types/map.md) as key type.
+- `values` - Array or map of values to create the map from. [Array](../data-types/array.md) or [Map](../data-types/map.md).
 
 **Returned value**
 
-- A map whose keys and values are constructed from the key array and value array/map.
+- A map with keys and values constructed from the key array and value array/map.
 
 **Example**
 
@@ -94,14 +75,25 @@ Query:
 
 ```sql
 select mapFromArrays(['a', 'b', 'c'], [1, 2, 3])
+```
 
+Result:
+
+```
 ┌─mapFromArrays(['a', 'b', 'c'], [1, 2, 3])─┐
 │ {'a':1,'b':2,'c':3}                       │
 └───────────────────────────────────────────┘
+```
 
+`mapFromArrays` also accepts arguments of type [Map](../data-types/map.md). These are cast to arrays of tuples during execution.
+
+```sql
 SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))
+```
 
+Result:
+
+```
 ┌─mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))─┐
 │ {1:('a',1),2:('b',2),3:('c',3)}                       │
 └───────────────────────────────────────────────────────┘
@@ -109,9 +101,11 @@ SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))
 
 ## extractKeyValuePairs
 
-Extracts key-value pairs, i.e. a [Map(String, String)](../data-types/map.md), from a string. Parsing is robust towards noise (e.g. log files).
-
-A key-value pair consists of a key, followed by a `key_value_delimiter` and a value. Key value pairs must be separated by `pair_delimiter`. Quoted keys and values are also supported.
+Converts a string of key-value pairs to a [Map(String, String)](../data-types/map.md).
+Parsing is tolerant towards noise (e.g. log files).
+Key-value pairs in the input string consist of a key, followed by a key-value delimiter, and a value.
+Key-value pairs are separated by a pair delimiter.
+Keys and values can be quoted.
 
 **Syntax**
 
@@ -126,17 +120,17 @@ Alias:
 **Arguments**
 
 - `data` - String to extract key-value pairs from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-- `pair_delimiters` - Set of character to be used as delimiters between pairs. Defaults to ` `, `,` and `;`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+- `key_value_delimiter` - Single character delimiting keys and values. Defaults to `:`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+- `pair_delimiters` - Set of characters delimiting pairs. Defaults to ` `, `,` and `;`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+- `quoting_character` - Single character used as quoting character. Defaults to `"`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned values**
 
-- A [Map(String, String)](../data-types/map.md) of key-value pairs.
+- A map of key-value pairs. Type: [Map(String, String)](../data-types/map.md)
 
 **Examples**
 
-Simple case:
+Query:
 
 ``` sql
 SELECT extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv
@@ -150,7 +144,7 @@ Result:
 └─────────────────────────────────────────────────────────────────────────┘
 ```
 
-Single quote as quoting character:
+With a single quote `'` as quoting character:
 
 ``` sql
 SELECT extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv
@@ -178,9 +172,29 @@ Result:
 └────────────────────────┘
 ```
 
+To restore a map from a string of key-value pairs serialized with `toString`:
+
+```sql
+SELECT
+    map('John', '33', 'Paula', '31') AS m,
+    toString(m) as map_serialized,
+    extractKeyValuePairs(map_serialized, ':', ',', '\'') AS map_restored
+FORMAT Vertical;
+```
+
+Result:
+
+```
+Row 1:
+──────
+m:              {'John':'33','Paula':'31'}
+map_serialized: {'John':'33','Paula':'31'}
+map_restored:   {'John':'33','Paula':'31'}
+```
+
 ## extractKeyValuePairsWithEscaping
 
-Same as `extractKeyValuePairs` but with escaping support.
+Same as `extractKeyValuePairs` but supports escaping.
 
 Supported escape sequences: `\x`, `\N`, `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t`, `\v` and `\0`.
 Non standard escape sequences are returned as it is (including the backslash) unless they are one of the following:
@@ -229,20 +243,6 @@ Arguments are [maps](../data-types/map.md) or [tuples](../data-types/tuple.md#tu
 
 **Example**
 
-Query with a tuple:
-
-```sql
-SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type;
-```
-
-Result:
-
-```text
-┌─res───────────┬─type───────────────────────────────┐
-│ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │
-└───────────────┴────────────────────────────────────┘
-```
-
 Query with `Map` type:
 
 ```sql
@@ -257,6 +257,20 @@ Result:
 └──────────────────────────────┘
 ```
 
+Query with a tuple:
+
+```sql
+SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type;
+```
+
+Result:
+
+```text
+┌─res───────────┬─type───────────────────────────────┐
+│ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │
+└───────────────┴────────────────────────────────────┘
+```
+
 ## mapSubtract
 
 Collect all the keys and subtract corresponding values.
@@ -277,20 +291,6 @@ Arguments are [maps](../data-types/map.md) or [tuples](../data-types/tuple.md#tu
 
 **Example**
 
-Query with a tuple map:
-
-```sql
-SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type;
-```
-
-Result:
-
-```text
-┌─res────────────┬─type──────────────────────────────┐
-│ ([1,2],[-1,0]) │ Tuple(Array(UInt8), Array(Int64)) │
-└────────────────┴───────────────────────────────────┘
-```
-
 Query with `Map` type:
 
 ```sql
@@ -305,55 +305,57 @@ Result:
 └───────────────────────────────────┘
 ```
 
-## mapPopulateSeries
-
-Fills missing keys in the maps (key and value array pair), where keys are integers. Also, it supports specifying the max key, which is used to extend the keys array.
-
-**Syntax**
+Query with a tuple map:
 
 ```sql
-mapPopulateSeries(keys, values[, max])
-mapPopulateSeries(map[, max])
-```
-
-Generates a map (a tuple with two arrays or a value of `Map` type, depending on the arguments), where keys are a series of numbers, from minimum to maximum keys (or `max` argument if it specified) taken from the map with a step size of one, and corresponding values. If the value is not specified for the key, then it uses the default value in the resulting map. For repeated keys, only the first value (in order of appearing) gets associated with the key.
-
-For array arguments the number of elements in `keys` and `values` must be the same for each row.
-
-**Arguments**
-
-Arguments are [maps](../data-types/map.md) or two [arrays](../data-types/array.md#data-type-array), where the first array represent keys, and the second array contains values for the each key.
-
-Mapped arrays:
-
-- `keys` — Array of keys. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)).
-- `values` — Array of values. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)).
-- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../data-types/int-uint.md#int-ranges).
-
-or
-
-- `map` — Map with integer keys. [Map](../data-types/map.md).
-
-**Returned value**
-
-- Depending on the arguments returns a [map](../data-types/map.md) or a [tuple](../data-types/tuple.md#tuplet1-t2) of two [arrays](../data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys.
-
-**Example**
-
-Query with mapped arrays:
-
-```sql
-SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type;
+SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type;
 ```
 
 Result:
 
 ```text
-┌─res──────────────────────────┬─type──────────────────────────────┐
-│ ([1,2,3,4,5],[11,22,0,44,0]) │ Tuple(Array(UInt8), Array(UInt8)) │
-└──────────────────────────────┴───────────────────────────────────┘
+┌─res────────────┬─type──────────────────────────────┐
+│ ([1,2],[-1,0]) │ Tuple(Array(UInt8), Array(Int64)) │
+└────────────────┴───────────────────────────────────┘
 ```
 
+## mapPopulateSeries
+
+Fills missing key-value pairs in a map with integer keys.
+To support extending the keys beyond the largest value, a maximum key can be specified.
+More specifically, the function returns a map in which the keys form a series from the smallest to the largest key (or the `max` argument, if specified) with step size of 1, and corresponding values.
+If no value is specified for a key, the value type's default value is used.
+In case keys repeat, only the first value (in order of appearance) is associated with the key.
+
+**Syntax**
+
+```sql
+mapPopulateSeries(map[, max])
+mapPopulateSeries(keys, values[, max])
+```
+
+For array arguments the number of elements in `keys` and `values` must be the same for each row.
+
+**Arguments**
+
+Arguments are [Maps](../data-types/map.md) or two [Arrays](../data-types/array.md#data-type-array), where the first array contains the keys and the second array contains the values for each key.
+
+Mapped arrays:
+
+- `map` — Map with integer keys. [Map](../data-types/map.md).
+
+or
+
+- `keys` — Array of keys. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)).
+- `values` — Array of values. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)).
+- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../data-types/int-uint.md#int-ranges).
+
+**Returned value**
+
+- Depending on the arguments, a [Map](../data-types/map.md) or a [Tuple](../data-types/tuple.md#tuplet1-t2) of two [Arrays](../data-types/array.md#data-type-array): the keys in sorted order, and the values corresponding to the keys.
+
+**Example**
+
 Query with `Map` type:
 
 ```sql
@@ -368,9 +370,23 @@ Result:
 └─────────────────────────────────────────┘
 ```
 
+Query with mapped arrays:
+
+```sql
+SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type;
+```
+
+Result:
+
+```text
+┌─res──────────────────────────┬─type──────────────────────────────┐
+│ ([1,2,3,4,5],[11,22,0,44,0]) │ Tuple(Array(UInt8), Array(UInt8)) │
+└──────────────────────────────┴───────────────────────────────────┘
+```
+
 ## mapContains
 
-Determines whether the `map` contains the `key` parameter.
+Returns whether a given key is contained in a given map.
 
 **Syntax**
 
```sql
@@ -381,7 +397,7 @@ mapContains(map, key)
 **Arguments**
 
 - `map` — Map. [Map](../data-types/map.md).
-- `key` — Key. Type matches the type of keys of `map` parameter.
+- `key` — Key. Type must match the key type of `map`.
 
 **Returned value**
 
@@ -392,11 +408,11 @@ mapContains(map, key)
 Query:
 
 ```sql
-CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
+CREATE TABLE tab (a Map(String, String)) ENGINE = Memory;
 
-INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
+INSERT INTO tab VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
 
-SELECT mapContains(a, 'name') FROM test;
+SELECT mapContains(a, 'name') FROM tab;
 ```
 
 Result:
 
 ```text
 ┌─mapContains(a, 'name')─┐
 │                      1 │
 │                      0 │
 └────────────────────────┘
 ```
 
 ## mapKeys
 
-Returns all keys from the `map` parameter.
+Returns the keys of a given map.
 
-Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [keys](../data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapKeys(m) FROM table` transforms to `SELECT m.keys FROM table`.
+This function can be optimized by enabling setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns).
+With the setting enabled, the function only reads the [keys](../data-types/map.md#map-subcolumns) subcolumn instead of the whole map.
+The query `SELECT mapKeys(m) FROM table` is transformed to `SELECT m.keys FROM table`.
 
 **Syntax**
 
@@ -434,11 +452,11 @@ mapKeys(map)
 Query:
 
 ```sql
-CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
+CREATE TABLE tab (a Map(String, String)) ENGINE = Memory;
 
-INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
+INSERT INTO tab VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
 
-SELECT mapKeys(a) FROM test;
+SELECT mapKeys(a) FROM tab;
 ```
 
 Result:
 
 ```text
 ┌─mapKeys(a)────────────┐
 │ ['name','age']        │
 │ ['number','position'] │
 └───────────────────────┘
 ```
 
 ## mapValues
 
-Returns all values from the `map` parameter.
+Returns the values of a given map.
 
-Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [values](../data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapValues(m) FROM table` transforms to `SELECT m.values FROM table`.
+This function can be optimized by enabling setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns).
+With the setting enabled, the function only reads the [values](../data-types/map.md#map-subcolumns) subcolumn instead of the whole map.
+The query `SELECT mapValues(m) FROM table` is transformed to `SELECT m.values FROM table`.
 
 **Syntax**
 
@@ -475,11 +495,11 @@ mapValues(map)
 Query:
 
 ```sql
-CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
+CREATE TABLE tab (a Map(String, String)) ENGINE = Memory;
 
-INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
+INSERT INTO tab VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
 
-SELECT mapValues(a) FROM test;
+SELECT mapValues(a) FROM tab;
 ```
 
 Result:
 
 ```text
 ┌─mapValues(a)─────┐
 │ ['eleven','11']  │
 │ ['twelve','6.0'] │
 └──────────────────┘
 ```
 
@@ -512,11 +532,11 @@ mapContainsKeyLike(map, pattern)
 Query:
 
 ```sql
-CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
+CREATE TABLE tab (a Map(String, String)) ENGINE = Memory;
 
-INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
+INSERT INTO tab VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
 
-SELECT mapContainsKeyLike(a, 'a%') FROM test;
+SELECT mapContainsKeyLike(a, 'a%') FROM tab;
 ```
 
 Result:
 
 ```text
 ┌─mapContainsKeyLike(a, 'a%')─┐
 │                           1 │
 │                           0 │
 └─────────────────────────────┘
 ```
 
 ## mapExtractKeyLike
 
+Given a map with string keys and a LIKE pattern, this function returns a map with the elements whose key matches the pattern.
+
 **Syntax**
 
 ```sql
@@ -543,18 +565,18 @@ mapExtractKeyLike(map, pattern)
 
 **Returned value**
 
-- A map contained elements the key of which matches the specified pattern. If there are no elements matched the pattern, it will return an empty map.
+- A map containing the elements whose key matches the specified pattern. If no elements match the pattern, an empty map is returned.
 
 **Example**
 
 Query:
 
 ```sql
-CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
+CREATE TABLE tab (a Map(String, String)) ENGINE = Memory;
 
-INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
+INSERT INTO tab VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
 
-SELECT mapExtractKeyLike(a, 'a%') FROM test;
+SELECT mapExtractKeyLike(a, 'a%') FROM tab;
 ```
 
 Result:
 
 ```text
 ┌─mapExtractKeyLike(a, 'a%')─┐
 │ {'abc':'abc'}              │
 │ {}                         │
 └────────────────────────────┘
 ```
 
 ## mapApply
 
+Applies a function to each element of a map.
+
 **Syntax**
 
 ```sql
@@ -608,6 +632,8 @@ Result:
 
 ## mapFilter
 
+Filters a map by applying a function to each map element.
+
 **Syntax**
 
 ```sql
@@ -623,7 +649,6 @@ mapFilter(func, map)
 
 - Returns a map containing only the elements in `map` for which `func(map1[i], ..., mapN[i])` returns something other than 0.
 
-
 **Example**
 
 Query:
@@ -647,7 +672,6 @@ Result:
 └─────────────────────┘
 ```
 
-
 ## mapUpdate
 
 **Syntax**
 
@@ -683,6 +707,9 @@ Result:
 
 ## mapConcat
 
+Concatenates multiple maps based on the equality of their keys.
+If elements with the same key exist in more than one input map, all elements are added to the result map, but only the first one is accessible via operator `[]`.
+
 **Syntax**
 
 ```sql
@@ -691,11 +718,11 @@ mapConcat(maps)
 
 **Arguments**
 
-- `maps` – Arbitrary number of arguments of [Map](../data-types/map.md) type.
+- `maps` – Arbitrarily many [Maps](../data-types/map.md).
 
 **Returned value**
 
-- Returns a map with concatenated maps passed as arguments. If there are same keys in two or more maps, all of them are added to the result map, but only the first one is accessible via operator `[]`
+- Returns a map with concatenated maps passed as arguments.
 
 **Examples**
 
@@ -729,9 +756,12 @@ Result:
 
 ## mapExists(\[func,\], map)
 
-Returns 1 if there is at least one key-value pair in `map` for which `func(key, value)` returns something other than 0. Otherwise, it returns 0.
+Returns 1 if at least one key-value pair in `map` exists for which `func(key, value)` returns something other than 0. Otherwise, it returns 0.
 
-Note that the `mapExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
+:::note
+`mapExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions).
+You can pass a lambda function to it as the first argument.
+:::
 
 **Example**
 
@@ -743,7 +773,7 @@ SELECT mapExists((k, v) -> (v = 1), map('k1', 1, 'k2', 2)) AS res
 
 Result:
 
-```text
+```
 ┌─res─┐
 │   1 │
 └─────┘
@@ -753,7 +783,10 @@ Result:
 
 Returns 1 if `func(key, value)` returns something other than 0 for all key-value pairs in `map`. Otherwise, it returns 0.
 
-Note that the `mapAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
+:::note
+`mapAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions).
+You can pass a lambda function to it as the first argument.
+:::
 
 **Example**
 
@@ -765,7 +798,7 @@ SELECT mapAll((k, v) -> (v = 1), map('k1', 1, 'k2', 2)) AS res
 
 Result:
 
-```text
+```
 ┌─res─┐
 │   0 │
 └─────┘
@@ -773,7 +806,8 @@ Result:
 
 ## mapSort(\[func,\], map)
 
-Sorts the elements of the `map` in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the keys and values of the map.
+Sorts the elements of a map in ascending order.
+If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map.
 
 **Examples**
 
@@ -801,8 +835,8 @@ For more details see the [reference](../../sql-reference/functions/array-functio
 
 ## mapReverseSort(\[func,\], map)
 
-Sorts the elements of the `map` in descending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the keys and values of the map.
-
+Sorts the elements of a map in descending order.
+If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map.
 
 **Examples**
 
@@ -826,4 +860,4 @@ SELECT mapReverseSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map;
 └──────────────────────────────┘
 ```
 
-For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort) for `arrayReverseSort` function.
+For more details see function [arrayReverseSort](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort).
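The Coordination changes that follow convert `SnapshotFileInfo` from a value type into a `std::shared_ptr` and cache the snapshot file size lazily in an atomic member. The following is a minimal standalone sketch of that caching pattern, with simplified, hypothetical names (`FileInfo`, `getFileSizeOnDisk`) rather than the actual ClickHouse classes:

```cpp
#include <atomic>
#include <cstddef>
#include <memory>
#include <string>

// Stand-in for the SnapshotFileInfo pattern used in the patch below:
// the file size starts at 0 and is computed on first request, then cached.
struct FileInfo
{
    explicit FileInfo(std::string path_) : path(std::move(path_)) {}

    std::string path;
    mutable std::atomic<size_t> size{0};
};

using FileInfoPtr = std::shared_ptr<FileInfo>;

// Stub standing in for disk->getFileSize(path).
size_t getFileSizeOnDisk(const std::string & /*path*/) { return 4096; }

size_t cachedFileSize(const FileInfoPtr & info)
{
    if (!info)
        return 0;

    // Relaxed ordering suffices: if several threads race here, each computes
    // and stores the same value, so the worst case is some duplicated work.
    size_t size = info->size.load(std::memory_order_relaxed);
    if (size == 0)
    {
        size = getFileSizeOnDisk(info->path);
        info->size.store(size, std::memory_order_relaxed);
    }
    return size;
}

int main()
{
    auto info = std::make_shared<FileInfo>("/var/lib/keeper/snapshot_100.bin");
    return cachedFileSize(info) == 4096 ? 0 : 1;
}
```

Sharing the info through `shared_ptr` is what makes the cached size useful: the monitoring thread and the snapshot thread observe the same object, so the size is computed at most a few times instead of on every `mntr` call.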
diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp
index 7518227a070..639e0957f49 100644
--- a/programs/keeper-converter/KeeperConverter.cpp
+++ b/programs/keeper-converter/KeeperConverter.cpp
@@ -57,7 +57,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv)
         DB::KeeperSnapshotManager manager(1, keeper_context);
         auto snp = manager.serializeSnapshotToBuffer(snapshot);
         auto file_info = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID());
-        std::cout << "Snapshot serialized to path:" << fs::path(file_info.disk->getPath()) / file_info.path << std::endl;
+        std::cout << "Snapshot serialized to path:" << fs::path(file_info->disk->getPath()) / file_info->path << std::endl;
     }
     catch (...)
     {
diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp
index 03730fcedaa..f5b700ea529 100644
--- a/src/Client/QueryFuzzer.cpp
+++ b/src/Client/QueryFuzzer.cpp
@@ -598,7 +598,7 @@ DataTypePtr QueryFuzzer::fuzzDataType(DataTypePtr type)
     {
         auto key_type = fuzzDataType(type_map->getKeyType());
         auto value_type = fuzzDataType(type_map->getValueType());
-        if (!DataTypeMap::checkKeyType(key_type))
+        if (!DataTypeMap::isValidKeyType(key_type))
            key_type = type_map->getKeyType();

         return std::make_shared<DataTypeMap>(key_type, value_type);
diff --git a/src/Common/CopyableAtomic.h b/src/Common/CopyableAtomic.h
index 227fffe927f..199f0cac134 100644
--- a/src/Common/CopyableAtomic.h
+++ b/src/Common/CopyableAtomic.h
@@ -13,8 +13,9 @@ struct CopyableAtomic
        : value(other.value.load())
    {}

-    explicit CopyableAtomic(T && value_)
-        : value(std::forward<T>(value_))
+    template <std::convertible_to<T> U>
+    explicit CopyableAtomic(U && value_)
+        : value(std::forward<U>(value_))
    {}

    CopyableAtomic & operator=(const CopyableAtomic & other)
@@ -23,9 +24,10 @@ struct CopyableAtomic
        return *this;
    }

-    CopyableAtomic & operator=(bool value_)
+    template <std::convertible_to<T> U>
+    CopyableAtomic & operator=(U && value_)
    {
-        value = value_;
+        value = std::forward<U>(value_);
        return *this;
    }

diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp
index 28902bc8591..8de9f8dfa1c 100644
--- a/src/Coordination/FourLetterCommand.cpp
+++ b/src/Coordination/FourLetterCommand.cpp
@@ -305,7 +305,7 @@ String MonitorCommand::run()
    print(ret, "ephemerals_count", state_machine.getTotalEphemeralNodesCount());
    print(ret, "approximate_data_size", state_machine.getApproximateDataSize());
    print(ret, "key_arena_size", state_machine.getKeyArenaSize());
-    print(ret, "latest_snapshot_size", state_machine.getLatestSnapshotBufSize());
+    print(ret, "latest_snapshot_size", state_machine.getLatestSnapshotSize());

 #if defined(OS_LINUX) || defined(OS_DARWIN)
    print(ret, "open_file_descriptor_count", getCurrentProcessFDCount());
diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp
index 84e1632e9b1..b4389da082d 100644
--- a/src/Coordination/KeeperDispatcher.cpp
+++ b/src/Coordination/KeeperDispatcher.cpp
@@ -332,9 +332,10 @@ void KeeperDispatcher::snapshotThread()
            if (shutdown_called)
                break;

-            if (snapshot_file_info.path.empty())
+            if (!snapshot_file_info)
                continue;

+            chassert(snapshot_file_info->disk != nullptr);
            if (isLeader())
                snapshot_s3.uploadSnapshot(snapshot_file_info);
        }
diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp
index f25ccab86b1..8bf42b434cd 100644
--- a/src/Coordination/KeeperSnapshotManager.cpp
+++ b/src/Coordination/KeeperSnapshotManager.cpp
@@ -618,7 +618,7 @@ KeeperSnapshotManager::KeeperSnapshotManager(
LOG_TRACE(log, "Found {} on {}", snapshot_file, disk->getName()); size_t snapshot_up_to = getSnapshotPathUpToLogIdx(snapshot_file); - auto [_, inserted] = existing_snapshots.insert_or_assign(snapshot_up_to, SnapshotFileInfo{snapshot_file, disk}); + auto [_, inserted] = existing_snapshots.insert_or_assign(snapshot_up_to, std::make_shared(snapshot_file, disk)); if (!inserted) LOG_WARNING( @@ -651,7 +651,7 @@ KeeperSnapshotManager::KeeperSnapshotManager( moveSnapshotsIfNeeded(); } -SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx) +SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx) { ReadBufferFromNuraftBuffer reader(buffer); @@ -672,11 +672,12 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::bu disk->removeFile(tmp_snapshot_file_name); - existing_snapshots.emplace(up_to_log_idx, SnapshotFileInfo{snapshot_file_name, disk}); + auto snapshot_file_info = std::make_shared(snapshot_file_name, disk); + existing_snapshots.emplace(up_to_log_idx, snapshot_file_info); removeOutdatedSnapshotsIfNeeded(); moveSnapshotsIfNeeded(); - return {snapshot_file_name, disk}; + return snapshot_file_info; } nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBufferFromDisk() @@ -690,7 +691,7 @@ nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBuff } catch (const DB::Exception &) { - const auto & [path, disk] = latest_itr->second; + const auto & [path, disk, size] = *latest_itr->second; disk->removeFile(path); existing_snapshots.erase(latest_itr->first); tryLogCurrentException(__PRETTY_FUNCTION__); @@ -702,7 +703,7 @@ nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBuff nuraft::ptr KeeperSnapshotManager::deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const { - const auto & [snapshot_path, snapshot_disk] = existing_snapshots.at(up_to_log_idx); + const auto & [snapshot_path, snapshot_disk, size] = *existing_snapshots.at(up_to_log_idx); WriteBufferFromNuraftBuffer writer; auto reader = snapshot_disk->readFile(snapshot_path); copyData(*reader, writer); @@ -794,18 +795,18 @@ void KeeperSnapshotManager::moveSnapshotsIfNeeded() { if (idx == latest_snapshot_idx) { - if (file_info.disk != latest_snapshot_disk) + if (file_info->disk != latest_snapshot_disk) { - moveSnapshotBetweenDisks(file_info.disk, file_info.path, latest_snapshot_disk, file_info.path, keeper_context); - file_info.disk = latest_snapshot_disk; + moveSnapshotBetweenDisks(file_info->disk, file_info->path, latest_snapshot_disk, file_info->path, keeper_context); + file_info->disk = latest_snapshot_disk; } } else { - if (file_info.disk != disk) + if (file_info->disk != disk) { - moveSnapshotBetweenDisks(file_info.disk, file_info.path, disk, file_info.path, keeper_context); - file_info.disk = disk; + moveSnapshotBetweenDisks(file_info->disk, file_info->path, disk, file_info->path, keeper_context); + file_info->disk = disk; } } } @@ -817,12 +818,12 @@ void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx) auto itr = existing_snapshots.find(log_idx); if (itr == existing_snapshots.end()) throw Exception(ErrorCodes::UNKNOWN_SNAPSHOT, "Unknown snapshot with log index {}", log_idx); - const auto & [path, disk] = itr->second; + const auto & [path, disk, size] = *itr->second; disk->removeFileIfExists(path); existing_snapshots.erase(itr); } -SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot) 
+SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot)
 {
    auto up_to_log_idx = snapshot.snapshot_meta->get_last_log_idx();
    auto snapshot_file_name = getSnapshotFileName(up_to_log_idx, compress_snapshots_zstd);
@@ -847,7 +848,8 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStor

    disk->removeFile(tmp_snapshot_file_name);

-    existing_snapshots.emplace(up_to_log_idx, SnapshotFileInfo{snapshot_file_name, disk});
+    auto snapshot_file_info = std::make_shared<SnapshotFileInfo>(snapshot_file_name, disk);
+    existing_snapshots.emplace(up_to_log_idx, snapshot_file_info);

    try
    {
@@ -859,7 +861,7 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStor
        tryLogCurrentException(log, "Failed to cleanup and/or move older snapshots");
    }

-    return {snapshot_file_name, disk};
+    return snapshot_file_info;
 }

 size_t KeeperSnapshotManager::getLatestSnapshotIndex() const
@@ -869,23 +871,23 @@ size_t KeeperSnapshotManager::getLatestSnapshotIndex() const
    return 0;
 }

-SnapshotFileInfo KeeperSnapshotManager::getLatestSnapshotInfo() const
+SnapshotFileInfoPtr KeeperSnapshotManager::getLatestSnapshotInfo() const
 {
    if (!existing_snapshots.empty())
    {
-        const auto & [path, disk] = existing_snapshots.at(getLatestSnapshotIndex());
+        const auto & [path, disk, size] = *existing_snapshots.at(getLatestSnapshotIndex());

        try
        {
            if (disk->exists(path))
-                return {path, disk};
+                return std::make_shared<SnapshotFileInfo>(path, disk);
        }
        catch (...)
        {
            tryLogCurrentException(log);
        }
    }

-    return {"", nullptr};
+    return nullptr;
 }

 }
diff --git a/src/Coordination/KeeperSnapshotManager.h b/src/Coordination/KeeperSnapshotManager.h
index 8ba0f92a564..36bf6c8775c 100644
--- a/src/Coordination/KeeperSnapshotManager.h
+++ b/src/Coordination/KeeperSnapshotManager.h
@@ -1,5 +1,6 @@
 #pragma once
 #include
+#include
 #include

 namespace DB
@@ -93,12 +94,20 @@ public:

 struct SnapshotFileInfo
 {
+    SnapshotFileInfo(std::string path_, DiskPtr disk_)
+        : path(std::move(path_))
+        , disk(std::move(disk_))
+    {}
+
    std::string path;
    DiskPtr disk;
+    mutable std::atomic<size_t> size{0};
 };

+using SnapshotFileInfoPtr = std::shared_ptr<SnapshotFileInfo>;
+
 using KeeperStorageSnapshotPtr = std::shared_ptr<KeeperStorageSnapshot>;
-using CreateSnapshotCallback = std::function<SnapshotFileInfo(KeeperStorageSnapshotPtr &&, bool)>;
+using CreateSnapshotCallback = std::function<SnapshotFileInfoPtr(KeeperStorageSnapshotPtr &&, bool)>;

 using SnapshotMetaAndStorage = std::pair<SnapshotMetadataPtr, KeeperStoragePtr>;

@@ -121,10 +130,10 @@ public:
    nuraft::ptr<nuraft::buffer> serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const;

    /// Serialize already compressed snapshot to disk (return path)
-    SnapshotFileInfo serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx);
+    SnapshotFileInfoPtr serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx);

    /// Serialize snapshot directly to disk
-    SnapshotFileInfo serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot);
+    SnapshotFileInfoPtr serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot);

    SnapshotDeserializationResult deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const;

@@ -143,7 +152,7 @@ public:
    /// The most fresh snapshot log index we have
    size_t getLatestSnapshotIndex() const;

-    SnapshotFileInfo getLatestSnapshotInfo() const;
+    SnapshotFileInfoPtr getLatestSnapshotInfo() const;

 private:
    void removeOutdatedSnapshotsIfNeeded();
@@ -159,7 +168,7 @@ private:
    /// How many snapshots to keep before remove
    const size_t snapshots_to_keep;
    /// All existing snapshots in our path (log_index -> path)
-    std::map<uint64_t, SnapshotFileInfo> existing_snapshots;
+    std::map<uint64_t, SnapshotFileInfoPtr> existing_snapshots;
    /// Compress snapshots in common ZSTD format instead of custom ClickHouse block LZ4 format
    const bool compress_snapshots_zstd;
    /// Superdigest for deserialization of storage
diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp
index b984b8ad18e..9991bef7be5 100644
--- a/src/Coordination/KeeperSnapshotManagerS3.cpp
+++ b/src/Coordination/KeeperSnapshotManagerS3.cpp
@@ -147,7 +147,7 @@ std::shared_ptr KeeperSnapshotManagerS

 void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapshot_file_info)
 {
-    const auto & [snapshot_path, snapshot_disk] = snapshot_file_info;
+    const auto & [snapshot_path, snapshot_disk, snapshot_size] = snapshot_file_info;
    try
    {
        auto s3_client = getSnapshotS3Client();
@@ -169,9 +169,9 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh
            );
        };

-        LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_file_info.path);
+        LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_path);

-        auto snapshot_file = snapshot_disk->readFile(snapshot_file_info.path);
+        auto snapshot_file = snapshot_disk->readFile(snapshot_path);

        auto snapshot_name = fs::path(snapshot_path).filename().string();
        auto lock_file = fmt::format(".{}_LOCK", snapshot_name);
@@ -261,31 +261,33 @@ void KeeperSnapshotManagerS3::snapshotS3Thread()

    while (!shutdown_called)
    {
-        SnapshotFileInfo snapshot_file_info;
+        SnapshotFileInfoPtr snapshot_file_info;
        if (!snapshots_s3_queue.pop(snapshot_file_info))
            break;

        if (shutdown_called)
            break;

-        uploadSnapshotImpl(snapshot_file_info);
+        uploadSnapshotImpl(*snapshot_file_info);
    }
 }

-void KeeperSnapshotManagerS3::uploadSnapshot(const SnapshotFileInfo & file_info, bool async_upload)
+void KeeperSnapshotManagerS3::uploadSnapshot(const SnapshotFileInfoPtr & file_info, bool async_upload)
 {
+    chassert(file_info);
+
    if (getSnapshotS3Client() == nullptr)
        return;

    if (async_upload)
    {
        if (!snapshots_s3_queue.push(file_info))
-            LOG_WARNING(log, "Failed to add snapshot {} to S3 queue", file_info.path);
+            LOG_WARNING(log, "Failed to add snapshot {} to S3 queue", file_info->path);

        return;
    }

-    uploadSnapshotImpl(file_info);
+    uploadSnapshotImpl(*file_info);
 }

 void KeeperSnapshotManagerS3::startup(const Poco::Util::AbstractConfiguration & config, const MultiVersion<Macros>::Version & macros)
diff --git a/src/Coordination/KeeperSnapshotManagerS3.h b/src/Coordination/KeeperSnapshotManagerS3.h
index d03deb60c1a..be7a9d4bd32 100644
--- a/src/Coordination/KeeperSnapshotManagerS3.h
+++ b/src/Coordination/KeeperSnapshotManagerS3.h
@@ -12,8 +12,6 @@
 #include
 #include
-
-#include
 #endif

 namespace DB
@@ -27,13 +25,13 @@ public:
    /// 'macros' are used to substitute macros in endpoint of disks
    void updateS3Configuration(const Poco::Util::AbstractConfiguration & config, const MultiVersion<Macros>::Version & macros);

-    void uploadSnapshot(const SnapshotFileInfo & file_info, bool async_upload = true);
+    void uploadSnapshot(const SnapshotFileInfoPtr & file_info, bool async_upload = true);

    /// 'macros' are used to substitute macros in endpoint of disks
    void startup(const Poco::Util::AbstractConfiguration & config, const MultiVersion<Macros>::Version & macros);
    void shutdown();

 private:
-    using SnapshotS3Queue = ConcurrentBoundedQueue<SnapshotFileInfo>;
+    using SnapshotS3Queue = ConcurrentBoundedQueue<SnapshotFileInfoPtr>;
    SnapshotS3Queue snapshots_s3_queue;

    /// Upload new snapshots to S3
@@ -63,7 +61,7 @@ public:
    KeeperSnapshotManagerS3() = default;

    void updateS3Configuration(const Poco::Util::AbstractConfiguration &, const MultiVersion<Macros>::Version &)
    {}
-    void uploadSnapshot(const SnapshotFileInfo &, [[maybe_unused]] bool async_upload = true) {}
+    void uploadSnapshot(const SnapshotFileInfoPtr &, [[maybe_unused]] bool async_upload = true) {}

    void startup(const Poco::Util::AbstractConfiguration &, const MultiVersion<Macros>::Version &) {}

diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp
index 3dbdb329b93..e4d661dfe17 100644
--- a/src/Coordination/KeeperStateMachine.cpp
+++ b/src/Coordination/KeeperStateMachine.cpp
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 #include
@@ -90,8 +91,9 @@ void KeeperStateMachine::init()
        latest_snapshot_buf = snapshot_manager.deserializeSnapshotBufferFromDisk(latest_log_index);
        auto snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_buf);
        latest_snapshot_info = snapshot_manager.getLatestSnapshotInfo();
+        chassert(latest_snapshot_info);

-        if (isLocalDisk(*latest_snapshot_info.disk))
+        if (isLocalDisk(*latest_snapshot_info->disk))
            latest_snapshot_buf = nullptr;

        storage = std::move(snapshot_deserialization_result.storage);
@@ -603,7 +605,11 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
                }

                ProfileEvents::increment(ProfileEvents::KeeperSnapshotCreations);
-                LOG_DEBUG(log, "Created persistent snapshot {} with path {}", latest_snapshot_meta->get_last_log_idx(), latest_snapshot_info.path);
+                LOG_DEBUG(
+                    log,
+                    "Created persistent snapshot {} with path {}",
+                    latest_snapshot_meta->get_last_log_idx(),
+                    latest_snapshot_info->path);
            }
        }
    }
@@ -627,7 +633,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res

        when_done(ret, exception);

-        return ret ? latest_snapshot_info : SnapshotFileInfo{};
+        return ret ? latest_snapshot_info : nullptr;
    };

    if (keeper_context->getServerState() == KeeperContext::Phase::SHUTDOWN)
@@ -635,9 +641,9 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
        LOG_INFO(log, "Creating a snapshot during shutdown because 'create_snapshot_on_exit' is enabled.");
        auto snapshot_file_info = snapshot_task.create_snapshot(std::move(snapshot_task.snapshot), /*execute_only_cleanup=*/false);

-        if (!snapshot_file_info.path.empty() && snapshot_manager_s3)
+        if (snapshot_file_info && snapshot_manager_s3)
        {
-            LOG_INFO(log, "Uploading snapshot {} during shutdown because 'upload_snapshot_on_exit' is enabled.", snapshot_file_info.path);
+            LOG_INFO(log, "Uploading snapshot {} during shutdown because 'upload_snapshot_on_exit' is enabled.", snapshot_file_info->path);
            snapshot_manager_s3->uploadSnapshot(snapshot_file_info, /* async_upload */ false);
        }

@@ -672,7 +678,7 @@ void KeeperStateMachine::save_logical_snp_obj(
        latest_snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(data, s.get_last_log_idx());
        latest_snapshot_meta = cloned_meta;
        latest_snapshot_buf = std::move(cloned_buffer);
-        LOG_DEBUG(log, "Saved snapshot {} to path {}", s.get_last_log_idx(), latest_snapshot_info.path);
+        LOG_DEBUG(log, "Saved snapshot {} to path {}", s.get_last_log_idx(), latest_snapshot_info->path);
        obj_id++;
        ProfileEvents::increment(ProfileEvents::KeeperSaveSnapshot);
    }
@@ -733,7 +739,7 @@ int KeeperStateMachine::read_logical_snp_obj(
        return -1;
    }

-    const auto & [path, disk] = latest_snapshot_info;
+    const auto & [path, disk, size] = *latest_snapshot_info;
    if (isLocalDisk(*disk))
    {
        auto full_path = fs::path(disk->getPath()) / path;
@@ -862,12 +868,27 @@ uint64_t KeeperStateMachine::getKeyArenaSize() const
    return storage->getArenaDataSize();
 }
-uint64_t KeeperStateMachine::getLatestSnapshotBufSize() const
+uint64_t KeeperStateMachine::getLatestSnapshotSize() const
 {
-    std::lock_guard lock(snapshots_lock);
-    if (latest_snapshot_buf)
-        return latest_snapshot_buf->size();
-    return 0;
+    auto snapshot_info = [&]
+    {
+        std::lock_guard lock(snapshots_lock);
+        return latest_snapshot_info;
+    }();
+
+    if (snapshot_info == nullptr || snapshot_info->disk == nullptr)
+        return 0;
+
+    /// there is a possibility multiple threads can try to get size
+    /// this can happen in rare cases while it's not a heavy operation
+    size_t size = snapshot_info->size.load(std::memory_order_relaxed);
+    if (size == 0)
+    {
+        size = snapshot_info->disk->getFileSize(snapshot_info->path);
+        snapshot_info->size.store(size, std::memory_order_relaxed);
+    }
+
+    return size;
 }

 ClusterConfigPtr KeeperStateMachine::getClusterConfig() const
diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h
index 3727beadb98..ee6109f0a17 100644
--- a/src/Coordination/KeeperStateMachine.h
+++ b/src/Coordination/KeeperStateMachine.h
@@ -124,7 +124,7 @@ public:
    uint64_t getTotalEphemeralNodesCount() const;
    uint64_t getApproximateDataSize() const;
    uint64_t getKeyArenaSize() const;
-    uint64_t getLatestSnapshotBufSize() const;
+    uint64_t getLatestSnapshotSize() const;

    void recalculateStorageStats();

@@ -135,7 +135,7 @@ private:
    /// In our state machine we always have a single snapshot which is stored
    /// in memory in compressed (serialized) format.
    SnapshotMetadataPtr latest_snapshot_meta = nullptr;
-    SnapshotFileInfo latest_snapshot_info;
+    std::shared_ptr<SnapshotFileInfo> latest_snapshot_info;
    nuraft::ptr<nuraft::buffer> latest_snapshot_buf = nullptr;

    /// Main state machine logic
diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp
index 4d7ab63f966..ea46927344a 100644
--- a/src/DataTypes/DataTypeMap.cpp
+++ b/src/DataTypes/DataTypeMap.cpp
@@ -66,11 +66,8 @@ DataTypeMap::DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & valu

 void DataTypeMap::assertKeyType() const
 {
-    if (!checkKeyType(key_type))
-        throw Exception(ErrorCodes::BAD_ARGUMENTS,
-            "Type of Map key must be a type, that can be represented by integer "
-            "or String or FixedString (possibly LowCardinality) or UUID or IPv6,"
-            " but {} given", key_type->getName());
+    if (!isValidKeyType(key_type))
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Map cannot have a key of type {}", key_type->getName());
 }


@@ -116,7 +113,7 @@ bool DataTypeMap::equals(const IDataType & rhs) const
    return nested->equals(*rhs_map.nested);
 }

-bool DataTypeMap::checkKeyType(DataTypePtr key_type)
+bool DataTypeMap::isValidKeyType(DataTypePtr key_type)
 {
    return !isNullableOrLowCardinalityNullable(key_type);
 }
diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h
index 4866c3e78cc..c506591ba79 100644
--- a/src/DataTypes/DataTypeMap.h
+++ b/src/DataTypes/DataTypeMap.h
@@ -52,7 +52,7 @@ public:

    SerializationPtr doGetDefaultSerialization() const override;

-    static bool checkKeyType(DataTypePtr key_type);
+    static bool isValidKeyType(DataTypePtr key_type);

    void forEachChild(const ChildCallback & callback) const override;

diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp
index 02c0aa6dd77..6cbcae2bebe 100644
--- a/src/Formats/SchemaInferenceUtils.cpp
+++ b/src/Formats/SchemaInferenceUtils.cpp
@@ -1222,7 +1222,7 @@ namespace
                return nullptr;

            auto key_type = removeNullable(key_types.back());
-            if (!DataTypeMap::checkKeyType(key_type))
+            if (!DataTypeMap::isValidKeyType(key_type))
                return nullptr;

            return std::make_shared<DataTypeMap>(key_type, value_types.back());

diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
index bad8cb18525..7a3fdf5160e 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -325,7 +325,7 @@ void SLRUFileCachePriority::downgrade(IteratorPtr iterator, const CachePriorityG
            candidate_it->getEntry()->toString());
    }

-    const size_t entry_size = candidate_it->entry->size;
+    const size_t entry_size = candidate_it->getEntry()->size;
    if (!probationary_queue.canFit(entry_size, 1, lock))
    {
        throw Exception(ErrorCodes::LOGICAL_ERROR,
@@ -483,7 +483,10 @@ SLRUFileCachePriority::SLRUIterator::SLRUIterator(

 SLRUFileCachePriority::EntryPtr SLRUFileCachePriority::SLRUIterator::getEntry() const
 {
-    return entry;
+    auto entry_ptr = entry.lock();
+    if (!entry_ptr)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry pointer expired");
+    return entry_ptr;
 }

 size_t SLRUFileCachePriority::SLRUIterator::increasePriority(const CachePriorityGuard::Lock & lock)
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h
index ee3cafe322d..2102a0ec558 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.h
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.h
@@ -125,7 +125,10 @@ private:
        SLRUFileCachePriority * cache_priority;
        LRUFileCachePriority::LRUIterator lru_iterator;
-        const EntryPtr entry;
+        /// Entry itself is stored by lru_iterator.entry.
+        /// We have it as a separate field to use the entry without requiring any lock
+        /// (which would be required if we wanted to get the entry from lru_iterator.getEntry()).
+        const std::weak_ptr<Entry> entry;
        /// Atomic,
        /// but needed only in order to do FileSegment::getInfo() without any lock,
        /// which is done for system tables and logging.
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index d3803c1d2ad..7a13f072e4a 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -1837,8 +1837,10 @@ bool ReadFromMergeTree::requestOutputEachPartitionThroughSeparatePort()

 ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const
 {
-    auto result_ptr = analyzed_result_ptr ?
analyzed_result_ptr : selectRangesToRead(); - return *result_ptr; + if (!analyzed_result_ptr) + analyzed_result_ptr = selectRangesToRead(); + + return *analyzed_result_ptr; } bool ReadFromMergeTree::isQueryWithSampling() const diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index b2f952d58e1..b541481fa62 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -276,7 +276,7 @@ private: Pipe spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection); - AnalysisResultPtr analyzed_result_ptr; + mutable AnalysisResultPtr analyzed_result_ptr; VirtualFields shared_virtual_fields; bool is_parallel_reading_from_replicas; diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 5faa8d4b48b..378a1944396 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -229,7 +229,7 @@ bool DataPartStorageOnDiskBase::isBroken() const bool DataPartStorageOnDiskBase::isReadonly() const { - return volume->getDisk()->isReadOnly(); + return volume->getDisk()->isReadOnly() || volume->getDisk()->isWriteOnce(); } void DataPartStorageOnDiskBase::syncRevision(UInt64 revision) const diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h index d2ebe8382e4..d53b60570ae 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -38,7 +38,7 @@ struct S3QueueTableMetadata String toString() const; void checkEquals(const S3QueueTableMetadata & from_zk) const; - static void checkEquals(const S3QueueSettings & lhs, const S3QueueSettings & rhs); + static void checkEquals(const S3QueueSettings & current, const S3QueueSettings & expected); private: void checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const; diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 9ce8923f58f..56a84272a63 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -701,7 +701,6 @@ class CiCache: len(self.jobs_to_wait) > MAX_JOB_NUM_TO_WAIT and round_cnt < MAX_ROUNDS_TO_WAIT ): - await_finished: Set[str] = set() round_cnt += 1 GHActions.print_in_group( f"Wait pending jobs, round [{round_cnt}/{MAX_ROUNDS_TO_WAIT}]:", @@ -713,6 +712,7 @@ class CiCache: expired_sec = 0 start_at = int(time.time()) while expired_sec < TIMEOUT and self.jobs_to_wait: + await_finished: Set[str] = set() time.sleep(poll_interval_sec) self.update() for job_name, job_config in self.jobs_to_wait.items(): @@ -759,11 +759,6 @@ class CiCache: print( f"...awaiting continues... 
diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
index 5faa8d4b48b..378a1944396 100644
--- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
+++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
@@ -229,7 +229,7 @@ bool DataPartStorageOnDiskBase::isBroken() const
 
 bool DataPartStorageOnDiskBase::isReadonly() const
 {
-    return volume->getDisk()->isReadOnly();
+    return volume->getDisk()->isReadOnly() || volume->getDisk()->isWriteOnce();
 }
 
 void DataPartStorageOnDiskBase::syncRevision(UInt64 revision) const
diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h
index d2ebe8382e4..d53b60570ae 100644
--- a/src/Storages/S3Queue/S3QueueTableMetadata.h
+++ b/src/Storages/S3Queue/S3QueueTableMetadata.h
@@ -38,7 +38,7 @@ struct S3QueueTableMetadata
     String toString() const;
 
     void checkEquals(const S3QueueTableMetadata & from_zk) const;
-    static void checkEquals(const S3QueueSettings & lhs, const S3QueueSettings & rhs);
+    static void checkEquals(const S3QueueSettings & current, const S3QueueSettings & expected);
 
 private:
     void checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const;
diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py
index 9ce8923f58f..56a84272a63 100644
--- a/tests/ci/ci_cache.py
+++ b/tests/ci/ci_cache.py
@@ -701,7 +701,6 @@ class CiCache:
             len(self.jobs_to_wait) > MAX_JOB_NUM_TO_WAIT
             and round_cnt < MAX_ROUNDS_TO_WAIT
         ):
-            await_finished: Set[str] = set()
             round_cnt += 1
             GHActions.print_in_group(
                 f"Wait pending jobs, round [{round_cnt}/{MAX_ROUNDS_TO_WAIT}]:",
@@ -713,6 +712,7 @@ class CiCache:
             expired_sec = 0
             start_at = int(time.time())
             while expired_sec < TIMEOUT and self.jobs_to_wait:
+                await_finished: Set[str] = set()
                 time.sleep(poll_interval_sec)
                 self.update()
                 for job_name, job_config in self.jobs_to_wait.items():
@@ -759,11 +759,6 @@ class CiCache:
                     print(
                         f"...awaiting continues... seconds left [{TIMEOUT - expired_sec}]"
                     )
-            if await_finished:
-                GHActions.print_in_group(
-                    f"Finished jobs, round [{round_cnt}]: [{list(await_finished)}]",
-                    list(await_finished),
-                )
 
             GHActions.print_in_group(
                 "Remaining jobs:",
diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index cb40f7899a4..ca46088eb45 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -593,8 +593,6 @@ class CIConfig:
             stage_type = CIStages.BUILDS_2
         elif self.is_docs_job(job_name):
             stage_type = CIStages.TESTS_1
-        elif job_name == JobNames.BUILD_CHECK_SPECIAL:
-            stage_type = CIStages.TESTS_2
         elif self.is_test_job(job_name):
             if job_name in CI_CONFIG.test_configs:
                 required_build = CI_CONFIG.test_configs[job_name].required_build
@@ -854,6 +852,14 @@ class CIConfig:
                         f"The requirement '{test_config}' for "
                         f"'{test_name}' is not found in builds"
                     )
+                if (
+                    test_config.required_build
+                    and test_config.required_build
+                    not in self.builds_report_config[JobNames.BUILD_CHECK].builds
+                ):
+                    errors.append(
+                        f"Test job's required build must be from [{JobNames.BUILD_CHECK}] list"
+                    )
 
         if errors:
             raise KeyError("config contains errors", errors)
@@ -1068,6 +1074,8 @@ CI_CONFIG = CIConfig(
                 Build.PACKAGE_MSAN,
                 Build.PACKAGE_DEBUG,
                 Build.BINARY_RELEASE,
+                Build.PACKAGE_RELEASE_COVERAGE,
+                Build.FUZZERS,
             ]
         ),
         JobNames.BUILD_CHECK_SPECIAL: BuildReportConfig(
@@ -1084,8 +1092,6 @@ CI_CONFIG = CIConfig(
                 Build.BINARY_LOONGARCH64,
                 Build.BINARY_AMD64_COMPAT,
                 Build.BINARY_AMD64_MUSL,
-                Build.PACKAGE_RELEASE_COVERAGE,
-                Build.FUZZERS,
             ]
         ),
     },
diff --git a/tests/integration/test_keeper_snapshots/test.py b/tests/integration/test_keeper_snapshots/test.py
index f6f746c892e..6dfb2078559 100644
--- a/tests/integration/test_keeper_snapshots/test.py
+++ b/tests/integration/test_keeper_snapshots/test.py
@@ -17,7 +17,6 @@ node = cluster.add_instance(
     "node",
     main_configs=["configs/enable_keeper.xml"],
     stay_alive=True,
-    with_zookeeper=True,
 )
 
 
@@ -211,3 +210,46 @@ def test_invalid_snapshot(started_cluster):
                 node_zk.close()
             except:
                 pass
+
+
+def test_snapshot_size(started_cluster):
+    keeper_utils.wait_until_connected(started_cluster, node)
+    node_zk = None
+    try:
+        node_zk = get_connection_zk("node")
+
+        node_zk.create("/test_state_size", b"somevalue")
+        strs = []
+        for i in range(100):
+            strs.append(random_string(123).encode())
+            node_zk.create("/test_state_size/node" + str(i), strs[i])
+
+        node_zk.stop()
+        node_zk.close()
+
+        keeper_utils.send_4lw_cmd(started_cluster, node, "csnp")
+        node.wait_for_log_line("Created persistent snapshot")
+
+        def get_snapshot_size():
+            return int(
+                next(
+                    filter(
+                        lambda line: "zk_latest_snapshot_size" in line,
+                        keeper_utils.send_4lw_cmd(started_cluster, node, "mntr").split(
+                            "\n"
+                        ),
+                    )
+                ).split("\t")[1]
+            )
+
+        assert get_snapshot_size() != 0
+        restart_clickhouse()
+        assert get_snapshot_size() != 0
+    finally:
+        try:
+            if node_zk is not None:
+                node_zk.stop()
+                node_zk.close()
+
+        except:
+            pass
diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/__init__.py b/tests/integration/test_unknown_column_dist_table_with_alias/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/configs/clusters.xml b/tests/integration/test_unknown_column_dist_table_with_alias/configs/clusters.xml
new file mode 100644
index 00000000000..754d765f23f
--- /dev/null
+++ b/tests/integration/test_unknown_column_dist_table_with_alias/configs/clusters.xml
@@ -0,0 +1,12 @@
+<clickhouse>
+    <remote_servers>
+        <localhost_cluster>
+            <shard>
+                <replica>
+                    <host>localhost</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </localhost_cluster>
+    </remote_servers>
+</clickhouse>
diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/test.py b/tests/integration/test_unknown_column_dist_table_with_alias/test.py
new file mode 100644
index 00000000000..0d3890f3e09
--- /dev/null
+++ b/tests/integration/test_unknown_column_dist_table_with_alias/test.py
@@ -0,0 +1,35 @@
+import pytest
+from helpers.cluster import ClickHouseCluster
+import logging
+
+cluster = ClickHouseCluster(__file__)
+node = cluster.add_instance("node", main_configs=["configs/clusters.xml"])
+
+
+@pytest.fixture(scope="module")
+def start_cluster():
+    try:
+        logging.info("Starting cluster...")
+        cluster.start()
+        logging.info("Cluster started")
+
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+@pytest.mark.parametrize("prefer_localhost_replica", [0, 1])
+def test_distributed_table_with_alias(start_cluster, prefer_localhost_replica):
+    node.query(
+        """
+        DROP TABLE IF EXISTS local;
+        DROP TABLE IF EXISTS dist;
+        CREATE TABLE local(`dummy` UInt8) ENGINE = MergeTree ORDER BY tuple();
+        CREATE TABLE dist AS local ENGINE = Distributed(localhost_cluster, currentDatabase(), local);
+        """
+    )
+
+    node.query(
+        "WITH 'Hello' AS `alias` SELECT `alias` FROM dist GROUP BY `alias`;",
+        settings={"prefer_localhost_replica": prefer_localhost_replica},
+    )
diff --git a/tests/queries/0_stateless/00825_protobuf_format_map.sh b/tests/queries/0_stateless/00825_protobuf_format_map.sh
index 81d1cf2e305..82ebcc6959a 100755
--- a/tests/queries/0_stateless/00825_protobuf_format_map.sh
+++ b/tests/queries/0_stateless/00825_protobuf_format_map.sh
@@ -10,8 +10,6 @@ set -eo pipefail
 
 # Run the client.
 $CLICKHOUSE_CLIENT --multiquery <<EOF
 SELECT mapExists((k, v) -> k = array(1,2), map(array(1,2), 4, array(3,4), 5));
 SELECT mapExists((k, v) -> k = map(1,2), map(map(1,2), 4, map(3,4), 5));
 SELECT mapExists((k, v) -> k = tuple(1,2), map(tuple(1,2), 4, tuple(3,4), 5));
+SELECT mapAll((k, v) -> k = 0.1::Float32, map(0.1::Float32, 4, 0.2::Float32, 5));
+SELECT mapAll((k, v) -> k = 0.1::Float64, map(0.1::Float64, 4, 0.2::Float64, 5));
+SELECT mapAll((k, v) -> k = array(1,2), map(array(1,2), 4, array(3,4), 5));
+SELECT mapAll((k, v) -> k = map(1,2), map(map(1,2), 4, map(3,4), 5));
+SELECT mapAll((k, v) -> k = tuple(1,2), map(tuple(1,2), 4, tuple(3,4), 5));
+
 SELECT mapSort((k, v) -> k, map(0.1::Float32, 4, 0.2::Float32, 5));
 SELECT mapSort((k, v) -> k, map(0.1::Float64, 4, 0.2::Float64, 5));
 SELECT mapSort((k, v) -> k, map(array(1,2), 4, array(3,4), 5));
@@ -41,6 +47,9 @@ SELECT mapConcat(map(tuple(1,2), 4), map(tuple(3,4), 5));
 
 SELECT mapExists((k, v) -> k LIKE '%3', col) FROM table_map ORDER BY id;
 SELECT mapExists((k, v) -> k LIKE '%2' AND v < 1000, col) FROM table_map ORDER BY id;
+SELECT mapAll((k, v) -> k LIKE '%3', col) FROM table_map ORDER BY id;
+SELECT mapAll((k, v) -> k LIKE '%2' AND v < 1000, col) FROM table_map ORDER BY id;
+
 SELECT mapSort(col) FROM table_map ORDER BY id;
 SELECT mapSort((k, v) -> v, col) FROM table_map ORDER BY id;
 SELECT mapPartialSort((k, v) -> k, 2, col) FROM table_map ORDER BY id;
@@ -50,6 +59,8 @@ SELECT mapApply((x, y) -> (x, x + 1), map(1, 0, 2, 0));
 SELECT mapApply((x, y) -> (x, x + 1), materialize(map(1, 0, 2, 0)));
 SELECT mapApply((x, y) -> ('x', 'y'), map(1, 0, 2, 0));
 SELECT mapApply((x, y) -> ('x', 'y'), materialize(map(1, 0, 2, 0)));
+SELECT mapApply((x, y) -> (x, x + 1), map(1.0, 0, 2.0, 0));
+SELECT mapApply((x, y) -> (x, x + 1), materialize(map(1.0, 0, 2.0, 0)));
 SELECT mapUpdate(map('k1', 1, 'k2', 2), map('k1', 11, 'k2', 22));
 SELECT mapUpdate(materialize(map('k1', 1, 'k2', 2)), map('k1', 11, 'k2', 22));
 
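The added queries exercise the new `mapAll` alongside the existing `mapExists`. Semantically the two are the usual universal/existential pair: `mapAll` requires the predicate to hold for every key-value pair, `mapExists` for at least one. As a loose analogy only (my illustration, not ClickHouse internals), over the map's underlying array of pairs they behave like `std::all_of` versus `std::any_of`:

```cpp
#include <algorithm>
#include <cassert>
#include <string>
#include <utility>
#include <vector>

int main()
{
    // A map literal like map('k1', 1, 'k2', 2), viewed as its underlying pairs.
    std::vector<std::pair<std::string, int>> m = {{"k1", 1}, {"k2", 2}};

    auto pred = [](const auto & kv) { return kv.second < 1000; };

    // mapAll(p, m): p holds for every entry; mapExists(p, m): for at least one.
    bool all = std::all_of(m.begin(), m.end(), pred);
    bool any = std::any_of(m.begin(), m.end(), pred);
    assert(all && any);

    // De Morgan: mapAll(p, m) == NOT mapExists(NOT p, m).
    assert(all == !std::any_of(m.begin(), m.end(),
                               [&](const auto & kv) { return !pred(kv); }));
    return 0;
}
```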
diff --git a/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql b/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql
index 9988eef0ad3..7b49378d4da 100644
--- a/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql
+++ b/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql
@@ -9,6 +9,6 @@ ENGINE = Memory;
 INSERT INTO data_a_02187
 SELECT *
 FROM system.one
-SETTINGS max_block_size = '1', min_insert_block_size_rows = '65536', min_insert_block_size_bytes = '0', max_insert_threads = '0', max_threads = '3', receive_timeout = '10', receive_data_timeout_ms = '10000', connections_with_failover_max_tries = '0', extremes = '1', use_uncompressed_cache = '0', optimize_move_to_prewhere = '1', optimize_move_to_prewhere_if_final = '0', replication_alter_partitions_sync = '2', totals_mode = 'before_having', allow_suspicious_low_cardinality_types = '1', compile_expressions = '1', min_count_to_compile_expression = '0', group_by_two_level_threshold = '100', distributed_aggregation_memory_efficient = '0', distributed_group_by_no_merge = '1', optimize_distributed_group_by_sharding_key = '1', optimize_skip_unused_shards = '1', optimize_skip_unused_shards_rewrite_in = '1', force_optimize_skip_unused_shards = '2', optimize_skip_unused_shards_nesting = '1', force_optimize_skip_unused_shards_nesting = '2', merge_tree_min_rows_for_concurrent_read = '10000', force_primary_key = '1', network_compression_method = 'ZSTD', network_zstd_compression_level = '7', log_queries = '0', log_queries_min_type = 'QUERY_FINISH', distributed_product_mode = 'local', insert_quorum = '2', insert_quorum_timeout = '0', insert_quorum_parallel = '0', select_sequential_consistency = '1', join_use_nulls = '1', any_join_distinct_right_table_keys = '1', preferred_max_column_in_block_size_bytes = '32', distributed_foreground_insert = '1', insert_allow_materialized_columns = '1', use_index_for_in_with_subqueries = '1', joined_subquery_requires_alias = '0', empty_result_for_aggregation_by_empty_set = '1', allow_suspicious_codecs = '1', query_profiler_real_time_period_ns = '0', query_profiler_cpu_time_period_ns = '0', opentelemetry_start_trace_probability = '1', max_rows_to_read = '1000000', read_overflow_mode = 'break', max_rows_to_group_by = '10', group_by_overflow_mode = 'any', max_rows_to_sort = '100', sort_overflow_mode = 'break', max_result_rows = '10', max_execution_time = '3', max_execution_speed = '1', max_bytes_in_join = '100', join_algorithm = 'partial_merge', max_memory_usage = '1099511627776', log_query_threads = '1', send_logs_level = 'fatal', enable_optimize_predicate_expression = '1', prefer_localhost_replica = '1', optimize_read_in_order = '1', optimize_aggregation_in_order = '1', read_in_order_two_level_merge_threshold = '1', allow_introspection_functions = '1', check_query_single_value_result = '1', allow_experimental_live_view = '1', default_table_engine = 'Memory', mutations_sync = '2', convert_query_to_cnf = '0', optimize_arithmetic_operations_in_aggregate_functions = '1', optimize_duplicate_order_by_and_distinct = '0', optimize_multiif_to_if = '0', optimize_functions_to_subcolumns = '1', optimize_using_constraints = '1', optimize_substitute_columns = '1', optimize_append_index = '1', transform_null_in = '1', data_type_default_nullable = '1', cast_keep_nullable = '1', cast_ipv4_ipv6_default_on_conversion_error = '0', system_events_show_zero_values = '1', enable_global_with_statement = '1', optimize_on_insert = '0', optimize_rewrite_sum_if_to_count_if = '1', distributed_ddl_output_mode = 'throw', union_default_mode = 'ALL', optimize_aggregators_of_group_by_keys = '1', optimize_group_by_function_keys = '1', short_circuit_function_evaluation = 'enable', async_insert = '1', enable_filesystem_cache = '0', allow_deprecated_database_ordinary = '1', allow_deprecated_syntax_for_merge_tree = '1', allow_experimental_nlp_functions = '1', allow_experimental_object_type = '1', allow_experimental_map_type = '1', optimize_use_projections = '1', input_format_null_as_default = '1', input_format_ipv4_default_on_conversion_error = '0', input_format_ipv6_default_on_conversion_error = '0', output_format_json_named_tuples_as_objects = '1', output_format_write_statistics = '0', output_format_pretty_row_numbers = '1';
+SETTINGS max_block_size = '1', min_insert_block_size_rows = '65536', min_insert_block_size_bytes = '0', max_insert_threads = '0', max_threads = '3', receive_timeout = '10', receive_data_timeout_ms = '10000', connections_with_failover_max_tries = '0', extremes = '1', use_uncompressed_cache = '0', optimize_move_to_prewhere = '1', optimize_move_to_prewhere_if_final = '0', replication_alter_partitions_sync = '2', totals_mode = 'before_having', allow_suspicious_low_cardinality_types = '1', compile_expressions = '1', min_count_to_compile_expression = '0', group_by_two_level_threshold = '100', distributed_aggregation_memory_efficient = '0', distributed_group_by_no_merge = '1', optimize_distributed_group_by_sharding_key = '1', optimize_skip_unused_shards = '1', optimize_skip_unused_shards_rewrite_in = '1', force_optimize_skip_unused_shards = '2', optimize_skip_unused_shards_nesting = '1', force_optimize_skip_unused_shards_nesting = '2', merge_tree_min_rows_for_concurrent_read = '10000', force_primary_key = '1', network_compression_method = 'ZSTD', network_zstd_compression_level = '7', log_queries = '0', log_queries_min_type = 'QUERY_FINISH', distributed_product_mode = 'local', insert_quorum = '2', insert_quorum_timeout = '0', insert_quorum_parallel = '0', select_sequential_consistency = '1', join_use_nulls = '1', any_join_distinct_right_table_keys = '1', preferred_max_column_in_block_size_bytes = '32', distributed_foreground_insert = '1', insert_allow_materialized_columns = '1', use_index_for_in_with_subqueries = '1', joined_subquery_requires_alias = '0', empty_result_for_aggregation_by_empty_set = '1', allow_suspicious_codecs = '1', query_profiler_real_time_period_ns = '0', query_profiler_cpu_time_period_ns = '0', opentelemetry_start_trace_probability = '1', max_rows_to_read = '1000000', read_overflow_mode = 'break', max_rows_to_group_by = '10', group_by_overflow_mode = 'any', max_rows_to_sort = '100', sort_overflow_mode = 'break', max_result_rows = '10', max_execution_time = '3', max_execution_speed = '1', max_bytes_in_join = '100', join_algorithm = 'partial_merge', max_memory_usage = '1099511627776', log_query_threads = '1', send_logs_level = 'fatal', enable_optimize_predicate_expression = '1', prefer_localhost_replica = '1', optimize_read_in_order = '1', optimize_aggregation_in_order = '1', read_in_order_two_level_merge_threshold = '1', allow_introspection_functions = '1', check_query_single_value_result = '1', allow_experimental_live_view = '1', default_table_engine = 'Memory', mutations_sync = '2', convert_query_to_cnf = '0', optimize_arithmetic_operations_in_aggregate_functions = '1', optimize_duplicate_order_by_and_distinct = '0', optimize_multiif_to_if = '0', optimize_functions_to_subcolumns = '1', optimize_using_constraints = '1', optimize_substitute_columns = '1', optimize_append_index = '1', transform_null_in = '1', data_type_default_nullable = '1', cast_keep_nullable = '1', cast_ipv4_ipv6_default_on_conversion_error = '0', system_events_show_zero_values = '1', enable_global_with_statement = '1', optimize_on_insert = '0', optimize_rewrite_sum_if_to_count_if = '1', distributed_ddl_output_mode = 'throw', union_default_mode = 'ALL', optimize_aggregators_of_group_by_keys = '1', optimize_group_by_function_keys = '1', short_circuit_function_evaluation = 'enable', async_insert = '1', enable_filesystem_cache = '0', allow_deprecated_database_ordinary = '1', allow_deprecated_syntax_for_merge_tree = '1', allow_experimental_nlp_functions = '1', allow_experimental_object_type = '1', optimize_use_projections = '1', input_format_null_as_default = '1', input_format_ipv4_default_on_conversion_error = '0', input_format_ipv6_default_on_conversion_error = '0', output_format_json_named_tuples_as_objects = '1', output_format_write_statistics = '0', output_format_pretty_row_numbers = '1';
 
 DROP TABLE data_a_02187;
diff --git a/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.reference b/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.reference
new file mode 100644
index 00000000000..9dc0605fd5a
--- /dev/null
+++ b/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.reference
@@ -0,0 +1,8 @@
+Map(Nothing, ...) is non-comparable --> not usable as primary key
+But Map(Nothing, ...) can be a non-primary-key, it is quite useless though ...
+Map(Float32, ...) and Map(LC(String)) are okay as primary key
+{1:'a'} {'b':'b'}
+{2:'aa'} {'bb':'bb'}
+Map(Float32, ...) and Map(LC(String)) as non-primary-key
+{1:'a'} {'b':'b'}
+{3:'aaa'} {'bb':'bb'}
diff --git a/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.sql b/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.sql
new file mode 100644
index 00000000000..a3cd59df1cd
--- /dev/null
+++ b/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.sql
@@ -0,0 +1,33 @@
+-- Tests maps with "unusual" key types (Float32, Nothing, LowCardinality(String))
+
+SET mutations_sync = 2;
+
+DROP TABLE IF EXISTS tab;
+
+SELECT 'Map(Nothing, ...) is non-comparable --> not usable as primary key';
+CREATE TABLE tab (m1 Map(Nothing, String)) ENGINE = MergeTree ORDER BY m1; -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY }
+
+SELECT 'But Map(Nothing, ...) can be a non-primary-key, it is quite useless though ...';
+CREATE TABLE tab (m3 Map(Nothing, String)) ENGINE = MergeTree ORDER BY tuple();
+-- INSERT INTO tab VALUES (map('', 'd')); -- { serverError NOT_IMPLEMENTED } -- The client can't serialize the data and fails. The query
+--                                                                              doesn't reach the server and we can't check via 'serverError' :-/
+DROP TABLE tab;
+
+SELECT 'Map(Float32, ...) and Map(LC(String)) are okay as primary key';
+CREATE TABLE tab (m1 Map(Float32, String), m2 Map(LowCardinality(String), String)) ENGINE = MergeTree ORDER BY (m1, m2);
+INSERT INTO tab VALUES (map(1.0, 'a'), map('b', 'b'));
+INSERT INTO tab VALUES (map(2.0, 'aa'), map('bb', 'bb'));
+
+-- Test merge
+OPTIMIZE TABLE tab FINAL;
+SELECT * FROM tab ORDER BY m1, m2;
+
+DROP TABLE tab;
+
+SELECT 'Map(Float32, ...) and Map(LC(String)) as non-primary-key';
+CREATE TABLE tab (m1 Map(Float32, String), m2 Map(LowCardinality(String), String)) ENGINE = MergeTree ORDER BY tuple();
+INSERT INTO tab VALUES (map(1.0, 'a'), map('b', 'b')), (map(2.0, 'aa'), map('bb', 'bb'));
+ALTER TABLE tab UPDATE m1 = map(3.0, 'aaa') WHERE m1 = map(2.0, 'aa');
+SELECT * FROM tab ORDER BY m1, m2;
+
+DROP TABLE tab;
diff --git a/utils/security-generator/generate_security.py b/utils/security-generator/generate_security.py
index 2b37e28257a..21c6b72e476 100755
--- a/utils/security-generator/generate_security.py
+++ b/utils/security-generator/generate_security.py
@@ -98,7 +98,7 @@ def generate_supported_versions() -> str:
             lts.append(version)
             to_append = f"| {version} | ✔️ |"
         if to_append:
-            if len(regular) == max_regular or len(lts) == max_lts:
+            if len(regular) == max_regular and len(lts) == max_lts:
                 supported_year = year
             table.append(to_append)
             continue