Bloom filter index for Map: added support for the has function

2024-11-22 07:31:57 +00:00 · 2021-09-23 01:10:14 +03:00 · 2021-09-23 01:10:14 +03:00 · d5a7e92b97
commit d5a7e92b97
parent 1bf375e2b7
4 changed files with 37 additions and 17 deletions
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@ -100,8 +100,8 @@ For a description of parameters, see the [CREATE query description](../../../sql
    -   `min_merge_bytes_to_use_direct_io` — The minimum data volume for merge operation that is required for using direct I/O access to the storage disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse reads and writes the data to the storage disk using the direct I/O interface (`O_DIRECT` option). If `min_merge_bytes_to_use_direct_io = 0`, then direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes.
        <a name="mergetree_setting-merge_with_ttl_timeout"></a>
    -   `merge_with_ttl_timeout` — Minimum delay in seconds before repeating a merge with delete TTL. Default value: `14400` seconds (4 hours).
-    -   `merge_with_recompression_ttl_timeout` — Minimum delay in seconds before repeating a merge with recompression TTL. Default value: `14400` seconds (4 hours).    
-    -   `try_fetch_recompressed_part_timeout` — Timeout (in seconds) before starting merge with recompression. During this time ClickHouse tries to fetch recompressed part from replica which assigned this merge with recompression. Default value: `7200` seconds (2 hours).    
+    -   `merge_with_recompression_ttl_timeout` — Minimum delay in seconds before repeating a merge with recompression TTL. Default value: `14400` seconds (4 hours).
+    -   `try_fetch_recompressed_part_timeout` — Timeout (in seconds) before starting merge with recompression. During this time ClickHouse tries to fetch recompressed part from replica which assigned this merge with recompression. Default value: `7200` seconds (2 hours).
    -   `write_final_mark` — Enables or disables writing the final index mark at the end of data part (after the last byte). Default value: 1. Don’t turn it off.
    -   `merge_max_block_size` — Maximum number of rows in block for merge operations. Default value: 8192.
    -   `storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes).
@ -335,7 +335,16 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234

    The optional `false_positive` parameter is the probability of receiving a false positive response from the filter. Possible values: (0, 1). Default value: 0.025.

-    Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`, `UUID`.
+    Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`, `UUID`, `Map`.
+
+    For the `Map` data type, the client can specify whether the index should be created for keys or for values by using the [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function.
+
+    Example of index creation for the `Map` data type:
+
+```
+INDEX map_key_index mapKeys(map_column) TYPE bloom_filter GRANULARITY 1
+INDEX map_value_index mapValues(map_column) TYPE bloom_filter GRANULARITY 1
+```

    The following functions can use it: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md).

@ -398,7 +407,7 @@ Projections are an experimental feature. To enable them you must set the [allow_
 Projections are not supported in the `SELECT` statements with the [FINAL](../../../sql-reference/statements/select/from.md#select-from-final) modifier.

 ### Projection Query {#projection-query}
-A projection query is what defines a projection. It implicitly selects data from the parent table. 
+A projection query is what defines a projection. It implicitly selects data from the parent table.
 **Syntax**

 ```sql
@ -548,7 +557,7 @@ ORDER BY d
 TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1;
 ```

-Creating a table, where expired rows are recompressed: 
+Creating a table, where expired rows are recompressed:

 ```sql
 CREATE TABLE table_for_recompression
--- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp
@ -577,7 +577,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
        return true;
    }

-    if (function_name == "mapContains")
+    if (function_name == "mapContains" || function_name == "has")
    {
        const auto & col_name = assert_cast<ASTIdentifier *>(key_ast.get())->name();
        auto map_keys_index_column_name = fmt::format("mapKeys({})", col_name);
--- a/tests/queries/0_stateless/2021_map_bloom_filter_index.reference
+++ b/tests/queries/0_stateless/2021_map_bloom_filter_index.reference
@ -23,6 +23,14 @@ NOT IN with non existing key
 0	{'K0':'V0'}
 1	{'K1':'V1'}
 NOT IN with non existing key and default value
+MapContains with existing key
+0	{'K0':'V0'}
+MapContains with non existing key
+MapContains with non existing key and default value
+Has with existing key
+0	{'K0':'V0'}
+Has with non existing key
+Has with non existing key and default value
 Map bloom filter mapValues
 IN with existing key
 0	{'K0':'V0'}
@ -36,7 +44,3 @@ NOT IN with non existing key
 0	{'K0':'V0'}
 1	{'K1':'V1'}
 NOT IN with non existing key and default value
-MapContains with existing key
-0	{'K0':'V0'}
-MapContains with non existing key
-MapContains with non existing key and default value
--- a/tests/queries/0_stateless/2021_map_bloom_filter_index.sql
+++ b/tests/queries/0_stateless/2021_map_bloom_filter_index.sql
@ -36,6 +36,20 @@ SELECT * FROM map_test_index_map_keys WHERE map['K2'] NOT IN 'V2';
 SELECT 'NOT IN with non existing key and default value';
 SELECT * FROM map_test_index_map_keys WHERE map['K3'] NOT IN '';

+SELECT 'MapContains with existing key';
+SELECT * FROM map_test_index_map_keys WHERE mapContains(map, 'K0');
+SELECT 'MapContains with non existing key';
+SELECT * FROM map_test_index_map_keys WHERE mapContains(map, 'K2');
+SELECT 'MapContains with non existing key and default value';
+SELECT * FROM map_test_index_map_keys WHERE mapContains(map, '');
+
+SELECT 'Has with existing key';
+SELECT * FROM map_test_index_map_keys WHERE has(map, 'K0');
+SELECT 'Has with non existing key';
+SELECT * FROM map_test_index_map_keys WHERE has(map, 'K2');
+SELECT 'Has with non existing key and default value';
+SELECT * FROM map_test_index_map_keys WHERE has(map, '');
+
 DROP TABLE map_test_index_map_keys;

 DROP TABLE IF EXISTS map_test_index_map_values;
@ -63,11 +77,4 @@ SELECT * FROM map_test_index_map_values WHERE map['K2'] NOT IN 'V2';
 SELECT 'NOT IN with non existing key and default value';
 SELECT * FROM map_test_index_map_values WHERE map['K3'] NOT IN '';

-SELECT 'MapContains with existing key';
-SELECT * FROM map_test_index_map_values WHERE mapContains(map, 'K0');
-SELECT 'MapContains with non existing key';
-SELECT * FROM map_test_index_map_values WHERE mapContains(map, 'K2');
-SELECT 'MapContains with non existing key and default value';
-SELECT * FROM map_test_index_map_values WHERE mapContains(map, 'K3');
-
 DROP TABLE map_test_index_map_values;