mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Merge branch 'master' into utf8mb4-in-mysql-client
This commit is contained in:
commit
7b8589dbde
2
.gitmodules
vendored
2
.gitmodules
vendored
@ -184,7 +184,7 @@
|
||||
url = https://github.com/ClickHouse-Extras/krb5
|
||||
[submodule "contrib/cyrus-sasl"]
|
||||
path = contrib/cyrus-sasl
|
||||
url = https://github.com/cyrusimap/cyrus-sasl
|
||||
url = https://github.com/ClickHouse-Extras/cyrus-sasl
|
||||
branch = cyrus-sasl-2.1
|
||||
[submodule "contrib/croaring"]
|
||||
path = contrib/croaring
|
||||
|
@ -84,10 +84,12 @@
|
||||
# define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined")))
|
||||
# define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address")))
|
||||
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
|
||||
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined")))
|
||||
#else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it.
|
||||
# define NO_SANITIZE_UNDEFINED
|
||||
# define NO_SANITIZE_ADDRESS
|
||||
# define NO_SANITIZE_THREAD
|
||||
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE
|
||||
#endif
|
||||
|
||||
/// A template function for suppressing warnings about unused variables or function results.
|
||||
|
@ -31,7 +31,7 @@ static void *volatile vdso_func = (void *)getcpu_init;
|
||||
int sched_getcpu(void)
|
||||
{
|
||||
int r;
|
||||
unsigned cpu;
|
||||
unsigned cpu = 0;
|
||||
|
||||
#ifdef VDSO_GETCPU_SYM
|
||||
getcpu_f f = (getcpu_f)vdso_func;
|
||||
|
@ -62,6 +62,7 @@ RUN python3 -m pip install \
|
||||
avro \
|
||||
cassandra-driver \
|
||||
confluent-kafka \
|
||||
dict2xml \
|
||||
dicttoxml \
|
||||
docker \
|
||||
docker-compose==1.22.0 \
|
||||
|
@ -53,10 +53,12 @@ function run_tests()
|
||||
if [ "$NUM_TRIES" -gt "1" ]; then
|
||||
ADDITIONAL_OPTIONS+=('--skip')
|
||||
ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip')
|
||||
ADDITIONAL_OPTIONS+=('--jobs')
|
||||
ADDITIONAL_OPTIONS+=('4')
|
||||
fi
|
||||
|
||||
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
|
||||
--test-runs "$NUM_TRIES" --jobs 4 \
|
||||
--test-runs "$NUM_TRIES" \
|
||||
"$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
|
||||
| ts '%Y-%m-%d %H:%M:%S' \
|
||||
| tee -a test_output/test_result.txt
|
||||
|
@ -7,8 +7,6 @@ toc_title: EmbeddedRocksDB
|
||||
|
||||
This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/).
|
||||
|
||||
`EmbeddedRocksDB` lets you:
|
||||
|
||||
## Creating a Table {#table_engine-EmbeddedRocksDB-creating-a-table}
|
||||
|
||||
``` sql
|
||||
@ -23,6 +21,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
Required parameters:
|
||||
|
||||
- `primary_key_name` – any column name in the column list.
|
||||
- `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a `rocksdb key`.
|
||||
- columns other than the primary key will be serialized in binary as `rocksdb` value in corresponding order.
|
||||
- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from `rocksdb`.
|
||||
|
||||
Example:
|
||||
|
||||
@ -38,8 +39,4 @@ ENGINE = EmbeddedRocksDB
|
||||
PRIMARY KEY key
|
||||
```
|
||||
|
||||
## Description {#description}
|
||||
|
||||
- `primary key` must be specified, it only supports one column in primary key. The primary key will serialized in binary as rocksdb key.
|
||||
- columns other than the primary key will be serialized in binary as rocksdb value in corresponding order.
|
||||
- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from rocksdb.
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/embedded-rocksdb/) <!--hide-->
|
||||
|
@ -428,7 +428,7 @@ Possible values:
|
||||
|
||||
- `'basic'` — Use basic parser.
|
||||
|
||||
ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `'2019-08-20 10:18:56'` or `2019-08-20`.
|
||||
ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`.
|
||||
|
||||
Default value: `'basic'`.
|
||||
|
||||
@ -443,19 +443,19 @@ Allows choosing different output formats of the text representation of date and
|
||||
|
||||
Possible values:
|
||||
|
||||
- `'simple'` - Simple output format.
|
||||
- `simple` - Simple output format.
|
||||
|
||||
Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `'2019-08-20 10:18:56'`. Calculation is performed according to the data type's time zone (if present) or server time zone.
|
||||
Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone.
|
||||
|
||||
- `'iso'` - ISO output format.
|
||||
- `iso` - ISO output format.
|
||||
|
||||
Clickhouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `'2019-08-20T10:18:56Z'`. Note that output is in UTC (`Z` means UTC).
|
||||
Clickhouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC).
|
||||
|
||||
- `'unix_timestamp'` - Unix timestamp output format.
|
||||
- `unix_timestamp` - Unix timestamp output format.
|
||||
|
||||
Clickhouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `'1566285536'`.
|
||||
Clickhouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `1566285536`.
|
||||
|
||||
Default value: `'simple'`.
|
||||
Default value: `simple`.
|
||||
|
||||
See also:
|
||||
|
||||
|
@ -241,7 +241,7 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `window` — Length of the sliding window in seconds.
|
||||
- `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`.
|
||||
- `mode` - It is an optional argument.
|
||||
- `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values.
|
||||
- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, it’s value can’t exceed the Int64 maximum, which is 2^63 - 1).
|
||||
|
@ -11,7 +11,7 @@ Key length depends on encryption mode. It is 16, 24, and 32 bytes long for `-128
|
||||
|
||||
Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored).
|
||||
|
||||
Note that these functions work slowly.
|
||||
Note that these functions work slowly until ClickHouse 21.1.
|
||||
|
||||
## encrypt {#encrypt}
|
||||
|
||||
@ -41,7 +41,7 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
|
||||
- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Examples**
|
||||
|
||||
@ -52,57 +52,38 @@ Query:
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
`comment` String,
|
||||
`secret` String
|
||||
)
|
||||
ENGINE = Memory
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
Insert some data (please avoid storing the keys/ivs in the database as this undermines the whole concept of encryption), also storing 'hints' is unsafe too and used only for illustrative purposes:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\
|
||||
('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\
|
||||
('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\
|
||||
('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212'));
|
||||
```
|
||||
|
||||
Example without `iv`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
|
||||
SELECT comment, hex(secret) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
|
||||
│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
|
||||
│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
|
||||
│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
|
||||
└─────────────┴──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `iv`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
|
||||
│ aes-256-ctr │ │
|
||||
│ aes-256-ctr │ 7FB039F7 │
|
||||
│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
|
||||
└─────────────┴───────────────────────────────────────────────┘
|
||||
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
|
||||
│ aes-256-cfb128 no IV │ B4972BDC4459 │
|
||||
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
|
||||
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
|
||||
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
|
||||
└─────────────────────────────────────┴──────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `-gcm`:
|
||||
@ -110,40 +91,26 @@ Example with `-gcm`:
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
|
||||
INSERT INTO encryption_test VALUES('aes-256-gcm', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')), \
|
||||
('aes-256-gcm with AAD', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv', 'aad'));
|
||||
|
||||
SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%';
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
|
||||
│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
|
||||
│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
|
||||
│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
|
||||
└─────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `-gcm` mode and with `aad`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
|
||||
│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
|
||||
│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
|
||||
│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
|
||||
└─────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
|
||||
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
|
||||
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
|
||||
└──────────────────────┴──────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## aes_encrypt_mysql {#aes_encrypt_mysql}
|
||||
|
||||
Compatible with mysql encryption and can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
|
||||
Compatible with mysql encryption and resulting ciphertext can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
|
||||
|
||||
Will produce same ciphertext as `encrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_encrypt_mysql` will stick to what MySQL's `aes_encrypt` does: 'fold' `key` and ignore excess bits of `IV`.
|
||||
|
||||
Supported encryption modes:
|
||||
|
||||
@ -156,7 +123,7 @@ Supported encryption modes:
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
``` sql
|
||||
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
|
||||
```
|
||||
|
||||
@ -164,78 +131,98 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
|
||||
|
||||
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Optinal, only first 16 bytes are taken into account [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
|
||||
- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
Given equal input `encrypt` and `aes_encrypt_mysql` produce the same ciphertext:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal;
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
Result:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
┌─ciphertexts_equal─┐
|
||||
│ 1 │
|
||||
└───────────────────┘
|
||||
```
|
||||
|
||||
Example without `iv`:
|
||||
|
||||
But `encrypt` fails when `key` or `iv` is longer than expected:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
|
||||
SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
|
||||
│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │
|
||||
│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │
|
||||
│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
|
||||
└─────────────┴──────────────────────────────────────────────────────────────────┘
|
||||
Received exception from server (version 21.1.2):
|
||||
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
|
||||
```
|
||||
|
||||
Example with `iv`:
|
||||
While `aes_encrypt_mysql` produces MySQL-compatitalbe output:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
|
||||
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─ciphertext───┐
|
||||
│ 24E9E4966469 │
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
Notice how supplying even longer `IV` produces the same result
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
|
||||
│ aes-256-cfb128 │ │
|
||||
│ aes-256-cfb128 │ 7FB039F7 │
|
||||
│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │
|
||||
└────────────────┴────────────────────────────────────────────────────────────┘
|
||||
┌─ciphertext───┐
|
||||
│ 24E9E4966469 │
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
Which is binary equal to what MySQL produces on same inputs:
|
||||
|
||||
``` sql
|
||||
mysql> SET block_encryption_mode='aes-256-cfb128';
|
||||
Query OK, 0 rows affected (0.00 sec)
|
||||
|
||||
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
|
||||
+------------------------+
|
||||
| ciphertext |
|
||||
+------------------------+
|
||||
| 0x24E9E4966469 |
|
||||
+------------------------+
|
||||
1 row in set (0.00 sec)
|
||||
```
|
||||
|
||||
## decrypt {#decrypt}
|
||||
|
||||
This function decrypts data using these modes:
|
||||
This function decrypts ciphertext into a plaintext using these modes:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
@ -247,7 +234,7 @@ This function decrypts data using these modes:
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
``` sql
|
||||
decrypt('mode', 'ciphertext', 'key' [, iv, aad])
|
||||
```
|
||||
|
||||
@ -265,51 +252,56 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad])
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
Re-using table from [encrypt](./encryption-functions.md#encrypt).
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
|
||||
SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
|
||||
SELECT comment, hex(secret) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
|
||||
│ aes-128-ecb │ │
|
||||
│ aes-128-ecb │ text │
|
||||
│ aes-128-ecb │ What Is ClickHouse? │
|
||||
└─────────────┴─────────────────────────────────────────────────────────────────────┘
|
||||
``` text
|
||||
┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
|
||||
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
|
||||
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
|
||||
└──────────────────────┴──────────────────────────────────────────────┘
|
||||
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
|
||||
│ aes-256-cfb128 no IV │ B4972BDC4459 │
|
||||
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
|
||||
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
|
||||
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
|
||||
└─────────────────────────────────────┴──────────────────────────────────┘
|
||||
```
|
||||
|
||||
Now let's try to decrypt all that data.
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test
|
||||
```
|
||||
|
||||
Result:
|
||||
``` text
|
||||
┌─comment─────────────────────────────┬─plaintext─┐
|
||||
│ aes-256-cfb128 no IV │ Secret │
|
||||
│ aes-256-cfb128 no IV, different key │ <20>4<EFBFBD>
|
||||
<20> │
|
||||
│ aes-256-cfb128 with IV │ <20><><EFBFBD>6<EFBFBD>~ │
|
||||
│aes-256-cbc no IV │ <20>2*4<>h3c<33>4w<34><77>@
|
||||
└─────────────────────────────────────┴───────────┘
|
||||
```
|
||||
|
||||
Notice how only portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption.
|
||||
|
||||
## aes_decrypt_mysql {#aes_decrypt_mysql}
|
||||
|
||||
Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
|
||||
|
||||
Will produce same plaintext as `decrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_decrypt_mysql` will stick to what MySQL's `aes_decrypt` does: 'fold' `key` and ignore excess bits of `IV`.
|
||||
|
||||
Supported decryption modes:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
@ -321,7 +313,7 @@ Supported decryption modes:
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
``` sql
|
||||
aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
|
||||
```
|
||||
|
||||
@ -338,44 +330,30 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
|
||||
Query:
|
||||
|
||||
Let's decrypt data we've previously encrypted with MySQL:
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
mysql> SET block_encryption_mode='aes-256-cfb128';
|
||||
Query OK, 0 rows affected (0.00 sec)
|
||||
|
||||
Insert this data:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
|
||||
+------------------------+
|
||||
| ciphertext |
|
||||
+------------------------+
|
||||
| 0x24E9E4966469 |
|
||||
+------------------------+
|
||||
1 row in set (0.00 sec)
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
|
||||
SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
|
||||
│ aes-128-cbc │ │
|
||||
│ aes-128-cbc │ text │
|
||||
│ aes-128-cbc │ What Is ClickHouse? │
|
||||
└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘
|
||||
┌─plaintext─┐
|
||||
│ Secret │
|
||||
└───────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) <!--hide-->
|
||||
|
@ -0,0 +1,44 @@
|
||||
---
|
||||
toc_priority: 6
|
||||
toc_title: EmbeddedRocksDB
|
||||
---
|
||||
|
||||
# Движок EmbeddedRocksDB {#EmbeddedRocksDB-engine}
|
||||
|
||||
Этот движок позволяет интегрировать ClickHouse с [rocksdb](http://rocksdb.org/).
|
||||
|
||||
## Создание таблицы {#table_engine-EmbeddedRocksDB-creating-a-table}
|
||||
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
|
||||
...
|
||||
) ENGINE = EmbeddedRocksDB
|
||||
PRIMARY KEY(primary_key_name);
|
||||
```
|
||||
|
||||
Обязательные параметры:
|
||||
|
||||
- `primary_key_name` может быть любое имя столбца из списка столбцов.
|
||||
- Указание первичного ключа `primary key` является обязательным. Он будет сериализован в двоичном формате как ключ `rocksdb`.
|
||||
- Поддерживается только один столбец в первичном ключе.
|
||||
- Столбцы, которые отличаются от первичного ключа, будут сериализованы в двоичном формате как значение `rockdb` в соответствующем порядке.
|
||||
- Запросы с фильтрацией по ключу `equals` или `in` оптимизируются для поиска по нескольким ключам из `rocksdb`.
|
||||
|
||||
Пример:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test
|
||||
(
|
||||
`key` String,
|
||||
`v1` UInt32,
|
||||
`v2` String,
|
||||
`v3` Float32,
|
||||
)
|
||||
ENGINE = EmbeddedRocksDB
|
||||
PRIMARY KEY key;
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/embedded-rocksdb/) <!--hide-->
|
@ -406,21 +406,46 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- `'best_effort'` — включает расширенный парсинг.
|
||||
- `best_effort` — включает расширенный парсинг.
|
||||
|
||||
ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `'2018-06-08T01:02:03.000Z'`.
|
||||
ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `2018-06-08T01:02:03.000Z`.
|
||||
|
||||
- `'basic'` — используется базовый парсер.
|
||||
- `basic` — используется базовый парсер.
|
||||
|
||||
ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `'2019-08-20 10:18:56'` или `2019-08-20`.
|
||||
ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `2019-08-20 10:18:56` или `2019-08-20`.
|
||||
|
||||
Значение по умолчанию: `'basic'`.
|
||||
Значение по умолчанию: `basic`.
|
||||
|
||||
См. также:
|
||||
|
||||
- [Тип данных DateTime.](../../sql-reference/data-types/datetime.md)
|
||||
- [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md)
|
||||
|
||||
## date_time_output_format {#settings-date_time_output_format}
|
||||
|
||||
Позволяет выбрать разные выходные форматы текстового представления даты и времени.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- `simple` - простой выходной формат.
|
||||
|
||||
Выходные дата и время Clickhouse в формате `YYYY-MM-DD hh:mm:ss`. Например, `2019-08-20 10:18:56`. Расчет выполняется в соответствии с часовым поясом типа данных (если он есть) или часовым поясом сервера.
|
||||
|
||||
- `iso` - выходной формат ISO.
|
||||
|
||||
Выходные дата и время Clickhouse в формате [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ`. Например, `2019-08-20T10:18:56Z`. Обратите внимание, что выходные данные отображаются в формате UTC (`Z` означает UTC).
|
||||
|
||||
- `unix_timestamp` - выходной формат Unix.
|
||||
|
||||
Выходные дата и время в формате [Unix](https://en.wikipedia.org/wiki/Unix_time). Например `1566285536`.
|
||||
|
||||
Значение по умолчанию: `simple`.
|
||||
|
||||
См. также:
|
||||
|
||||
- [Тип данных DateTime](../../sql-reference/data-types/datetime.md)
|
||||
- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md)
|
||||
|
||||
## join_default_strictness {#settings-join_default_strictness}
|
||||
|
||||
Устанавливает строгость по умолчанию для [JOIN](../../sql-reference/statements/select/join.md#select-join).
|
||||
|
@ -27,7 +27,7 @@ DateTime([timezone])
|
||||
|
||||
Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`.
|
||||
|
||||
ClickHouse отображает значения типа `DateTime` в формате `YYYY-MM-DD hh:mm:ss`. Отображение можно поменять с помощью функции [formatDateTime](../../sql-reference/data-types/datetime.md#formatdatetime).
|
||||
ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime).
|
||||
|
||||
При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format).
|
||||
|
||||
|
@ -401,7 +401,7 @@ TTL date_time + INTERVAL 15 HOUR
|
||||
|
||||
### 列 TTL {#mergetree-column-ttl}
|
||||
|
||||
当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期,则ClickHouse 会从文件系统中的数据片段中此列。
|
||||
当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期,则ClickHouse 会从文件系统中的数据片段中删除此列。
|
||||
|
||||
`TTL`子句不能被用于主键字段。
|
||||
|
||||
|
@ -190,6 +190,7 @@ add_object_library(clickhouse_processors_sources Processors/Sources)
|
||||
add_object_library(clickhouse_processors_merges Processors/Merges)
|
||||
add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms)
|
||||
add_object_library(clickhouse_processors_queryplan Processors/QueryPlan)
|
||||
add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations)
|
||||
|
||||
set (DBMS_COMMON_LIBRARIES)
|
||||
# libgcc_s does not provide an implementation of an atomics library. Instead,
|
||||
|
@ -0,0 +1,2 @@
|
||||
add_executable(test-connect test_connect.cpp)
|
||||
target_link_libraries (test-connect PRIVATE dbms)
|
99
src/Client/tests/test_connect.cpp
Normal file
99
src/Client/tests/test_connect.cpp
Normal file
@ -0,0 +1,99 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
#include <Poco/Net/StreamSocket.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
|
||||
/** In a loop it connects to the server and immediately breaks the connection.
|
||||
* Using the SO_LINGER option, we ensure that the connection is terminated by sending a RST packet (not FIN).
|
||||
* Long time ago this behavior caused a bug in the TCPServer implementation in the Poco library.
|
||||
*/
|
||||
int main(int argc, char ** argv)
|
||||
try
|
||||
{
|
||||
size_t num_iterations = 1;
|
||||
size_t num_threads = 1;
|
||||
std::string host = "localhost";
|
||||
uint16_t port = 9000;
|
||||
|
||||
if (argc >= 2)
|
||||
num_iterations = DB::parse<size_t>(argv[1]);
|
||||
|
||||
if (argc >= 3)
|
||||
num_threads = DB::parse<size_t>(argv[2]);
|
||||
|
||||
if (argc >= 4)
|
||||
host = argv[3];
|
||||
|
||||
if (argc >= 5)
|
||||
port = DB::parse<uint16_t>(argv[4]);
|
||||
|
||||
std::atomic_bool cancel{false};
|
||||
std::vector<std::thread> threads(num_threads);
|
||||
for (auto & thread : threads)
|
||||
{
|
||||
thread = std::thread([&]
|
||||
{
|
||||
for (size_t i = 0; i < num_iterations && !cancel.load(std::memory_order_relaxed); ++i)
|
||||
{
|
||||
std::cerr << ".";
|
||||
|
||||
Poco::Net::SocketAddress address(host, port);
|
||||
|
||||
int fd = socket(PF_INET, SOCK_STREAM, IPPROTO_IP);
|
||||
|
||||
if (fd < 0)
|
||||
DB::throwFromErrno("Cannot create socket", 0);
|
||||
|
||||
linger linger_value;
|
||||
linger_value.l_onoff = 1;
|
||||
linger_value.l_linger = 0;
|
||||
|
||||
if (0 != setsockopt(fd, SOL_SOCKET, SO_LINGER, &linger_value, sizeof(linger_value)))
|
||||
DB::throwFromErrno("Cannot set linger", 0);
|
||||
|
||||
try
|
||||
{
|
||||
Stopwatch watch;
|
||||
|
||||
int res = connect(fd, address.addr(), address.length());
|
||||
|
||||
if (res != 0 && errno != EINPROGRESS && errno != EWOULDBLOCK)
|
||||
{
|
||||
close(fd);
|
||||
DB::throwFromErrno("Cannot connect", 0);
|
||||
}
|
||||
|
||||
close(fd);
|
||||
|
||||
if (watch.elapsedSeconds() > 0.1)
|
||||
{
|
||||
std::cerr << watch.elapsedSeconds() << "\n";
|
||||
cancel = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
std::cerr << e.displayText() << "\n";
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
for (auto & thread : threads)
|
||||
thread.join();
|
||||
|
||||
std::cerr << "\n";
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
std::cerr << e.displayText() << "\n";
|
||||
}
|
@ -289,7 +289,8 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
|
||||
|
||||
while (filt_pos < filt_end_sse)
|
||||
{
|
||||
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
|
||||
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
|
||||
mask = ~mask;
|
||||
|
||||
if (0 == mask)
|
||||
{
|
||||
|
@ -356,7 +356,8 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
|
||||
|
||||
while (filt_pos < filt_end_sse)
|
||||
{
|
||||
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
|
||||
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
|
||||
mask = ~mask;
|
||||
|
||||
if (0 == mask)
|
||||
{
|
||||
|
@ -17,13 +17,17 @@ namespace DB
|
||||
static UInt64 toBits64(const Int8 * bytes64)
|
||||
{
|
||||
static const __m128i zero16 = _mm_setzero_si128();
|
||||
return static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16)))
|
||||
<< 16)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16)))
|
||||
<< 32)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16)))
|
||||
<< 48);
|
||||
UInt64 res =
|
||||
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16))) << 16)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16))) << 32)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16))) << 48);
|
||||
|
||||
return ~res;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -49,7 +53,7 @@ size_t countBytesInFilter(const UInt8 * filt, size_t sz)
|
||||
#endif
|
||||
|
||||
for (; pos < end; ++pos)
|
||||
count += *pos > 0;
|
||||
count += *pos != 0;
|
||||
|
||||
return count;
|
||||
}
|
||||
@ -82,7 +86,7 @@ size_t countBytesInFilterWithNull(const IColumn::Filter & filt, const UInt8 * nu
|
||||
#endif
|
||||
|
||||
for (; pos < end; ++pos)
|
||||
count += (*pos & ~*pos2) > 0;
|
||||
count += (*pos & ~*pos2) != 0;
|
||||
|
||||
return count;
|
||||
}
|
||||
@ -232,9 +236,10 @@ namespace
|
||||
|
||||
while (filt_pos < filt_end_aligned)
|
||||
{
|
||||
const auto mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)),
|
||||
zero_vec));
|
||||
mask = ~mask;
|
||||
|
||||
if (mask == 0)
|
||||
{
|
||||
|
@ -120,9 +120,10 @@ inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_siz
|
||||
|
||||
for (size_t offset = min_size; offset < max_size; offset += 16)
|
||||
{
|
||||
uint16_t mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
uint16_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(longest + offset)),
|
||||
zero16));
|
||||
mask = ~mask;
|
||||
|
||||
if (mask)
|
||||
{
|
||||
|
@ -163,6 +163,7 @@ private:
|
||||
friend class ActionsDAG;
|
||||
};
|
||||
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
using Blocks = std::vector<Block>;
|
||||
using BlocksList = std::list<Block>;
|
||||
using BlocksPtr = std::shared_ptr<Blocks>;
|
||||
|
@ -6,8 +6,10 @@
|
||||
#include <Core/MySQL/PacketsProtocolText.h>
|
||||
#include <Core/MySQL/PacketsReplication.h>
|
||||
#include <Core/MySQL/MySQLReplication.h>
|
||||
#include <Common/DNSResolver.h>
|
||||
#include <Poco/String.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using namespace Generic;
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Poco/Net/NetException.h>
|
||||
#include <Poco/Net/StreamSocket.h>
|
||||
#include <Common/DNSResolver.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/NetException.h>
|
||||
#include <Core/MySQL/IMySQLWritePacket.h>
|
||||
|
@ -13,11 +13,13 @@
|
||||
#include <IO/WriteBufferFromOStream.h>
|
||||
#include <ext/range.h>
|
||||
#include <ext/size.h>
|
||||
#include <ext/map.h>
|
||||
#include <ext/chrono_io.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include "CacheDictionary.inc.h"
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -130,8 +132,8 @@ const IDictionarySource * CacheDictionary::getSource() const
|
||||
void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
|
||||
{
|
||||
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_value);
|
||||
|
||||
getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; });
|
||||
DictionaryDefaultValueExtractor<UInt64> default_value_extractor(null_value);
|
||||
getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, default_value_extractor);
|
||||
}
|
||||
|
||||
|
||||
@ -249,34 +251,384 @@ void CacheDictionary::isInConstantVector(const Key child_id, const PaddedPODArra
|
||||
out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end();
|
||||
}
|
||||
|
||||
void CacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
|
||||
ColumnPtr CacheDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto & keys = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
auto keys_size = keys.size();
|
||||
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
const auto null_value = StringRef{std::get<String>(attribute.null_value)};
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
getItemsString(attribute, ids, out, [&](const size_t) { return null_value; });
|
||||
const auto & null_value = std::get<AttributeType>(attribute.null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
getItemsString(attribute, keys, column.get(), default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
getItemsNumberImpl<AttributeType, AttributeType>(attribute, keys, out, default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void CacheDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void CacheDictionary::getItemsNumberImpl(
|
||||
Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ResultArrayType<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
/// First fill everything with default values
|
||||
const auto rows = ext::size(ids);
|
||||
for (const auto row : ext::range(0, rows))
|
||||
out[row] = default_value_extractor[row];
|
||||
|
||||
getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); });
|
||||
/// Maybe there are duplicate keys, so we remember their indices.
|
||||
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
|
||||
|
||||
size_t cache_hit = 0;
|
||||
size_t cache_not_found_count = 0;
|
||||
size_t cache_expired_cound = 0;
|
||||
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
auto insert_to_answer_routine = [&](size_t row, size_t idx)
|
||||
{
|
||||
auto & cell = cells[idx];
|
||||
if (!cell.isDefault())
|
||||
out[row] = static_cast<OutputType>(attribute_array[idx]);
|
||||
};
|
||||
|
||||
/// fetch up-to-date values, decide which ones require update
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
|
||||
/** cell should be updated if either:
|
||||
* 1. ids do not match,
|
||||
* 2. cell has expired,
|
||||
* 3. explicit defaults were specified and cell was set default. */
|
||||
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
++cache_hit;
|
||||
insert_to_answer_routine(row, cell_idx);
|
||||
}
|
||||
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
|
||||
{
|
||||
++cache_not_found_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
}
|
||||
else if (state == ResultState::FoundButExpired)
|
||||
{
|
||||
cache_expired_cound++;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
|
||||
if (allow_read_expired_keys)
|
||||
insert_to_answer_routine(row, cell_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_cound);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows - cache_not_found_count - cache_expired_cound, std::memory_order_release);
|
||||
|
||||
if (!cache_not_found_count)
|
||||
{
|
||||
/// Nothing to update - return
|
||||
if (!cache_expired_cound)
|
||||
return;
|
||||
|
||||
/// Update async only if allow_read_expired_keys_is_enabledadd condvar usage and better code
|
||||
if (allow_read_expired_keys)
|
||||
{
|
||||
std::vector<Key> required_expired_ids;
|
||||
required_expired_ids.reserve(cache_expired_cound);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
/// request new values
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
|
||||
/// Nothing to do - return
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// From this point we have to update all keys sync.
|
||||
/// Maybe allow_read_expired_keys_from_cache_dictionary is disabled
|
||||
/// and there no cache_not_found_ids but some cache_expired.
|
||||
|
||||
std::vector<Key> required_ids;
|
||||
required_ids.reserve(cache_not_found_count + cache_expired_cound);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
/// Request new values
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
waitForCurrentUpdateFinish(update_unit_ptr);
|
||||
|
||||
/// Add updated keys to answer.
|
||||
|
||||
const size_t attribute_index = getAttributeIndex(attribute.name);
|
||||
|
||||
for (auto & [key, value] : update_unit_ptr->found_ids)
|
||||
{
|
||||
if (value.found)
|
||||
{
|
||||
for (const size_t row : cache_expired_or_not_found_ids[key])
|
||||
out[row] = std::get<OutputType>(value.values[attribute_index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CacheDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
|
||||
void CacheDictionary::getItemsString(
|
||||
Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ColumnString * out,
|
||||
DictionaryDefaultValueExtractor<String> & default_value_extractor) const
|
||||
{
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; });
|
||||
/// Save on some allocations.
|
||||
out->getOffsets().reserve(rows);
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
|
||||
|
||||
auto found_outdated_values = false;
|
||||
|
||||
/// Perform optimistic version, fallback to pessimistic if failed.
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
/// Fetch up-to-date values, discard on fail.
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
}
|
||||
else
|
||||
{
|
||||
found_outdated_values = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Optimistic code completed successfully.
|
||||
if (!found_outdated_values)
|
||||
{
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows, std::memory_order_release);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, ids.size());
|
||||
return;
|
||||
}
|
||||
|
||||
/// Now onto the pessimistic one, discard possible partial results from the optimistic path.
|
||||
out->getChars().resize_assume_reserved(0);
|
||||
out->getOffsets().resize_assume_reserved(0);
|
||||
|
||||
/// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
|
||||
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
|
||||
/// we are going to store every string separately
|
||||
std::unordered_map<Key, String> local_cache;
|
||||
|
||||
size_t cache_not_found_count = 0;
|
||||
size_t cache_expired_count = 0;
|
||||
|
||||
size_t total_length = 0;
|
||||
size_t cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
auto insert_value_routine = [&](size_t row, size_t id, size_t cell_idx)
|
||||
{
|
||||
const auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
|
||||
|
||||
/// Do not store default, but count it in total length.
|
||||
if (!cell.isDefault())
|
||||
local_cache[id] = String{string_ref};
|
||||
|
||||
total_length += string_ref.size + 1;
|
||||
};
|
||||
|
||||
for (const auto row : ext::range(0, ids.size()))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
++cache_hit;
|
||||
insert_value_routine(row, id, cell_idx);
|
||||
}
|
||||
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
|
||||
{
|
||||
++cache_not_found_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
}
|
||||
else if (state == ResultState::FoundButExpired)
|
||||
{
|
||||
++cache_expired_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
|
||||
if (allow_read_expired_keys)
|
||||
insert_value_routine(row, id, cell_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows - cache_expired_count - cache_not_found_count, std::memory_order_release);
|
||||
|
||||
/// Async update of expired keys.
|
||||
if (!cache_not_found_count)
|
||||
{
|
||||
if (allow_read_expired_keys && cache_expired_count)
|
||||
{
|
||||
std::vector<Key> required_expired_ids;
|
||||
required_expired_ids.reserve(cache_expired_count);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
|
||||
/// Insert all found keys and defaults to output array.
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(ids)))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
StringRef value;
|
||||
|
||||
/// Previously we stored found keys in map.
|
||||
const auto it = local_cache.find(id);
|
||||
if (it != local_cache.end())
|
||||
value = StringRef(it->second);
|
||||
else
|
||||
value = default_value_extractor[row];
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
}
|
||||
|
||||
/// Nothing to do else.
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// We will request both cache_not_found_ids and cache_expired_ids sync.
|
||||
std::vector<Key> required_ids;
|
||||
required_ids.reserve(cache_not_found_count + cache_expired_count);
|
||||
std::transform(
|
||||
std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
waitForCurrentUpdateFinish(update_unit_ptr);
|
||||
|
||||
const size_t attribute_index = getAttributeIndex(attribute.name);
|
||||
|
||||
/// Only calculate the total length.
|
||||
for (auto & [key, value] : update_unit_ptr->found_ids)
|
||||
{
|
||||
if (value.found)
|
||||
{
|
||||
const auto found_value_ref = std::get<String>(value.values[attribute_index]);
|
||||
total_length += (found_value_ref.size() + 1) * cache_expired_or_not_found_ids[key].size();
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto row : cache_expired_or_not_found_ids[key])
|
||||
total_length += default_value_extractor[row].size + 1;
|
||||
}
|
||||
}
|
||||
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(ids)))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
StringRef value;
|
||||
|
||||
/// We have two maps: found in cache and found in source.
|
||||
const auto local_it = local_cache.find(id);
|
||||
if (local_it != local_cache.end())
|
||||
value = StringRef(local_it->second);
|
||||
else
|
||||
{
|
||||
const auto found_it = update_unit_ptr->found_ids.find(id);
|
||||
|
||||
/// Previously we didn't store defaults in local cache.
|
||||
if (found_it != update_unit_ptr->found_ids.end() && found_it->second.found)
|
||||
value = std::get<String>(found_it->second.values[attribute_index]);
|
||||
else
|
||||
value = default_value_extractor[row];
|
||||
}
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class... Ts>
|
||||
struct Overloaded : Ts... {using Ts::operator()...;};
|
||||
|
||||
@ -375,8 +727,14 @@ size_t CacheDictionary::findCellIdxForSet(const Key & id) const
|
||||
return oldest_id;
|
||||
}
|
||||
|
||||
void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr CacheDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
|
||||
{
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
auto result = ColumnUInt8::create(ext::size(ids));
|
||||
auto& out = result->getData();
|
||||
|
||||
/// There are three types of ids.
|
||||
/// - Valid ids. These ids are presented in local cache and their lifetime is not expired.
|
||||
/// - CacheExpired ids. Ids that are in local cache, but their values are rotted (lifetime is expired).
|
||||
@ -444,7 +802,7 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
|
||||
{
|
||||
/// Nothing to update - return;
|
||||
if (!cache_expired_count)
|
||||
return;
|
||||
return result;
|
||||
|
||||
if (allow_read_expired_keys)
|
||||
{
|
||||
@ -458,7 +816,7 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
/// Update is async - no need to wait.
|
||||
return;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
@ -483,6 +841,8 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
|
||||
for (const auto row : cache_expired_or_not_found_ids[key])
|
||||
out[row] = true;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@ -707,7 +1067,7 @@ PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const
|
||||
|
||||
BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<CacheDictionary, Key>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getCachedIds(), column_names);
|
||||
}
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
@ -119,77 +120,20 @@ public:
|
||||
|
||||
std::exception_ptr getLastException() const override;
|
||||
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
|
||||
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void
|
||||
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
|
||||
const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
|
||||
|
||||
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
@ -260,12 +204,18 @@ private:
|
||||
/* NOLINTNEXTLINE(readability-convert-member-functions-to-static) */
|
||||
Attribute createAttributeWithTypeAndName(const AttributeUnderlyingType type, const String & name, const Field & null_value);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void getItemsNumberImpl(
|
||||
Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
|
||||
Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ResultArrayType<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void getItemsString(Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const;
|
||||
void getItemsString(
|
||||
Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ColumnString * out,
|
||||
DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
|
||||
|
||||
PaddedPODArray<Key> getCachedIds() const;
|
||||
|
||||
@ -456,5 +406,6 @@ private:
|
||||
mutable std::condition_variable is_update_finished;
|
||||
|
||||
std::atomic<bool> finished{false};
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,368 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include "CacheDictionary.h"
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Common/ProfilingScopedRWLock.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <ext/chrono_io.h>
|
||||
#include <ext/map.h>
|
||||
#include <ext/range.h>
|
||||
#include <ext/size.h>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event DictCacheKeysRequested;
|
||||
extern const Event DictCacheKeysRequestedMiss;
|
||||
extern const Event DictCacheKeysRequestedFound;
|
||||
extern const Event DictCacheKeysExpired;
|
||||
extern const Event DictCacheKeysNotFound;
|
||||
extern const Event DictCacheKeysHit;
|
||||
extern const Event DictCacheRequestTimeNs;
|
||||
extern const Event DictCacheRequests;
|
||||
extern const Event DictCacheLockWriteNs;
|
||||
extern const Event DictCacheLockReadNs;
|
||||
}
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric DictCacheRequests;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
void CacheDictionary::getItemsNumberImpl(
|
||||
Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
|
||||
{
|
||||
/// First fill everything with default values
|
||||
const auto rows = ext::size(ids);
|
||||
for (const auto row : ext::range(0, rows))
|
||||
out[row] = get_default(row);
|
||||
|
||||
/// Maybe there are duplicate keys, so we remember their indices.
|
||||
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
|
||||
|
||||
size_t cache_hit = 0;
|
||||
size_t cache_not_found_count = 0;
|
||||
size_t cache_expired_cound = 0;
|
||||
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
auto insert_to_answer_routine = [&](size_t row, size_t idx)
|
||||
{
|
||||
auto & cell = cells[idx];
|
||||
if (!cell.isDefault())
|
||||
out[row] = static_cast<OutputType>(attribute_array[idx]);
|
||||
};
|
||||
|
||||
/// fetch up-to-date values, decide which ones require update
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
|
||||
/** cell should be updated if either:
|
||||
* 1. ids do not match,
|
||||
* 2. cell has expired,
|
||||
* 3. explicit defaults were specified and cell was set default. */
|
||||
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
++cache_hit;
|
||||
insert_to_answer_routine(row, cell_idx);
|
||||
}
|
||||
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
|
||||
{
|
||||
++cache_not_found_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
}
|
||||
else if (state == ResultState::FoundButExpired)
|
||||
{
|
||||
cache_expired_cound++;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
|
||||
if (allow_read_expired_keys)
|
||||
insert_to_answer_routine(row, cell_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_cound);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows - cache_not_found_count - cache_expired_cound, std::memory_order_release);
|
||||
|
||||
if (!cache_not_found_count)
|
||||
{
|
||||
/// Nothing to update - return
|
||||
if (!cache_expired_cound)
|
||||
return;
|
||||
|
||||
/// Update async only if allow_read_expired_keys_is_enabledadd condvar usage and better code
|
||||
if (allow_read_expired_keys)
|
||||
{
|
||||
std::vector<Key> required_expired_ids;
|
||||
required_expired_ids.reserve(cache_expired_cound);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
/// request new values
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
|
||||
/// Nothing to do - return
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// From this point we have to update all keys sync.
|
||||
/// Maybe allow_read_expired_keys_from_cache_dictionary is disabled
|
||||
/// and there no cache_not_found_ids but some cache_expired.
|
||||
|
||||
std::vector<Key> required_ids;
|
||||
required_ids.reserve(cache_not_found_count + cache_expired_cound);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
/// Request new values
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
waitForCurrentUpdateFinish(update_unit_ptr);
|
||||
|
||||
/// Add updated keys to answer.
|
||||
|
||||
const size_t attribute_index = getAttributeIndex(attribute.name);
|
||||
|
||||
for (auto & [key, value] : update_unit_ptr->found_ids)
|
||||
{
|
||||
if (value.found)
|
||||
{
|
||||
for (const size_t row : cache_expired_or_not_found_ids[key])
|
||||
out[row] = std::get<OutputType>(value.values[attribute_index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void CacheDictionary::getItemsString(
|
||||
Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const
|
||||
{
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
/// Save on some allocations.
|
||||
out->getOffsets().reserve(rows);
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
|
||||
|
||||
auto found_outdated_values = false;
|
||||
|
||||
/// Perform optimistic version, fallback to pessimistic if failed.
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
/// Fetch up-to-date values, discard on fail.
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
}
|
||||
else
|
||||
{
|
||||
found_outdated_values = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Optimistic code completed successfully.
|
||||
if (!found_outdated_values)
|
||||
{
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows, std::memory_order_release);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, ids.size());
|
||||
return;
|
||||
}
|
||||
|
||||
/// Now onto the pessimistic one, discard possible partial results from the optimistic path.
|
||||
out->getChars().resize_assume_reserved(0);
|
||||
out->getOffsets().resize_assume_reserved(0);
|
||||
|
||||
/// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
|
||||
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
|
||||
/// we are going to store every string separately
|
||||
std::unordered_map<Key, String> local_cache;
|
||||
|
||||
size_t cache_not_found_count = 0;
|
||||
size_t cache_expired_count = 0;
|
||||
|
||||
size_t total_length = 0;
|
||||
size_t cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
auto insert_value_routine = [&](size_t row, size_t id, size_t cell_idx)
|
||||
{
|
||||
const auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
|
||||
|
||||
/// Do not store default, but count it in total length.
|
||||
if (!cell.isDefault())
|
||||
local_cache[id] = String{string_ref};
|
||||
|
||||
total_length += string_ref.size + 1;
|
||||
};
|
||||
|
||||
for (const auto row : ext::range(0, ids.size()))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
++cache_hit;
|
||||
insert_value_routine(row, id, cell_idx);
|
||||
}
|
||||
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
|
||||
{
|
||||
++cache_not_found_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
}
|
||||
else if (state == ResultState::FoundButExpired)
|
||||
{
|
||||
++cache_expired_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
|
||||
if (allow_read_expired_keys)
|
||||
insert_value_routine(row, id, cell_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows - cache_expired_count - cache_not_found_count, std::memory_order_release);
|
||||
|
||||
/// Async update of expired keys.
|
||||
if (!cache_not_found_count)
|
||||
{
|
||||
if (allow_read_expired_keys && cache_expired_count)
|
||||
{
|
||||
std::vector<Key> required_expired_ids;
|
||||
required_expired_ids.reserve(cache_expired_count);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
|
||||
/// Insert all found keys and defaults to output array.
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(ids)))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
StringRef value;
|
||||
|
||||
/// Previously we stored found keys in map.
|
||||
const auto it = local_cache.find(id);
|
||||
if (it != local_cache.end())
|
||||
value = StringRef(it->second);
|
||||
else
|
||||
value = get_default(row);
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
}
|
||||
|
||||
/// Nothing to do else.
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// We will request both cache_not_found_ids and cache_expired_ids sync.
|
||||
std::vector<Key> required_ids;
|
||||
required_ids.reserve(cache_not_found_count + cache_expired_count);
|
||||
std::transform(
|
||||
std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
waitForCurrentUpdateFinish(update_unit_ptr);
|
||||
|
||||
const size_t attribute_index = getAttributeIndex(attribute.name);
|
||||
|
||||
/// Only calculate the total length.
|
||||
for (auto & [key, value] : update_unit_ptr->found_ids)
|
||||
{
|
||||
if (value.found)
|
||||
{
|
||||
const auto found_value_ref = std::get<String>(value.values[attribute_index]);
|
||||
total_length += (found_value_ref.size() + 1) * cache_expired_or_not_found_ids[key].size();
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto row : cache_expired_or_not_found_ids[key])
|
||||
total_length += get_default(row).size + 1;
|
||||
}
|
||||
}
|
||||
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(ids)))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
StringRef value;
|
||||
|
||||
/// We have two maps: found in cache and found in source.
|
||||
const auto local_it = local_cache.find(id);
|
||||
if (local_it != local_cache.end())
|
||||
value = StringRef(local_it->second);
|
||||
else
|
||||
{
|
||||
const auto found_it = update_unit_ptr->found_ids.find(id);
|
||||
|
||||
/// Previously we didn't store defaults in local cache.
|
||||
if (found_it != update_unit_ptr->found_ids.end() && found_it->second.found)
|
||||
value = std::get<String>(found_it->second.values[attribute_index]);
|
||||
else
|
||||
value = get_default(row);
|
||||
}
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
#include <Dictionaries/CacheDictionary.h>
|
||||
#include <Dictionaries/CacheDictionary.inc.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void CacheDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) \
|
||||
const \
|
||||
{ \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
const auto null_value = std::get<TYPE>(attribute.null_value); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,34 +0,0 @@
|
||||
#include <Dictionaries/CacheDictionary.h>
|
||||
#include <Dictionaries/CacheDictionary.inc.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void CacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
#include <Dictionaries/CacheDictionary.h>
|
||||
#include <Dictionaries/CacheDictionary.inc.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void CacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t) { return def; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -10,7 +10,8 @@
|
||||
#include <ext/range.h>
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -70,48 +71,50 @@ ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(
|
||||
createAttributes();
|
||||
}
|
||||
|
||||
|
||||
void ComplexKeyCacheDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto null_value = StringRef{std::get<String>(attribute.null_values)};
|
||||
|
||||
getItemsString(attribute, key_columns, out, [&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
void ComplexKeyCacheDictionary::getString(
|
||||
ColumnPtr ComplexKeyCacheDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsString(attribute, key_columns, out, [&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
|
||||
void ComplexKeyCacheDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
ColumnPtr result;
|
||||
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; });
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto & null_value = std::get<AttributeType>(attribute.null_values);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
getItemsString(attribute, key_columns, out, default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
getItemsNumberImpl<AttributeType, AttributeType>(attribute, key_columns, out, default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag,
|
||||
@ -158,15 +161,21 @@ ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata
|
||||
return {oldest_id, false, false};
|
||||
}
|
||||
|
||||
void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr ComplexKeyCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
|
||||
auto result = ColumnUInt8::create(rows_num);
|
||||
auto& out = result->getData();
|
||||
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
out[row] = false;
|
||||
|
||||
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
|
||||
MapType<std::vector<size_t>> outdated_keys;
|
||||
|
||||
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
Arena temporary_keys_pool;
|
||||
@ -212,7 +221,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
|
||||
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
|
||||
|
||||
if (outdated_keys.empty())
|
||||
return;
|
||||
return result;
|
||||
|
||||
std::vector<size_t> required_rows(outdated_keys.size());
|
||||
std::transform(
|
||||
@ -233,8 +242,395 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
|
||||
for (const auto out_idx : outdated_keys[key])
|
||||
out[out_idx] = false;
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void ComplexKeyCacheDictionary::getItemsNumberImpl(
|
||||
Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
PaddedPODArray<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
|
||||
MapType<std::vector<size_t>> outdated_keys;
|
||||
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
|
||||
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
Arena temporary_keys_pool;
|
||||
PODArray<StringRef> keys_array(rows_num);
|
||||
|
||||
size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
/// fetch up-to-date values, decide which ones require update
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
keys_array[row] = key;
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
/** cell should be updated if either:
|
||||
* 1. keys (or hash) do not match,
|
||||
* 2. cell has expired,
|
||||
* 3. explicit defaults were specified and cell was set default. */
|
||||
|
||||
if (!find_result.valid)
|
||||
{
|
||||
outdated_keys[key].push_back(row);
|
||||
if (find_result.outdated)
|
||||
++cache_expired;
|
||||
else
|
||||
++cache_not_found;
|
||||
}
|
||||
else
|
||||
{
|
||||
++cache_hit;
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
const auto & cell = cells[cell_idx];
|
||||
out[row] = cell.isDefault() ? default_value_extractor[row] : static_cast<OutputType>(attribute_array[cell_idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
query_count.fetch_add(rows_num, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
|
||||
|
||||
if (outdated_keys.empty())
|
||||
return;
|
||||
|
||||
std::vector<size_t> required_rows(outdated_keys.size());
|
||||
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
|
||||
{
|
||||
return pair.getMapped().front();
|
||||
});
|
||||
|
||||
/// request new values
|
||||
update(
|
||||
key_columns,
|
||||
keys_array,
|
||||
required_rows,
|
||||
[&](const StringRef key, const size_t cell_idx)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
out[row] = static_cast<OutputType>(attribute_array[cell_idx]);
|
||||
},
|
||||
[&](const StringRef key, const size_t)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
out[row] = default_value_extractor[row];
|
||||
});
|
||||
}
|
||||
|
||||
void ComplexKeyCacheDictionary::getItemsString(
|
||||
Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ColumnString * out,
|
||||
DictionaryDefaultValueExtractor<String> & default_value_extractor) const
|
||||
{
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
/// save on some allocations
|
||||
out->getOffsets().reserve(rows_num);
|
||||
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
Arena temporary_keys_pool;
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
|
||||
|
||||
auto found_outdated_values = false;
|
||||
|
||||
/// perform optimistic version, fallback to pessimistic if failed
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
/// fetch up-to-date values, discard on fail
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
SCOPE_EXIT(temporary_keys_pool.rollback(key.size));
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
if (!find_result.valid)
|
||||
{
|
||||
found_outdated_values = true;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
const auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// optimistic code completed successfully
|
||||
if (!found_outdated_values)
|
||||
{
|
||||
query_count.fetch_add(rows_num, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows_num, std::memory_order_release);
|
||||
return;
|
||||
}
|
||||
|
||||
/// now onto the pessimistic one, discard possible partial results from the optimistic path
|
||||
out->getChars().resize_assume_reserved(0);
|
||||
out->getOffsets().resize_assume_reserved(0);
|
||||
|
||||
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
|
||||
MapType<std::vector<size_t>> outdated_keys;
|
||||
/// we are going to store every string separately
|
||||
MapType<StringRef> map;
|
||||
PODArray<StringRef> keys_array(rows_num);
|
||||
|
||||
size_t total_length = 0;
|
||||
size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
keys_array[row] = key;
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
if (!find_result.valid)
|
||||
{
|
||||
outdated_keys[key].push_back(row);
|
||||
if (find_result.outdated)
|
||||
++cache_expired;
|
||||
else
|
||||
++cache_not_found;
|
||||
}
|
||||
else
|
||||
{
|
||||
++cache_hit;
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
const auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
|
||||
|
||||
if (!cell.isDefault())
|
||||
map[key] = copyIntoArena(string_ref, temporary_keys_pool);
|
||||
|
||||
total_length += string_ref.size + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows_num, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
|
||||
|
||||
/// request new values
|
||||
if (!outdated_keys.empty())
|
||||
{
|
||||
std::vector<size_t> required_rows(outdated_keys.size());
|
||||
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
|
||||
{
|
||||
return pair.getMapped().front();
|
||||
});
|
||||
|
||||
update(
|
||||
key_columns,
|
||||
keys_array,
|
||||
required_rows,
|
||||
[&](const StringRef key, const size_t cell_idx)
|
||||
{
|
||||
const StringRef attribute_value = attribute_array[cell_idx];
|
||||
|
||||
/// We must copy key and value to own memory, because it may be replaced with another
|
||||
/// in next iterations of inner loop of update.
|
||||
const StringRef copied_key = copyIntoArena(key, temporary_keys_pool);
|
||||
const StringRef copied_value = copyIntoArena(attribute_value, temporary_keys_pool);
|
||||
|
||||
map[copied_key] = copied_value;
|
||||
total_length += (attribute_value.size + 1) * outdated_keys[key].size();
|
||||
},
|
||||
[&](const StringRef key, const size_t)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
total_length += default_value_extractor[row].size + 1;
|
||||
});
|
||||
}
|
||||
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(keys_array)))
|
||||
{
|
||||
const StringRef key = keys_array[row];
|
||||
auto * const it = map.find(key);
|
||||
const auto string_ref = it ? it->getMapped() : default_value_extractor[row];
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename PresentKeyHandler, typename AbsentKeyHandler>
|
||||
void ComplexKeyCacheDictionary::update(
|
||||
const Columns & in_key_columns,
|
||||
const PODArray<StringRef> & in_keys,
|
||||
const std::vector<size_t> & in_requested_rows,
|
||||
PresentKeyHandler && on_cell_updated,
|
||||
AbsentKeyHandler && on_key_not_found) const
|
||||
{
|
||||
MapType<bool> remaining_keys{in_requested_rows.size()};
|
||||
for (const auto row : in_requested_rows)
|
||||
remaining_keys.insert({in_keys[row], false});
|
||||
|
||||
std::uniform_int_distribution<UInt64> distribution(dict_lifetime.min_sec, dict_lifetime.max_sec);
|
||||
|
||||
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
|
||||
{
|
||||
Stopwatch watch;
|
||||
auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows);
|
||||
stream->readPrefix();
|
||||
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
|
||||
const auto attributes_size = attributes.size();
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
/// cache column pointers
|
||||
const auto key_columns = ext::map<Columns>(
|
||||
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
|
||||
|
||||
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
|
||||
{
|
||||
return block.safeGetByPosition(keys_size + attribute_idx).column;
|
||||
});
|
||||
|
||||
const auto rows_num = block.rows();
|
||||
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
auto key = allocKey(row, key_columns, keys);
|
||||
const auto hash = StringRefHash{}(key);
|
||||
const auto find_result = findCellIdx(key, now, hash);
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
auto & cell = cells[cell_idx];
|
||||
|
||||
for (const auto attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
const auto & attribute_column = *attribute_columns[attribute_idx];
|
||||
auto & attribute = attributes[attribute_idx];
|
||||
|
||||
setAttributeValue(attribute, cell_idx, attribute_column[row]);
|
||||
}
|
||||
|
||||
/// if cell id is zero and zero does not map to this cell, then the cell is unused
|
||||
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
|
||||
element_count.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
/// handle memory allocated for old key
|
||||
if (key == cell.key)
|
||||
{
|
||||
freeKey(key);
|
||||
key = cell.key;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// new key is different from the old one
|
||||
if (cell.key.data)
|
||||
freeKey(cell.key);
|
||||
|
||||
cell.key = key;
|
||||
}
|
||||
|
||||
cell.hash = hash;
|
||||
|
||||
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
|
||||
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
|
||||
else
|
||||
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
|
||||
|
||||
/// inform caller
|
||||
on_cell_updated(key, cell_idx);
|
||||
/// mark corresponding id as found
|
||||
remaining_keys[key] = true;
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, in_requested_rows.size());
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed());
|
||||
}
|
||||
|
||||
size_t found_num = 0;
|
||||
size_t not_found_num = 0;
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
/// Check which ids have not been found and require setting null_value
|
||||
for (const auto & key_found_pair : remaining_keys)
|
||||
{
|
||||
if (key_found_pair.getMapped())
|
||||
{
|
||||
++found_num;
|
||||
continue;
|
||||
}
|
||||
|
||||
++not_found_num;
|
||||
|
||||
auto key = key_found_pair.getKey();
|
||||
const auto hash = StringRefHash{}(key);
|
||||
const auto find_result = findCellIdx(key, now, hash);
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
auto & cell = cells[cell_idx];
|
||||
|
||||
/// Set null_value for each attribute
|
||||
for (auto & attribute : attributes)
|
||||
setDefaultAttributeValue(attribute, cell_idx);
|
||||
|
||||
/// Check if cell had not been occupied before and increment element counter if it hadn't
|
||||
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
|
||||
element_count.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
if (key == cell.key)
|
||||
key = cell.key;
|
||||
else
|
||||
{
|
||||
if (cell.key.data)
|
||||
freeKey(cell.key);
|
||||
|
||||
/// copy key from temporary pool
|
||||
key = copyKey(key);
|
||||
cell.key = key;
|
||||
}
|
||||
|
||||
cell.hash = hash;
|
||||
|
||||
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
|
||||
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
|
||||
else
|
||||
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
|
||||
|
||||
cell.setDefault();
|
||||
|
||||
/// inform caller that the cell has not been found
|
||||
on_key_not_found(key, cell_idx);
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num);
|
||||
}
|
||||
|
||||
|
||||
void ComplexKeyCacheDictionary::createAttributes()
|
||||
{
|
||||
const auto attributes_size = dict_struct.attributes.size();
|
||||
@ -263,6 +659,102 @@ ComplexKeyCacheDictionary::Attribute & ComplexKeyCacheDictionary::getAttribute(c
|
||||
return attributes[it->second];
|
||||
}
|
||||
|
||||
void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const
|
||||
{
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
const auto & null_value_ref = std::get<String>(attribute.null_values);
|
||||
auto & string_ref = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
|
||||
|
||||
if (string_ref.data != null_value_ref.data())
|
||||
{
|
||||
if (string_ref.data)
|
||||
string_arena->free(const_cast<char *>(string_ref.data), string_ref.size);
|
||||
|
||||
string_ref = StringRef{null_value_ref};
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::get<ContainerPtrType<AttributeType>>(attribute.arrays)[idx] = std::get<AttributeType>(attribute.null_values);
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
ComplexKeyCacheDictionary::Attribute
|
||||
ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}};
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
attr.null_values = null_value.get<String>();
|
||||
attr.arrays = std::make_unique<ContainerType<StringRef>>(size);
|
||||
bytes_allocated += size * sizeof(StringRef);
|
||||
if (!string_arena)
|
||||
string_arena = std::make_unique<ArenaWithFreeLists>();
|
||||
}
|
||||
else
|
||||
{
|
||||
attr.null_values = AttributeType(null_value.get<NearestFieldType<AttributeType>>()); /* NOLINT */
|
||||
attr.arrays = std::make_unique<ContainerType<AttributeType>>(size); /* NOLINT */
|
||||
bytes_allocated += size * sizeof(AttributeType);
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const
|
||||
{
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
const auto & string = value.get<String>();
|
||||
auto & string_ref = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
|
||||
const auto & null_value_ref = std::get<String>(attribute.null_values);
|
||||
|
||||
/// free memory unless it points to a null_value
|
||||
if (string_ref.data && string_ref.data != null_value_ref.data())
|
||||
string_arena->free(const_cast<char *>(string_ref.data), string_ref.size);
|
||||
|
||||
const auto str_size = string.size();
|
||||
if (str_size != 0)
|
||||
{
|
||||
auto * str_ptr = string_arena->alloc(str_size);
|
||||
std::copy(string.data(), string.data() + str_size, str_ptr);
|
||||
string_ref = StringRef{str_ptr, str_size};
|
||||
}
|
||||
else
|
||||
string_ref = {};
|
||||
}
|
||||
else
|
||||
{
|
||||
std::get<ContainerPtrType<AttributeType>>(attribute.arrays)[idx] = value.get<NearestFieldType<AttributeType>>();
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
StringRef ComplexKeyCacheDictionary::allocKey(const size_t row, const Columns & key_columns, StringRefs & keys) const
|
||||
{
|
||||
if (key_size_is_fixed)
|
||||
@ -388,7 +880,7 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names &
|
||||
keys.push_back(cells[idx].key);
|
||||
}
|
||||
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyCacheDictionary, UInt64>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, keys, column_names);
|
||||
}
|
||||
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -89,93 +89,16 @@ public:
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
/// In all functions below, key_columns must be full (non-constant) columns.
|
||||
/// See the requirement in IDataType.h for text-serialization functions.
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const;
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -252,227 +175,18 @@ private:
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void getItemsNumberImpl(
|
||||
Attribute & attribute, const Columns & key_columns, PaddedPODArray<OutputType> & out, DefaultGetter && get_default) const
|
||||
{
|
||||
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
|
||||
MapType<std::vector<size_t>> outdated_keys;
|
||||
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
|
||||
Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
PaddedPODArray<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
Arena temporary_keys_pool;
|
||||
PODArray<StringRef> keys_array(rows_num);
|
||||
|
||||
size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
/// fetch up-to-date values, decide which ones require update
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
keys_array[row] = key;
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
/** cell should be updated if either:
|
||||
* 1. keys (or hash) do not match,
|
||||
* 2. cell has expired,
|
||||
* 3. explicit defaults were specified and cell was set default. */
|
||||
|
||||
if (!find_result.valid)
|
||||
{
|
||||
outdated_keys[key].push_back(row);
|
||||
if (find_result.outdated)
|
||||
++cache_expired;
|
||||
else
|
||||
++cache_not_found;
|
||||
}
|
||||
else
|
||||
{
|
||||
++cache_hit;
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
const auto & cell = cells[cell_idx];
|
||||
out[row] = cell.isDefault() ? get_default(row) : static_cast<OutputType>(attribute_array[cell_idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
query_count.fetch_add(rows_num, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
|
||||
|
||||
if (outdated_keys.empty())
|
||||
return;
|
||||
|
||||
std::vector<size_t> required_rows(outdated_keys.size());
|
||||
std::transform(
|
||||
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); });
|
||||
|
||||
/// request new values
|
||||
update(
|
||||
key_columns,
|
||||
keys_array,
|
||||
required_rows,
|
||||
[&](const StringRef key, const size_t cell_idx)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
out[row] = static_cast<OutputType>(attribute_array[cell_idx]);
|
||||
},
|
||||
[&](const StringRef key, const size_t)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
out[row] = get_default(row);
|
||||
});
|
||||
}
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void getItemsString(Attribute & attribute, const Columns & key_columns, ColumnString * out, DefaultGetter && get_default) const
|
||||
{
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
/// save on some allocations
|
||||
out->getOffsets().reserve(rows_num);
|
||||
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
Arena temporary_keys_pool;
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
|
||||
|
||||
auto found_outdated_values = false;
|
||||
|
||||
/// perform optimistic version, fallback to pessimistic if failed
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
/// fetch up-to-date values, discard on fail
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
SCOPE_EXIT(temporary_keys_pool.rollback(key.size));
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
if (!find_result.valid)
|
||||
{
|
||||
found_outdated_values = true;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
const auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// optimistic code completed successfully
|
||||
if (!found_outdated_values)
|
||||
{
|
||||
query_count.fetch_add(rows_num, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows_num, std::memory_order_release);
|
||||
return;
|
||||
}
|
||||
|
||||
/// now onto the pessimistic one, discard possible partial results from the optimistic path
|
||||
out->getChars().resize_assume_reserved(0);
|
||||
out->getOffsets().resize_assume_reserved(0);
|
||||
|
||||
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
|
||||
MapType<std::vector<size_t>> outdated_keys;
|
||||
/// we are going to store every string separately
|
||||
MapType<StringRef> map;
|
||||
PODArray<StringRef> keys_array(rows_num);
|
||||
|
||||
size_t total_length = 0;
|
||||
size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
keys_array[row] = key;
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
if (!find_result.valid)
|
||||
{
|
||||
outdated_keys[key].push_back(row);
|
||||
if (find_result.outdated)
|
||||
++cache_expired;
|
||||
else
|
||||
++cache_not_found;
|
||||
}
|
||||
else
|
||||
{
|
||||
++cache_hit;
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
const auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
|
||||
|
||||
if (!cell.isDefault())
|
||||
map[key] = copyIntoArena(string_ref, temporary_keys_pool);
|
||||
|
||||
total_length += string_ref.size + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows_num, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
|
||||
|
||||
/// request new values
|
||||
if (!outdated_keys.empty())
|
||||
{
|
||||
std::vector<size_t> required_rows(outdated_keys.size());
|
||||
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
|
||||
{
|
||||
return pair.getMapped().front();
|
||||
});
|
||||
|
||||
update(
|
||||
key_columns,
|
||||
keys_array,
|
||||
required_rows,
|
||||
[&](const StringRef key, const size_t cell_idx)
|
||||
{
|
||||
const StringRef attribute_value = attribute_array[cell_idx];
|
||||
|
||||
/// We must copy key and value to own memory, because it may be replaced with another
|
||||
/// in next iterations of inner loop of update.
|
||||
const StringRef copied_key = copyIntoArena(key, temporary_keys_pool);
|
||||
const StringRef copied_value = copyIntoArena(attribute_value, temporary_keys_pool);
|
||||
|
||||
map[copied_key] = copied_value;
|
||||
total_length += (attribute_value.size + 1) * outdated_keys[key].size();
|
||||
},
|
||||
[&](const StringRef key, const size_t)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
total_length += get_default(row).size + 1;
|
||||
});
|
||||
}
|
||||
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(keys_array)))
|
||||
{
|
||||
const StringRef key = keys_array[row];
|
||||
const auto it = map.find(key);
|
||||
const auto string_ref = it ? it->getMapped() : get_default(row);
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
}
|
||||
}
|
||||
void getItemsString(
|
||||
Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ColumnString * out,
|
||||
DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
|
||||
|
||||
template <typename PresentKeyHandler, typename AbsentKeyHandler>
|
||||
void update(
|
||||
@ -480,152 +194,7 @@ private:
|
||||
const PODArray<StringRef> & in_keys,
|
||||
const std::vector<size_t> & in_requested_rows,
|
||||
PresentKeyHandler && on_cell_updated,
|
||||
AbsentKeyHandler && on_key_not_found) const
|
||||
{
|
||||
MapType<bool> remaining_keys{in_requested_rows.size()};
|
||||
for (const auto row : in_requested_rows)
|
||||
remaining_keys.insert({in_keys[row], false});
|
||||
|
||||
std::uniform_int_distribution<UInt64> distribution(dict_lifetime.min_sec, dict_lifetime.max_sec);
|
||||
|
||||
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
|
||||
{
|
||||
Stopwatch watch;
|
||||
auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows);
|
||||
stream->readPrefix();
|
||||
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
|
||||
const auto attributes_size = attributes.size();
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
/// cache column pointers
|
||||
const auto key_columns = ext::map<Columns>(
|
||||
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
|
||||
|
||||
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
|
||||
{
|
||||
return block.safeGetByPosition(keys_size + attribute_idx).column;
|
||||
});
|
||||
|
||||
const auto rows_num = block.rows();
|
||||
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
auto key = allocKey(row, key_columns, keys);
|
||||
const auto hash = StringRefHash{}(key);
|
||||
const auto find_result = findCellIdx(key, now, hash);
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
auto & cell = cells[cell_idx];
|
||||
|
||||
for (const auto attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
const auto & attribute_column = *attribute_columns[attribute_idx];
|
||||
auto & attribute = attributes[attribute_idx];
|
||||
|
||||
setAttributeValue(attribute, cell_idx, attribute_column[row]);
|
||||
}
|
||||
|
||||
/// if cell id is zero and zero does not map to this cell, then the cell is unused
|
||||
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
|
||||
element_count.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
/// handle memory allocated for old key
|
||||
if (key == cell.key)
|
||||
{
|
||||
freeKey(key);
|
||||
key = cell.key;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// new key is different from the old one
|
||||
if (cell.key.data)
|
||||
freeKey(cell.key);
|
||||
|
||||
cell.key = key;
|
||||
}
|
||||
|
||||
cell.hash = hash;
|
||||
|
||||
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
|
||||
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
|
||||
else
|
||||
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
|
||||
|
||||
/// inform caller
|
||||
on_cell_updated(key, cell_idx);
|
||||
/// mark corresponding id as found
|
||||
remaining_keys[key] = true;
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, in_requested_rows.size());
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed());
|
||||
}
|
||||
|
||||
size_t found_num = 0;
|
||||
size_t not_found_num = 0;
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
/// Check which ids have not been found and require setting null_value
|
||||
for (const auto & key_found_pair : remaining_keys)
|
||||
{
|
||||
if (key_found_pair.getMapped())
|
||||
{
|
||||
++found_num;
|
||||
continue;
|
||||
}
|
||||
|
||||
++not_found_num;
|
||||
|
||||
auto key = key_found_pair.getKey();
|
||||
const auto hash = StringRefHash{}(key);
|
||||
const auto find_result = findCellIdx(key, now, hash);
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
auto & cell = cells[cell_idx];
|
||||
|
||||
/// Set null_value for each attribute
|
||||
for (auto & attribute : attributes)
|
||||
setDefaultAttributeValue(attribute, cell_idx);
|
||||
|
||||
/// Check if cell had not been occupied before and increment element counter if it hadn't
|
||||
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
|
||||
element_count.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
if (key == cell.key)
|
||||
key = cell.key;
|
||||
else
|
||||
{
|
||||
if (cell.key.data)
|
||||
freeKey(cell.key);
|
||||
|
||||
/// copy key from temporary pool
|
||||
key = copyKey(key);
|
||||
cell.key = key;
|
||||
}
|
||||
|
||||
cell.hash = hash;
|
||||
|
||||
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
|
||||
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
|
||||
else
|
||||
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
|
||||
|
||||
cell.setDefault();
|
||||
|
||||
/// inform caller that the cell has not been found
|
||||
on_key_not_found(key, cell_idx);
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num);
|
||||
}
|
||||
AbsentKeyHandler && on_key_not_found) const;
|
||||
|
||||
UInt64 getCellIdx(const StringRef key) const;
|
||||
|
||||
|
@ -1,45 +0,0 @@
|
||||
#include "ComplexKeyCacheDictionary.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
ComplexKeyCacheDictionary::Attribute
|
||||
ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}};
|
||||
|
||||
switch (type)
|
||||
{
|
||||
#define DISPATCH(TYPE) \
|
||||
case AttributeUnderlyingType::ut##TYPE: \
|
||||
attr.null_values = TYPE(null_value.get<NearestFieldType<TYPE>>()); /* NOLINT */ \
|
||||
attr.arrays = std::make_unique<ContainerType<TYPE>>(size); /* NOLINT */ \
|
||||
bytes_allocated += size * sizeof(TYPE); \
|
||||
break;
|
||||
DISPATCH(UInt8)
|
||||
DISPATCH(UInt16)
|
||||
DISPATCH(UInt32)
|
||||
DISPATCH(UInt64)
|
||||
DISPATCH(UInt128)
|
||||
DISPATCH(Int8)
|
||||
DISPATCH(Int16)
|
||||
DISPATCH(Int32)
|
||||
DISPATCH(Int64)
|
||||
DISPATCH(Decimal32)
|
||||
DISPATCH(Decimal64)
|
||||
DISPATCH(Decimal128)
|
||||
DISPATCH(Float32)
|
||||
DISPATCH(Float64)
|
||||
#undef DISPATCH
|
||||
case AttributeUnderlyingType::utString:
|
||||
attr.null_values = null_value.get<String>();
|
||||
attr.arrays = std::make_unique<ContainerType<StringRef>>(size);
|
||||
bytes_allocated += size * sizeof(StringRef);
|
||||
if (!string_arena)
|
||||
string_arena = std::make_unique<ArenaWithFreeLists>();
|
||||
break;
|
||||
}
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
#include <Dictionaries/ComplexKeyCacheDictionary.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void ComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
#include <Dictionaries/ComplexKeyCacheDictionary.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void ComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
#include <Dictionaries/ComplexKeyCacheDictionary.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void ComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t) { return def; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,78 +0,0 @@
|
||||
#include "ComplexKeyCacheDictionary.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const
|
||||
{
|
||||
switch (attribute.type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = value.get<UInt64>();
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = value.get<UInt64>();
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = value.get<UInt64>();
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = value.get<UInt64>();
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = value.get<UInt128>();
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = value.get<Int64>();
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = value.get<Int64>();
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = value.get<Int64>();
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = value.get<Int64>();
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = value.get<Float64>();
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = value.get<Float64>();
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = value.get<Decimal32>();
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
std::get<ContainerPtrType<Decimal64>>(attribute.arrays)[idx] = value.get<Decimal64>();
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
std::get<ContainerPtrType<Decimal128>>(attribute.arrays)[idx] = value.get<Decimal128>();
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
const auto & string = value.get<String>();
|
||||
auto & string_ref = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
|
||||
const auto & null_value_ref = std::get<String>(attribute.null_values);
|
||||
|
||||
/// free memory unless it points to a null_value
|
||||
if (string_ref.data && string_ref.data != null_value_ref.data())
|
||||
string_arena->free(const_cast<char *>(string_ref.data), string_ref.size);
|
||||
|
||||
const auto str_size = string.size();
|
||||
if (str_size != 0)
|
||||
{
|
||||
auto * str_ptr = string_arena->alloc(str_size);
|
||||
std::copy(string.data(), string.data() + str_size, str_ptr);
|
||||
string_ref = StringRef{str_ptr, str_size};
|
||||
}
|
||||
else
|
||||
string_ref = {};
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,71 +0,0 @@
|
||||
#include "ComplexKeyCacheDictionary.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const
|
||||
{
|
||||
switch (attribute.type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = std::get<UInt8>(attribute.null_values);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = std::get<UInt16>(attribute.null_values);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = std::get<UInt32>(attribute.null_values);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = std::get<UInt64>(attribute.null_values);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = std::get<UInt128>(attribute.null_values);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = std::get<Int8>(attribute.null_values);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = std::get<Int16>(attribute.null_values);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = std::get<Int32>(attribute.null_values);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = std::get<Int64>(attribute.null_values);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = std::get<Float32>(attribute.null_values);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = std::get<Float64>(attribute.null_values);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = std::get<Decimal32>(attribute.null_values);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
std::get<ContainerPtrType<Decimal64>>(attribute.arrays)[idx] = std::get<Decimal64>(attribute.null_values);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
std::get<ContainerPtrType<Decimal128>>(attribute.arrays)[idx] = std::get<Decimal128>(attribute.null_values);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
const auto & null_value_ref = std::get<String>(attribute.null_values);
|
||||
auto & string_ref = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
|
||||
|
||||
if (string_ref.data != null_value_ref.data())
|
||||
{
|
||||
if (string_ref.data)
|
||||
string_arena->free(const_cast<char *>(string_ref.data), string_ref.size);
|
||||
|
||||
string_ref = StringRef{null_value_ref};
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -3,6 +3,9 @@
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
#include <Core/Defines.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -31,194 +34,151 @@ ComplexKeyDirectDictionary::ComplexKeyDirectDictionary(
|
||||
createAttributes();
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyDirectDictionary::get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyDirectDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
|
||||
ColumnPtr ComplexKeyDirectDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto & null_value = std::get<StringRef>(attribute.null_values);
|
||||
getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t) { return String(null_value.data, null_value.size); });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyDirectDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyDirectDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
ColumnPtr result;
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t row) { const auto ref = def->getDataAt(row); return String(ref.data, ref.size); });
|
||||
}
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyDirectDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const TYPE def, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyDirectDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const String & def, ColumnString * const out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
ComplexKeyDirectDictionary::getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t) { return def; });
|
||||
}
|
||||
|
||||
|
||||
void ComplexKeyDirectDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
ColumnUInt8::MutablePtr col_null_map_to;
|
||||
ColumnUInt8::Container * vec_null_map_to = nullptr;
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
has<UInt8>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
has<UInt16>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
has<UInt32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
has<UInt64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
has<UInt128>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
has<Int8>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
has<Int16>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
has<Int32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
has<Int64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
has<Float32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
has<Float64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
has<String>(attribute, key_columns, out);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
has<Decimal32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
has<Decimal64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
has<Decimal128>(attribute, key_columns, out);
|
||||
break;
|
||||
col_null_map_to = ColumnUInt8::create(keys_size, false);
|
||||
vec_null_map_to = &col_null_map_to->getData();
|
||||
}
|
||||
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
|
||||
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<String, String>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const String value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
const auto ref = StringRef{value};
|
||||
out->insertData(ref.data, ref.size);
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out[row] = value;
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
ColumnUInt8::Ptr ComplexKeyDirectDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
auto size = key_columns.front()->size();
|
||||
auto result = ColumnUInt8::create(size);
|
||||
auto& out = result->getData();
|
||||
|
||||
const auto rows = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys_array(keys_size);
|
||||
MapType<UInt8> has_key;
|
||||
Arena temporary_keys_pool;
|
||||
std::vector<size_t> to_load(rows);
|
||||
PODArray<StringRef> keys(rows);
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
|
||||
keys[row] = key;
|
||||
has_key[key] = 0;
|
||||
to_load[row] = row;
|
||||
}
|
||||
|
||||
auto stream = source_ptr->loadKeys(key_columns, to_load);
|
||||
|
||||
stream->readPrefix();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
const auto columns = ext::map<Columns>(
|
||||
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
|
||||
|
||||
Arena pool;
|
||||
|
||||
StringRefs keys_temp(keys_size);
|
||||
|
||||
const auto columns_size = columns.front()->size();
|
||||
|
||||
for (const auto row_idx : ext::range(0, columns_size))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
|
||||
if (has_key.has(key))
|
||||
{
|
||||
has_key[key] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
out[row] = has_key[keys[row]];
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void ComplexKeyDirectDictionary::createAttributes()
|
||||
{
|
||||
@ -229,7 +189,7 @@ void ComplexKeyDirectDictionary::createAttributes()
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attribute_name_by_index.emplace(attributes.size(), attribute.name);
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value, attribute.name));
|
||||
attributes.push_back(createAttribute(attribute, attribute.null_value, attribute.name));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
|
||||
@ -237,7 +197,6 @@ void ComplexKeyDirectDictionary::createAttributes()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void ComplexKeyDirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
@ -254,59 +213,19 @@ void ComplexKeyDirectDictionary::createAttributeImpl<String>(Attribute & attribu
|
||||
}
|
||||
|
||||
|
||||
ComplexKeyDirectDictionary::Attribute ComplexKeyDirectDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & attr_name)
|
||||
ComplexKeyDirectDictionary::Attribute ComplexKeyDirectDictionary::createAttribute(
|
||||
const DictionaryAttribute & attribute, const Field & null_value, const std::string & attr_name)
|
||||
{
|
||||
Attribute attr{type, {}, {}, attr_name};
|
||||
Attribute attr{attribute.underlying_type, attribute.is_nullable, {}, {}, attr_name};
|
||||
|
||||
switch (type)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
createAttributeImpl<String>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
@ -356,14 +275,18 @@ StringRef ComplexKeyDirectDictionary::placeKeysInPool(
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto rows = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys_array(keys_size);
|
||||
MapType<OutputType> value_by_key;
|
||||
HashMapWithSavedHash<StringRef, bool, StringRefHash> value_is_null;
|
||||
Arena temporary_keys_pool;
|
||||
std::vector<size_t> to_load(rows);
|
||||
PODArray<StringRef> keys(rows);
|
||||
@ -372,8 +295,9 @@ void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
|
||||
keys[row] = key;
|
||||
value_by_key[key] = get_default(row);
|
||||
value_by_key[key] = static_cast<AttributeType>(default_value_extractor[row]);
|
||||
to_load[row] = row;
|
||||
value_is_null[key] = false;
|
||||
}
|
||||
|
||||
auto stream = source_ptr->loadKeys(key_columns, to_load);
|
||||
@ -392,6 +316,11 @@ void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
});
|
||||
for (const size_t attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
if (attribute.name != attribute_name_by_index.at(attribute_idx))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
const IColumn & attribute_column = *attribute_columns[attribute_idx];
|
||||
Arena pool;
|
||||
|
||||
@ -402,17 +331,15 @@ void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
for (const auto row_idx : ext::range(0, columns_size))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
|
||||
if (value_by_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
|
||||
|
||||
if (value_by_key.has(key))
|
||||
{
|
||||
if (attribute.type == AttributeUnderlyingType::utFloat32)
|
||||
{
|
||||
value_by_key[key] = static_cast<Float32>(attribute_column[row_idx].template get<Float64>());
|
||||
}
|
||||
auto value = attribute_column[row_idx];
|
||||
|
||||
if (value.isNull())
|
||||
value_is_null[key] = true;
|
||||
else
|
||||
{
|
||||
value_by_key[key] = static_cast<OutputType>(attribute_column[row_idx].template get<AttributeType>());
|
||||
}
|
||||
|
||||
value_by_key[key] = static_cast<OutputType>(value.template get<NearestFieldType<AttributeType>>());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -422,78 +349,13 @@ void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
set_value(row, value_by_key[keys[row]]);
|
||||
auto key = keys[row];
|
||||
set_value(row, value_by_key[key], value_is_null[key]);
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void ComplexKeyDirectDictionary::getItemsStringImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
{
|
||||
const auto rows = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys_array(keys_size);
|
||||
MapType<String> value_by_key;
|
||||
Arena temporary_keys_pool;
|
||||
std::vector<size_t> to_load(rows);
|
||||
PODArray<StringRef> keys(rows);
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
|
||||
keys[row] = key;
|
||||
value_by_key[key] = get_default(row);
|
||||
to_load[row] = row;
|
||||
}
|
||||
|
||||
auto stream = source_ptr->loadKeys(key_columns, to_load);
|
||||
const auto attributes_size = attributes.size();
|
||||
|
||||
stream->readPrefix();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
const auto columns = ext::map<Columns>(
|
||||
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
|
||||
|
||||
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
|
||||
{
|
||||
return block.safeGetByPosition(keys_size + attribute_idx).column;
|
||||
});
|
||||
for (const size_t attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
const IColumn & attribute_column = *attribute_columns[attribute_idx];
|
||||
Arena pool;
|
||||
|
||||
StringRefs keys_temp(keys_size);
|
||||
|
||||
const auto columns_size = columns.front()->size();
|
||||
|
||||
for (const auto row_idx : ext::range(0, columns_size))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
|
||||
if (value_by_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
|
||||
{
|
||||
const String from_source = attribute_column[row_idx].template get<String>();
|
||||
value_by_key[key] = from_source;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
set_value(row, value_by_key[keys[row]]);
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
const ComplexKeyDirectDictionary::Attribute & ComplexKeyDirectDictionary::getAttribute(const std::string & attribute_name) const
|
||||
{
|
||||
const auto it = attribute_index_by_name.find(attribute_name);
|
||||
@ -503,65 +365,6 @@ const ComplexKeyDirectDictionary::Attribute & ComplexKeyDirectDictionary::getAtt
|
||||
return attributes[it->second];
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void ComplexKeyDirectDictionary::has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
const auto rows = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys_array(keys_size);
|
||||
MapType<UInt8> has_key;
|
||||
Arena temporary_keys_pool;
|
||||
std::vector<size_t> to_load(rows);
|
||||
PODArray<StringRef> keys(rows);
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
|
||||
keys[row] = key;
|
||||
has_key[key] = 0;
|
||||
to_load[row] = row;
|
||||
}
|
||||
|
||||
auto stream = source_ptr->loadKeys(key_columns, to_load);
|
||||
|
||||
stream->readPrefix();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
const auto columns = ext::map<Columns>(
|
||||
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
|
||||
|
||||
for (const size_t attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
Arena pool;
|
||||
|
||||
StringRefs keys_temp(keys_size);
|
||||
|
||||
const auto columns_size = columns.front()->size();
|
||||
|
||||
for (const auto row_idx : ext::range(0, columns_size))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
|
||||
if (has_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
|
||||
{
|
||||
has_key[key] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
out[row] = has_key[keys[row]];
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
BlockInputStreamPtr ComplexKeyDirectDictionary::getBlockInputStream(const Names & /* column_names */, size_t /* max_block_size */) const
|
||||
{
|
||||
return source_ptr->loadAll();
|
||||
|
@ -12,14 +12,13 @@
|
||||
#include <ext/range.h>
|
||||
#include <ext/size.h>
|
||||
#include <ext/map.h>
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
|
||||
#include "DictionaryStructure.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
|
||||
class ComplexKeyDirectDictionary final : public IDictionaryBase
|
||||
{
|
||||
@ -60,78 +59,16 @@ public:
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const TYPE def, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const String & def, ColumnString * const out) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -142,6 +79,8 @@ private:
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
bool is_nullable;
|
||||
|
||||
std::variant<
|
||||
UInt8,
|
||||
UInt16,
|
||||
@ -168,27 +107,21 @@ private:
|
||||
template <typename T>
|
||||
void addAttributeSize(const Attribute & attribute);
|
||||
|
||||
void calculateBytesAllocated();
|
||||
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & name);
|
||||
static Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value, const std::string & name);
|
||||
|
||||
template <typename Pool>
|
||||
StringRef placeKeysInPool(
|
||||
const size_t row, const Columns & key_columns, StringRefs & keys, const std::vector<DictionaryAttribute> & key_attributes, Pool & pool) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void getItemsStringImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
|
||||
template <typename T>
|
||||
void resize(Attribute & attribute, const Key id);
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);
|
||||
@ -197,9 +130,6 @@ private:
|
||||
|
||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
||||
|
||||
template <typename T>
|
||||
void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
const DictionaryStructure dict_struct;
|
||||
const DictionarySourcePtr source_ptr;
|
||||
const DictionaryLifetime dict_lifetime;
|
||||
|
@ -1,6 +1,10 @@
|
||||
#include "ComplexKeyHashedDictionary.h"
|
||||
#include <ext/map.h>
|
||||
#include <ext/range.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
|
||||
@ -32,216 +36,111 @@ ComplexKeyHashedDictionary::ComplexKeyHashedDictionary(
|
||||
calculateBytesAllocated();
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyHashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyHashedDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyHashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyHashedDictionary::getString(
|
||||
ColumnPtr ComplexKeyHashedDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
ColumnPtr result;
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyHashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
ColumnUInt8::MutablePtr col_null_map_to;
|
||||
ColumnUInt8::Container * vec_null_map_to = nullptr;
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
col_null_map_to = ColumnUInt8::create(keys_size, false);
|
||||
vec_null_map_to = &col_null_map_to->getData();
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyHashedDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
|
||||
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return StringRef{def}; });
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const StringRef value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out[row] = value;
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void ComplexKeyHashedDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr ComplexKeyHashedDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
auto size = key_columns.front()->size();
|
||||
auto result = ColumnUInt8::create(size);
|
||||
auto& out = result->getData();
|
||||
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
has<UInt8>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
has<UInt16>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
has<UInt32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
has<UInt64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
has<UInt128>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
has<Int8>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
has<Int16>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
has<Int32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
has<Int64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
has<Float32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
has<Float64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
has<StringRef>(attribute, key_columns, out);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
has<Decimal32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
has<Decimal64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
has<Decimal128>(attribute, key_columns, out);
|
||||
break;
|
||||
}
|
||||
has<ValueType>(attribute, key_columns, out);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void ComplexKeyHashedDictionary::createAttributes()
|
||||
@ -252,7 +151,7 @@ void ComplexKeyHashedDictionary::createAttributes()
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
|
||||
attributes.push_back(createAttribute(attribute, attribute.null_value));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
|
||||
@ -407,66 +306,30 @@ void ComplexKeyHashedDictionary::addAttributeSize(const Attribute & attribute)
|
||||
bucket_count = map_ref.getBufferSizeInCells();
|
||||
}
|
||||
|
||||
template <>
|
||||
void ComplexKeyHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
|
||||
{
|
||||
const auto & map_ref = std::get<ContainerType<StringRef>>(attribute.maps);
|
||||
bytes_allocated += sizeof(ContainerType<StringRef>) + map_ref.getBufferSizeInBytes();
|
||||
bucket_count = map_ref.getBufferSizeInCells();
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
}
|
||||
|
||||
void ComplexKeyHashedDictionary::calculateBytesAllocated()
|
||||
{
|
||||
bytes_allocated += attributes.size() * sizeof(attributes.front());
|
||||
|
||||
for (const auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
addAttributeSize<UInt8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
addAttributeSize<UInt16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
addAttributeSize<UInt32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
addAttributeSize<UInt64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
addAttributeSize<UInt128>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
addAttributeSize<Int8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
addAttributeSize<Int16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
addAttributeSize<Int32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
addAttributeSize<Int64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
addAttributeSize<Float32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
addAttributeSize<Float64>(attribute);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
addAttributeSize<Decimal32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
addAttributeSize<Decimal64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
addAttributeSize<Decimal128>(attribute);
|
||||
break;
|
||||
addAttributeSize<AttributeType>(attribute);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
bytes_allocated += keys_pool.size();
|
||||
@ -479,73 +342,41 @@ void ComplexKeyHashedDictionary::createAttributeImpl(Attribute & attribute, cons
|
||||
attribute.maps.emplace<ContainerType<T>>();
|
||||
}
|
||||
|
||||
ComplexKeyHashedDictionary::Attribute
|
||||
ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
template <>
|
||||
void ComplexKeyHashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}, {}};
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
const String & string = null_value.get<String>();
|
||||
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
|
||||
attribute.maps.emplace<ContainerType<StringRef>>();
|
||||
}
|
||||
|
||||
switch (type)
|
||||
ComplexKeyHashedDictionary::Attribute
|
||||
ComplexKeyHashedDictionary::createAttribute(const DictionaryAttribute & attribute, const Field & null_value)
|
||||
{
|
||||
auto nullable_set = attribute.is_nullable ? std::make_unique<NullableSet>() : nullptr;
|
||||
Attribute attr{attribute.underlying_type, attribute.is_nullable, std::move(nullable_set), {}, {}, {}};
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
attr.null_values = null_value.get<String>();
|
||||
attr.maps.emplace<ContainerType<StringRef>>();
|
||||
attr.string_arena = std::make_unique<Arena>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void ComplexKeyHashedDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto & attr = std::get<ContainerType<AttributeType>>(attribute.maps);
|
||||
|
||||
@ -560,7 +391,18 @@ void ComplexKeyHashedDictionary::getItemsImpl(
|
||||
const auto key = placeKeysInPool(i, key_columns, keys, temporary_keys_pool);
|
||||
|
||||
const auto it = attr.find(key);
|
||||
set_value(i, it ? static_cast<OutputType>(it->getMapped()) : get_default(i));
|
||||
|
||||
if (it)
|
||||
{
|
||||
set_value(i, static_cast<OutputType>(it->getMapped()), false);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (attribute.is_nullable && attribute.nullable_set->find(key) != nullptr)
|
||||
set_value(i, default_value_extractor[i], true);
|
||||
else
|
||||
set_value(i, default_value_extractor[i], false);
|
||||
}
|
||||
|
||||
/// free memory allocated for the key
|
||||
temporary_keys_pool.rollback(key.size);
|
||||
@ -578,51 +420,42 @@ bool ComplexKeyHashedDictionary::setAttributeValueImpl(Attribute & attribute, co
|
||||
return pair.second;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool ComplexKeyHashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const StringRef key, const String value)
|
||||
{
|
||||
const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
|
||||
return setAttributeValueImpl<StringRef>(attribute, key, StringRef{string_in_arena, value.size()});
|
||||
}
|
||||
|
||||
bool ComplexKeyHashedDictionary::setAttributeValue(Attribute & attribute, const StringRef key, const Field & value)
|
||||
{
|
||||
switch (attribute.type)
|
||||
bool result = false;
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
return setAttributeValueImpl<UInt8>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
return setAttributeValueImpl<UInt16>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
return setAttributeValueImpl<UInt32>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
return setAttributeValueImpl<UInt64>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
return setAttributeValueImpl<UInt128>(attribute, key, value.get<UInt128>());
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
return setAttributeValueImpl<Int8>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
return setAttributeValueImpl<Int16>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
return setAttributeValueImpl<Int32>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
return setAttributeValueImpl<Int64>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
return setAttributeValueImpl<Float32>(attribute, key, value.get<Float64>());
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
return setAttributeValueImpl<Float64>(attribute, key, value.get<Float64>());
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
return setAttributeValueImpl<Decimal32>(attribute, key, value.get<Decimal32>());
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
return setAttributeValueImpl<Decimal64>(attribute, key, value.get<Decimal64>());
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
return setAttributeValueImpl<Decimal128>(attribute, key, value.get<Decimal128>());
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
auto & map = std::get<ContainerType<StringRef>>(attribute.maps);
|
||||
const auto & string = value.get<String>();
|
||||
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
const auto pair = map.insert({key, StringRef{string_in_arena, string.size()}});
|
||||
return pair.second;
|
||||
if (value.isNull())
|
||||
{
|
||||
attribute.nullable_set->insert(key);
|
||||
result = true;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
attribute.nullable_set->erase(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
result = setAttributeValueImpl<AttributeType>(attribute, key, value.get<NearestFieldType<AttributeType>>());
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
const ComplexKeyHashedDictionary::Attribute & ComplexKeyHashedDictionary::getAttribute(const std::string & attribute_name) const
|
||||
@ -673,6 +506,9 @@ void ComplexKeyHashedDictionary::has(const Attribute & attribute, const Columns
|
||||
const auto it = attr.find(key);
|
||||
out[i] = static_cast<bool>(it);
|
||||
|
||||
if (attribute.is_nullable && !out[i])
|
||||
out[i] = attribute.nullable_set->find(key) != nullptr;
|
||||
|
||||
/// free memory allocated for the key
|
||||
temporary_keys_pool.rollback(key.size);
|
||||
}
|
||||
@ -684,41 +520,26 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys() const
|
||||
{
|
||||
const Attribute & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
return getKeys<UInt8>(attribute);
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
return getKeys<UInt16>(attribute);
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
return getKeys<UInt32>(attribute);
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
return getKeys<UInt64>(attribute);
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
return getKeys<UInt128>(attribute);
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
return getKeys<Int8>(attribute);
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
return getKeys<Int16>(attribute);
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
return getKeys<Int32>(attribute);
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
return getKeys<Int64>(attribute);
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
return getKeys<Float32>(attribute);
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
return getKeys<Float64>(attribute);
|
||||
case AttributeUnderlyingType::utString:
|
||||
return getKeys<StringRef>(attribute);
|
||||
std::vector<StringRef> result;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
return getKeys<Decimal32>(attribute);
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
return getKeys<Decimal64>(attribute);
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
return getKeys<Decimal128>(attribute);
|
||||
}
|
||||
return {};
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
result = getKeys<StringRef>(attribute);
|
||||
}
|
||||
else
|
||||
{
|
||||
result = getKeys<AttributeType>(attribute);
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -730,12 +551,18 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & att
|
||||
for (const auto & key : attr)
|
||||
keys.push_back(key.getKey());
|
||||
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
for (const auto & key: *attribute.nullable_set)
|
||||
keys.push_back(key.getKey());
|
||||
}
|
||||
|
||||
return keys;
|
||||
}
|
||||
|
||||
BlockInputStreamPtr ComplexKeyHashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyHashedDictionary, UInt64>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getKeys(), column_names);
|
||||
}
|
||||
|
||||
|
@ -7,17 +7,17 @@
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Core/Block.h>
|
||||
#include <common/StringRef.h>
|
||||
#include <ext/range.h>
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
|
||||
#include "DictionaryStructure.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
|
||||
class ComplexKeyHashedDictionary final : public IDictionaryBase
|
||||
{
|
||||
@ -60,91 +60,16 @@ public:
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const;
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -152,9 +77,14 @@ private:
|
||||
template <typename Value>
|
||||
using ContainerType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
|
||||
|
||||
using NullableSet = HashSetWithSavedHash<StringRef, StringRefHash>;
|
||||
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
bool is_nullable;
|
||||
std::unique_ptr<NullableSet> nullable_set;
|
||||
|
||||
std::variant<
|
||||
UInt8,
|
||||
UInt16,
|
||||
@ -170,7 +100,7 @@ private:
|
||||
Decimal128,
|
||||
Float32,
|
||||
Float64,
|
||||
String>
|
||||
StringRef>
|
||||
null_values;
|
||||
std::variant<
|
||||
ContainerType<UInt8>,
|
||||
@ -206,18 +136,21 @@ private:
|
||||
void calculateBytesAllocated();
|
||||
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
static Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void
|
||||
getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
|
||||
static bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
|
||||
|
||||
bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
|
||||
static bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
|
||||
|
||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
||||
|
||||
|
@ -25,12 +25,10 @@ namespace ErrorCodes
|
||||
/* BlockInputStream implementation for external dictionaries
|
||||
* read() returns blocks consisting of the in-memory contents of the dictionaries
|
||||
*/
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Key>
|
||||
class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase
|
||||
{
|
||||
public:
|
||||
using DictionaryPtr = std::shared_ptr<DictionaryType const>;
|
||||
|
||||
DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary, UInt64 max_block_size, PaddedPODArray<Key> && ids, const Names & column_names);
|
||||
|
||||
@ -60,111 +58,9 @@ protected:
|
||||
Block getBlock(size_t start, size_t size) const override;
|
||||
|
||||
private:
|
||||
// pointer types to getXXX functions
|
||||
// for single key dictionaries
|
||||
template <typename Type>
|
||||
using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, PaddedPODArray<Type> &) const;
|
||||
|
||||
template <typename Type>
|
||||
using DictionaryDecimalGetter
|
||||
= void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, DecimalPaddedPODArray<Type> &) const;
|
||||
|
||||
using DictionaryStringGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, ColumnString *) const;
|
||||
|
||||
// for complex complex key dictionaries
|
||||
template <typename Type>
|
||||
using GetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, PaddedPODArray<Type> & out) const;
|
||||
|
||||
template <typename Type>
|
||||
using DecimalGetterByKey
|
||||
= void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, DecimalPaddedPODArray<Type> & out) const;
|
||||
|
||||
using StringGetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, ColumnString * out) const;
|
||||
|
||||
// call getXXX
|
||||
// for single key dictionaries
|
||||
template <typename Type, typename Container>
|
||||
void callGetter(
|
||||
DictionaryGetter<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <typename Type, typename Container>
|
||||
void callGetter(
|
||||
DictionaryDecimalGetter<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <typename Container>
|
||||
void callGetter(
|
||||
DictionaryStringGetter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
// for complex complex key dictionaries
|
||||
template <typename Type, typename Container>
|
||||
void callGetter(
|
||||
GetterByKey<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <typename Type, typename Container>
|
||||
void callGetter(
|
||||
DecimalGetterByKey<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <typename Container>
|
||||
void callGetter(
|
||||
StringGetterByKey getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
|
||||
Block
|
||||
fillBlock(const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
|
||||
|
||||
|
||||
template <typename AttributeType, typename Getter>
|
||||
ColumnPtr getColumnFromAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
template <typename Getter>
|
||||
ColumnPtr getColumnFromStringAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
ColumnPtr getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const;
|
||||
|
||||
void fillKeyColumns(
|
||||
@ -174,65 +70,54 @@ private:
|
||||
const DictionaryStructure & dictionary_structure,
|
||||
ColumnsWithTypeAndName & columns) const;
|
||||
|
||||
DictionaryPtr dictionary;
|
||||
std::shared_ptr<const IDictionaryBase> dictionary;
|
||||
Names column_names;
|
||||
PaddedPODArray<Key> ids;
|
||||
ColumnsWithTypeAndName key_columns;
|
||||
Poco::Logger * logger;
|
||||
|
||||
using FillBlockFunction = Block (DictionaryBlockInputStream<DictionaryType, Key>::*)(
|
||||
const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
|
||||
|
||||
FillBlockFunction fill_block_function;
|
||||
|
||||
Columns data_columns;
|
||||
GetColumnsFunction get_key_columns_function;
|
||||
GetColumnsFunction get_view_columns_function;
|
||||
|
||||
enum class DictionaryKeyType
|
||||
enum class DictionaryInputStreamKeyType
|
||||
{
|
||||
Id,
|
||||
ComplexKey,
|
||||
Callback
|
||||
};
|
||||
|
||||
DictionaryKeyType key_type;
|
||||
DictionaryInputStreamKeyType key_type;
|
||||
};
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
template <typename Key>
|
||||
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary_, UInt64 max_block_size_, PaddedPODArray<Key> && ids_, const Names & column_names_)
|
||||
: DictionaryBlockInputStreamBase(ids_.size(), max_block_size_)
|
||||
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
|
||||
, dictionary(dictionary_)
|
||||
, column_names(column_names_)
|
||||
, ids(std::move(ids_))
|
||||
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
|
||||
, fill_block_function(
|
||||
&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<DictionaryGetter, DictionaryDecimalGetter, DictionaryStringGetter>)
|
||||
, key_type(DictionaryKeyType::Id)
|
||||
, key_type(DictionaryInputStreamKeyType::Id)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
template <typename Key>
|
||||
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary_,
|
||||
UInt64 max_block_size_,
|
||||
const std::vector<StringRef> & keys,
|
||||
const Names & column_names_)
|
||||
: DictionaryBlockInputStreamBase(keys.size(), max_block_size_)
|
||||
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
|
||||
, dictionary(dictionary_)
|
||||
, column_names(column_names_)
|
||||
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
|
||||
, fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
|
||||
, key_type(DictionaryKeyType::ComplexKey)
|
||||
, key_type(DictionaryInputStreamKeyType::ComplexKey)
|
||||
{
|
||||
const DictionaryStructure & dictionaty_structure = dictionary->getStructure();
|
||||
fillKeyColumns(keys, 0, keys.size(), dictionaty_structure, key_columns);
|
||||
const DictionaryStructure & dictionary_structure = dictionary->getStructure();
|
||||
fillKeyColumns(keys, 0, keys.size(), dictionary_structure, key_columns);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
template <typename Key>
|
||||
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary_,
|
||||
UInt64 max_block_size_,
|
||||
const Columns & data_columns_,
|
||||
@ -240,24 +125,23 @@ DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
GetColumnsFunction && get_key_columns_function_,
|
||||
GetColumnsFunction && get_view_columns_function_)
|
||||
: DictionaryBlockInputStreamBase(data_columns_.front()->size(), max_block_size_)
|
||||
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
|
||||
, dictionary(dictionary_)
|
||||
, column_names(column_names_)
|
||||
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
|
||||
, fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
|
||||
, data_columns(data_columns_)
|
||||
, get_key_columns_function(get_key_columns_function_)
|
||||
, get_view_columns_function(get_view_columns_function_)
|
||||
, key_type(DictionaryKeyType::Callback)
|
||||
, get_key_columns_function(std::move(get_key_columns_function_))
|
||||
, get_view_columns_function(std::move(get_view_columns_function_))
|
||||
, key_type(DictionaryInputStreamKeyType::Callback)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
|
||||
template <typename Key>
|
||||
Block DictionaryBlockInputStream<Key>::getBlock(size_t start, size_t length) const
|
||||
{
|
||||
/// TODO: Rewrite
|
||||
switch (key_type)
|
||||
{
|
||||
case DictionaryKeyType::ComplexKey:
|
||||
case DictionaryInputStreamKeyType::ComplexKey:
|
||||
{
|
||||
Columns columns;
|
||||
ColumnsWithTypeAndName view_columns;
|
||||
@ -268,16 +152,16 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, si
|
||||
columns.emplace_back(column);
|
||||
view_columns.emplace_back(column, key_column.type, key_column.name);
|
||||
}
|
||||
return (this->*fill_block_function)({}, columns, {}, std::move(view_columns));
|
||||
return fillBlock({}, columns, {}, std::move(view_columns));
|
||||
}
|
||||
|
||||
case DictionaryKeyType::Id:
|
||||
case DictionaryInputStreamKeyType::Id:
|
||||
{
|
||||
PaddedPODArray<Key> ids_to_fill(ids.begin() + start, ids.begin() + start + length);
|
||||
return (this->*fill_block_function)(ids_to_fill, {}, {}, {});
|
||||
return fillBlock(ids_to_fill, {}, {}, {});
|
||||
}
|
||||
|
||||
case DictionaryKeyType::Callback:
|
||||
case DictionaryInputStreamKeyType::Callback:
|
||||
{
|
||||
Columns columns;
|
||||
columns.reserve(data_columns.size());
|
||||
@ -294,102 +178,15 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, si
|
||||
columns.push_back(key_column.column);
|
||||
types.push_back(key_column.type);
|
||||
}
|
||||
return (this->*fill_block_function)({}, columns, types, std::move(view_with_type_and_name));
|
||||
return fillBlock({}, columns, types, std::move(view_with_type_and_name));
|
||||
}
|
||||
}
|
||||
|
||||
throw Exception("Unexpected DictionaryKeyType.", ErrorCodes::LOGICAL_ERROR);
|
||||
throw Exception("Unexpected DictionaryInputStreamKeyType.", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Type, typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
DictionaryGetter<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & /*keys*/,
|
||||
const DataTypes & /*data_types*/,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, ids_to_fill, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Type, typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
DictionaryDecimalGetter<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & /*keys*/,
|
||||
const DataTypes & /*data_types*/,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, ids_to_fill, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
DictionaryStringGetter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & /*keys*/,
|
||||
const DataTypes & /*data_types*/,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, ids_to_fill, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Type, typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
GetterByKey<Type> getter,
|
||||
const PaddedPODArray<Key> & /*ids_to_fill*/,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, keys, data_types, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Type, typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
DecimalGetterByKey<Type> getter,
|
||||
const PaddedPODArray<Key> & /*ids_to_fill*/,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, keys, data_types, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
StringGetterByKey getter,
|
||||
const PaddedPODArray<Key> & /*ids_to_fill*/,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, keys, data_types, container);
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
|
||||
Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
template <typename Key>
|
||||
Block DictionaryBlockInputStream<Key>::fillBlock(
|
||||
const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const
|
||||
{
|
||||
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
|
||||
@ -408,9 +205,14 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
block_columns.push_back(column);
|
||||
|
||||
const DictionaryStructure & structure = dictionary->getStructure();
|
||||
ColumnPtr ids_column = getColumnFromIds(ids_to_fill);
|
||||
|
||||
if (structure.id && names.find(structure.id->name) != names.end())
|
||||
block_columns.emplace_back(getColumnFromIds(ids_to_fill), std::make_shared<DataTypeUInt64>(), structure.id->name);
|
||||
{
|
||||
block_columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), structure.id->name);
|
||||
}
|
||||
|
||||
auto dictionary_key_type = dictionary->getKeyType();
|
||||
|
||||
for (const auto idx : ext::range(0, structure.attributes.size()))
|
||||
{
|
||||
@ -418,126 +220,35 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
if (names.find(attribute.name) != names.end())
|
||||
{
|
||||
ColumnPtr column;
|
||||
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
|
||||
column = getColumnFromAttribute<TYPE, Getter<TYPE>>(&DictionaryType::get##TYPE, ids_to_fill, keys, data_types, attribute, *dictionary)
|
||||
switch (attribute.underlying_type)
|
||||
|
||||
if (dictionary_key_type == DictionaryKeyType::simple)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt128);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int8);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int16);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
{
|
||||
column = getColumnFromAttribute<Decimal32, DecimalGetter<Decimal32>>(
|
||||
&DictionaryType::getDecimal32, ids_to_fill, keys, data_types, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
{
|
||||
column = getColumnFromAttribute<Decimal64, DecimalGetter<Decimal64>>(
|
||||
&DictionaryType::getDecimal64, ids_to_fill, keys, data_types, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
{
|
||||
column = getColumnFromAttribute<Decimal128, DecimalGetter<Decimal128>>(
|
||||
&DictionaryType::getDecimal128, ids_to_fill, keys, data_types, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
column = getColumnFromStringAttribute<StringGetter>(
|
||||
&DictionaryType::getString, ids_to_fill, keys, data_types, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
column = dictionary->getColumn(
|
||||
attribute.name,
|
||||
attribute.type,
|
||||
{ids_column},
|
||||
{std::make_shared<DataTypeUInt64>()},
|
||||
nullptr /* default_values_column */);
|
||||
}
|
||||
#undef GET_COLUMN_FORM_ATTRIBUTE
|
||||
else
|
||||
{
|
||||
column = dictionary->getColumn(
|
||||
attribute.name,
|
||||
attribute.type,
|
||||
keys,
|
||||
data_types,
|
||||
nullptr /* default_values_column*/);
|
||||
}
|
||||
|
||||
block_columns.emplace_back(column, attribute.type, attribute.name);
|
||||
}
|
||||
}
|
||||
|
||||
return Block(block_columns);
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename AttributeType, typename Getter>
|
||||
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
if constexpr (IsDecimalNumber<AttributeType>)
|
||||
{
|
||||
auto size = ids_to_fill.size();
|
||||
if (!keys.empty())
|
||||
size = keys.front()->size();
|
||||
auto column = ColumnDecimal<AttributeType>::create(size, 0); /// NOTE: There's wrong scale here, but it's unused.
|
||||
callGetter(getter, ids_to_fill, keys, data_types, column->getData(), attribute, dict);
|
||||
return column;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto size = ids_to_fill.size();
|
||||
if (!keys.empty())
|
||||
size = keys.front()->size();
|
||||
auto column_vector = ColumnVector<AttributeType>::create(size);
|
||||
callGetter(getter, ids_to_fill, keys, data_types, column_vector->getData(), attribute, dict);
|
||||
return column_vector;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Getter>
|
||||
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromStringAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
auto column_string = ColumnString::create();
|
||||
auto ptr = column_string.get();
|
||||
callGetter(getter, ids_to_fill, keys, data_types, ptr, attribute, dict);
|
||||
return column_string;
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
|
||||
template <typename Key>
|
||||
ColumnPtr DictionaryBlockInputStream<Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
|
||||
{
|
||||
auto column_vector = ColumnVector<UInt64>::create();
|
||||
column_vector->getData().reserve(ids_to_fill.size());
|
||||
@ -547,8 +258,8 @@ ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(cons
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::fillKeyColumns(
|
||||
template <typename Key>
|
||||
void DictionaryBlockInputStream<Key>::fillKeyColumns(
|
||||
const std::vector<StringRef> & keys,
|
||||
size_t start,
|
||||
size_t size,
|
||||
|
149
src/Dictionaries/DictionaryHelpers.h
Normal file
149
src/Dictionaries/DictionaryHelpers.h
Normal file
@ -0,0 +1,149 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include "DictionaryStructure.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TYPE_MISMATCH;
|
||||
}
|
||||
|
||||
/**
|
||||
* In Dictionaries implementation String attribute is stored in arena and StringRefs are pointing to it.
|
||||
*/
|
||||
template <typename DictionaryAttributeType>
|
||||
using DictionaryValueType =
|
||||
std::conditional_t<std::is_same_v<DictionaryAttributeType, String>, StringRef, DictionaryAttributeType>;
|
||||
|
||||
/**
|
||||
* Used to create column with right type for DictionaryAttributeType.
|
||||
*/
|
||||
template <typename DictionaryAttributeType>
|
||||
class DictionaryAttributeColumnProvider
|
||||
{
|
||||
public:
|
||||
using ColumnType =
|
||||
std::conditional_t<std::is_same_v<DictionaryAttributeType, String>, ColumnString,
|
||||
std::conditional_t<IsDecimalNumber<DictionaryAttributeType>, ColumnDecimal<DictionaryAttributeType>,
|
||||
ColumnVector<DictionaryAttributeType>>>;
|
||||
|
||||
using ColumnPtr = typename ColumnType::MutablePtr;
|
||||
|
||||
static ColumnPtr getColumn(const DictionaryAttribute & dictionary_attribute, size_t size)
|
||||
{
|
||||
if constexpr (std::is_same_v<DictionaryAttributeType, String>)
|
||||
{
|
||||
return ColumnType::create();
|
||||
}
|
||||
if constexpr (IsDecimalNumber<DictionaryAttributeType>)
|
||||
{
|
||||
auto scale = getDecimalScale(*dictionary_attribute.nested_type);
|
||||
return ColumnType::create(size, scale);
|
||||
}
|
||||
else if constexpr (IsNumber<DictionaryAttributeType>)
|
||||
return ColumnType::create(size);
|
||||
else
|
||||
throw Exception{"Unsupported attribute type.", ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* DictionaryDefaultValueExtractor used to simplify getting default value for IDictionary function `getColumn`.
|
||||
* Provides interface for getting default value with operator[];
|
||||
*
|
||||
* If default_values_column is null then attribute_default_value will be used.
|
||||
* If default_values_column is not null in constructor than this column values will be used as default values.
|
||||
*/
|
||||
template <typename DictionaryAttributeType>
|
||||
class DictionaryDefaultValueExtractor
|
||||
{
|
||||
using DefaultColumnType = typename DictionaryAttributeColumnProvider<DictionaryAttributeType>::ColumnType;
|
||||
|
||||
public:
|
||||
using DefaultValueType = DictionaryValueType<DictionaryAttributeType>;
|
||||
|
||||
DictionaryDefaultValueExtractor(DictionaryAttributeType attribute_default_value, ColumnPtr default_values_column_ = nullptr)
|
||||
: default_value(std::move(attribute_default_value))
|
||||
{
|
||||
if (default_values_column_ == nullptr)
|
||||
use_default_value_from_column = false;
|
||||
else
|
||||
{
|
||||
if (const auto * const default_col = checkAndGetColumn<DefaultColumnType>(*default_values_column_))
|
||||
{
|
||||
default_values_column = default_col;
|
||||
use_default_value_from_column = true;
|
||||
}
|
||||
else if (const auto * const default_col_const = checkAndGetColumnConst<DefaultColumnType>(default_values_column_.get()))
|
||||
{
|
||||
default_value = default_col_const->template getValue<DictionaryAttributeType>();
|
||||
use_default_value_from_column = false;
|
||||
}
|
||||
else
|
||||
throw Exception{"Type of default column is not the same as dictionary attribute type.", ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
}
|
||||
|
||||
DefaultValueType operator[](size_t row)
|
||||
{
|
||||
if (!use_default_value_from_column)
|
||||
return static_cast<DefaultValueType>(default_value);
|
||||
|
||||
assert(default_values_column != nullptr);
|
||||
|
||||
if constexpr (std::is_same_v<DefaultColumnType, ColumnString>)
|
||||
return default_values_column->getDataAt(row);
|
||||
else
|
||||
return default_values_column->getData()[row];
|
||||
}
|
||||
private:
|
||||
DictionaryAttributeType default_value;
|
||||
const DefaultColumnType * default_values_column = nullptr;
|
||||
bool use_default_value_from_column = false;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns ColumnVector data as PaddedPodArray.
|
||||
|
||||
* If column is constant parameter backup_storage is used to store values.
|
||||
*/
|
||||
template <typename T>
|
||||
static const PaddedPODArray<T> & getColumnVectorData(
|
||||
const IDictionaryBase * dictionary,
|
||||
const ColumnPtr column,
|
||||
PaddedPODArray<T> & backup_storage)
|
||||
{
|
||||
bool is_const_column = isColumnConst(*column);
|
||||
auto full_column = column->convertToFullColumnIfConst();
|
||||
auto vector_col = checkAndGetColumn<ColumnVector<T>>(full_column.get());
|
||||
|
||||
if (!vector_col)
|
||||
{
|
||||
throw Exception{ErrorCodes::TYPE_MISMATCH,
|
||||
"{}: type mismatch: column has wrong type expected {}",
|
||||
dictionary->getDictionaryID().getNameForLogs(),
|
||||
TypeName<T>::get()};
|
||||
}
|
||||
|
||||
if (is_const_column)
|
||||
{
|
||||
// With type conversion and const columns we need to use backup storage here
|
||||
auto & data = vector_col->getData();
|
||||
backup_storage.assign(data);
|
||||
|
||||
return backup_storage;
|
||||
}
|
||||
else
|
||||
{
|
||||
return vector_col->getData();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -2,6 +2,8 @@
|
||||
#include <Columns/IColumn.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/Operators.h>
|
||||
@ -12,7 +14,6 @@
|
||||
#include <unordered_set>
|
||||
#include <ext/range.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
@ -41,54 +42,46 @@ namespace
|
||||
}
|
||||
|
||||
|
||||
AttributeUnderlyingType getAttributeUnderlyingType(const std::string & type)
|
||||
AttributeUnderlyingType getAttributeUnderlyingType(const DataTypePtr & type)
|
||||
{
|
||||
static const std::unordered_map<std::string, AttributeUnderlyingType> dictionary
|
||||
auto type_index = type->getTypeId();
|
||||
|
||||
switch (type_index)
|
||||
{
|
||||
{"UInt8", AttributeUnderlyingType::utUInt8},
|
||||
{"UInt16", AttributeUnderlyingType::utUInt16},
|
||||
{"UInt32", AttributeUnderlyingType::utUInt32},
|
||||
{"UInt64", AttributeUnderlyingType::utUInt64},
|
||||
{"UUID", AttributeUnderlyingType::utUInt128},
|
||||
{"Int8", AttributeUnderlyingType::utInt8},
|
||||
{"Int16", AttributeUnderlyingType::utInt16},
|
||||
{"Int32", AttributeUnderlyingType::utInt32},
|
||||
{"Int64", AttributeUnderlyingType::utInt64},
|
||||
{"Float32", AttributeUnderlyingType::utFloat32},
|
||||
{"Float64", AttributeUnderlyingType::utFloat64},
|
||||
{"String", AttributeUnderlyingType::utString},
|
||||
{"Date", AttributeUnderlyingType::utUInt16},
|
||||
};
|
||||
case TypeIndex::UInt8: return AttributeUnderlyingType::utUInt8;
|
||||
case TypeIndex::UInt16: return AttributeUnderlyingType::utUInt16;
|
||||
case TypeIndex::UInt32: return AttributeUnderlyingType::utUInt32;
|
||||
case TypeIndex::UInt64: return AttributeUnderlyingType::utUInt64;
|
||||
case TypeIndex::UInt128: return AttributeUnderlyingType::utUInt128;
|
||||
|
||||
const auto it = dictionary.find(type);
|
||||
if (it != std::end(dictionary))
|
||||
return it->second;
|
||||
case TypeIndex::Int8: return AttributeUnderlyingType::utInt8;
|
||||
case TypeIndex::Int16: return AttributeUnderlyingType::utInt16;
|
||||
case TypeIndex::Int32: return AttributeUnderlyingType::utInt32;
|
||||
case TypeIndex::Int64: return AttributeUnderlyingType::utInt64;
|
||||
|
||||
/// Can contain arbitrary scale and timezone parameters.
|
||||
if (type.find("DateTime64") == 0)
|
||||
return AttributeUnderlyingType::utUInt64;
|
||||
case TypeIndex::Float32: return AttributeUnderlyingType::utFloat32;
|
||||
case TypeIndex::Float64: return AttributeUnderlyingType::utFloat64;
|
||||
|
||||
/// Can contain arbitrary timezone as parameter.
|
||||
if (type.find("DateTime") == 0)
|
||||
return AttributeUnderlyingType::utUInt32;
|
||||
case TypeIndex::Decimal32: return AttributeUnderlyingType::utDecimal32;
|
||||
case TypeIndex::Decimal64: return AttributeUnderlyingType::utDecimal64;
|
||||
case TypeIndex::Decimal128: return AttributeUnderlyingType::utDecimal128;
|
||||
|
||||
if (type.find("Decimal") == 0)
|
||||
{
|
||||
size_t start = strlen("Decimal");
|
||||
if (type.find("32", start) == start)
|
||||
return AttributeUnderlyingType::utDecimal32;
|
||||
if (type.find("64", start) == start)
|
||||
return AttributeUnderlyingType::utDecimal64;
|
||||
if (type.find("128", start) == start)
|
||||
return AttributeUnderlyingType::utDecimal128;
|
||||
case TypeIndex::Date: return AttributeUnderlyingType::utUInt16;
|
||||
case TypeIndex::DateTime: return AttributeUnderlyingType::utUInt32;
|
||||
case TypeIndex::DateTime64: return AttributeUnderlyingType::utUInt64;
|
||||
|
||||
case TypeIndex::UUID: return AttributeUnderlyingType::utUInt128;
|
||||
|
||||
case TypeIndex::String: return AttributeUnderlyingType::utString;
|
||||
|
||||
// Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.
|
||||
// TODO: This should be fixed by fully supporting arrays in dictionaries.
|
||||
case TypeIndex::Array: return AttributeUnderlyingType::utString;
|
||||
|
||||
default: break;
|
||||
}
|
||||
|
||||
// Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.
|
||||
// TODO: This should be fixed by fully supporting arrays in dictionaries.
|
||||
if (type.find("Array") == 0)
|
||||
return AttributeUnderlyingType::utString;
|
||||
|
||||
throw Exception{"Unknown type " + type, ErrorCodes::UNKNOWN_TYPE};
|
||||
throw Exception{"Unknown type for dictionary" + type->getName(), ErrorCodes::UNKNOWN_TYPE};
|
||||
}
|
||||
|
||||
|
||||
@ -215,16 +208,32 @@ void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const
|
||||
|
||||
for (const auto i : ext::range(0, key_types.size()))
|
||||
{
|
||||
const auto & expected_type = (*key)[i].type->getName();
|
||||
const auto & actual_type = key_types[i]->getName();
|
||||
const auto & expected_type = (*key)[i].type;
|
||||
const auto & actual_type = key_types[i];
|
||||
|
||||
if (expected_type != actual_type)
|
||||
throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type + ", found "
|
||||
+ actual_type,
|
||||
ErrorCodes::TYPE_MISMATCH};
|
||||
if (!areTypesEqual(expected_type, actual_type))
|
||||
throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type->getName() + ", found "
|
||||
+ actual_type->getName(),
|
||||
ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
}
|
||||
|
||||
const DictionaryAttribute & DictionaryStructure::getAttribute(const String& attribute_name, const DataTypePtr & type) const
|
||||
{
|
||||
auto find_iter
|
||||
= std::find_if(attributes.begin(), attributes.end(), [&](const auto & attribute) { return attribute.name == attribute_name; });
|
||||
|
||||
if (find_iter == attributes.end())
|
||||
throw Exception{"No such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
|
||||
|
||||
const auto & attribute = *find_iter;
|
||||
|
||||
if (!areTypesEqual(attribute.type, type))
|
||||
throw Exception{"Attribute type does not match, expected " + attribute.type->getName() + ", found " + type->getName(),
|
||||
ErrorCodes::TYPE_MISMATCH};
|
||||
|
||||
return *find_iter;
|
||||
}
|
||||
|
||||
std::string DictionaryStructure::getKeyDescription() const
|
||||
{
|
||||
@ -318,9 +327,20 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
if ((range_min && name == range_min->name) || (range_max && name == range_max->name))
|
||||
continue;
|
||||
|
||||
|
||||
const auto type_string = config.getString(prefix + "type");
|
||||
const auto type = DataTypeFactory::instance().get(type_string);
|
||||
const auto underlying_type = getAttributeUnderlyingType(type_string);
|
||||
const auto initial_type = DataTypeFactory::instance().get(type_string);
|
||||
auto type = initial_type;
|
||||
bool is_array = false;
|
||||
bool is_nullable = false;
|
||||
|
||||
if (type->isNullable())
|
||||
{
|
||||
is_nullable = true;
|
||||
type = removeNullable(type);
|
||||
}
|
||||
|
||||
const auto underlying_type = getAttributeUnderlyingType(type);
|
||||
|
||||
const auto expression = config.getString(prefix + "expression", "");
|
||||
if (!expression.empty())
|
||||
@ -333,7 +353,9 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
try
|
||||
{
|
||||
if (null_value_string.empty())
|
||||
{
|
||||
null_value = type->getDefault();
|
||||
}
|
||||
else
|
||||
{
|
||||
ReadBufferFromString null_value_buffer{null_value_string};
|
||||
@ -365,8 +387,18 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
|
||||
has_hierarchy = has_hierarchy || hierarchical;
|
||||
|
||||
res_attributes.emplace_back(
|
||||
DictionaryAttribute{name, underlying_type, type, expression, null_value, hierarchical, injective, is_object_id});
|
||||
res_attributes.emplace_back(DictionaryAttribute{
|
||||
name,
|
||||
underlying_type,
|
||||
initial_type,
|
||||
type,
|
||||
expression,
|
||||
null_value,
|
||||
hierarchical,
|
||||
injective,
|
||||
is_object_id,
|
||||
is_nullable,
|
||||
is_array});
|
||||
}
|
||||
|
||||
return res_attributes;
|
||||
|
@ -42,7 +42,6 @@ std::string toString(const AttributeUnderlyingType type);
|
||||
/// Min and max lifetimes for a dictionary or it's entry
|
||||
using DictionaryLifetime = ExternalLoadableLifetime;
|
||||
|
||||
|
||||
/** Holds the description of a single dictionary attribute:
|
||||
* - name, used for lookup into dictionary and source;
|
||||
* - type, used in conjunction with DataTypeFactory and getAttributeUnderlyingTypeByname;
|
||||
@ -57,13 +56,74 @@ struct DictionaryAttribute final
|
||||
const std::string name;
|
||||
const AttributeUnderlyingType underlying_type;
|
||||
const DataTypePtr type;
|
||||
const DataTypePtr nested_type;
|
||||
const std::string expression;
|
||||
const Field null_value;
|
||||
const bool hierarchical;
|
||||
const bool injective;
|
||||
const bool is_object_id;
|
||||
const bool is_nullable;
|
||||
const bool is_array;
|
||||
};
|
||||
|
||||
template <typename Type>
|
||||
struct DictionaryAttributeType
|
||||
{
|
||||
using AttributeType = Type;
|
||||
};
|
||||
|
||||
template <typename F>
|
||||
void callOnDictionaryAttributeType(AttributeUnderlyingType type, F&& func)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
func(DictionaryAttributeType<UInt8>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
func(DictionaryAttributeType<UInt16>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
func(DictionaryAttributeType<UInt32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
func(DictionaryAttributeType<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
func(DictionaryAttributeType<UInt128>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
func(DictionaryAttributeType<Int8>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
func(DictionaryAttributeType<Int16>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
func(DictionaryAttributeType<Int32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
func(DictionaryAttributeType<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
func(DictionaryAttributeType<Float32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
func(DictionaryAttributeType<Float64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
func(DictionaryAttributeType<String>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
func(DictionaryAttributeType<Decimal32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
func(DictionaryAttributeType<Decimal64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
func(DictionaryAttributeType<Decimal128>());
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
struct DictionarySpecialAttribute final
|
||||
{
|
||||
@ -94,10 +154,10 @@ struct DictionaryStructure final
|
||||
DictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
|
||||
|
||||
void validateKeyTypes(const DataTypes & key_types) const;
|
||||
const DictionaryAttribute &getAttribute(const String& attribute_name, const DataTypePtr & type) const;
|
||||
std::string getKeyDescription() const;
|
||||
bool isKeySizeFixed() const;
|
||||
size_t getKeySize() const;
|
||||
|
||||
private:
|
||||
/// range_min and range_max have to be parsed before this function call
|
||||
std::vector<DictionaryAttribute> getAttributes(
|
||||
|
@ -3,7 +3,10 @@
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
#include <Core/Defines.h>
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -35,11 +38,13 @@ DirectDictionary::DirectDictionary(
|
||||
void DirectDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
|
||||
{
|
||||
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
|
||||
DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
|
||||
|
||||
getItemsImpl<UInt64, UInt64>(
|
||||
*hierarchical_attribute,
|
||||
ids,
|
||||
[&](const size_t row, const UInt64 value) { out[row] = value; },
|
||||
[&](const size_t) { return null_value; });
|
||||
[&](const size_t row, const UInt64 value, bool) { out[row] = value; },
|
||||
extractor);
|
||||
}
|
||||
|
||||
|
||||
@ -128,395 +133,101 @@ void DirectDictionary::isInConstantVector(const Key child_id, const PaddedPODArr
|
||||
isInImpl(child_id, ancestor_ids, out);
|
||||
}
|
||||
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void DirectDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void DirectDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
|
||||
ColumnPtr DirectDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto & null_value = std::get<StringRef>(attribute.null_values);
|
||||
getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t) { return String(null_value.data, null_value.size); });
|
||||
}
|
||||
auto keys_size = ids.size();
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void DirectDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void DirectDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
|
||||
{
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t row) { const auto ref = def->getDataAt(row); return String(ref.data, ref.size); });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void DirectDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void DirectDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
|
||||
{
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
DirectDictionary::getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t) { return def; });
|
||||
}
|
||||
|
||||
|
||||
void DirectDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
ColumnUInt8::MutablePtr col_null_map_to;
|
||||
ColumnUInt8::Container * vec_null_map_to = nullptr;
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
has<UInt8>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
has<UInt16>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
has<UInt32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
has<UInt64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
has<UInt128>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
has<Int8>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
has<Int16>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
has<Int32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
has<Int64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
has<Float32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
has<Float64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
has<String>(attribute, ids, out);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
has<Decimal32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
has<Decimal64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
has<Decimal128>(attribute, ids, out);
|
||||
break;
|
||||
col_null_map_to = ColumnUInt8::create(keys_size, false);
|
||||
vec_null_map_to = &col_null_map_to->getData();
|
||||
}
|
||||
}
|
||||
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
void DirectDictionary::createAttributes()
|
||||
{
|
||||
const auto size = dict_struct.attributes.size();
|
||||
attributes.reserve(size);
|
||||
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attribute_name_by_index.emplace(attributes.size(), attribute.name);
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value, attribute.name));
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
if (attribute.hierarchical)
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
|
||||
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
hierarchical_attribute = &attributes.back();
|
||||
auto * out = column.get();
|
||||
|
||||
if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64)
|
||||
throw Exception{full_name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void DirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
|
||||
}
|
||||
|
||||
template <>
|
||||
void DirectDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
const String & string = null_value.get<String>();
|
||||
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
|
||||
}
|
||||
|
||||
|
||||
DirectDictionary::Attribute DirectDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & attr_name)
|
||||
{
|
||||
Attribute attr{type, {}, {}, attr_name};
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
createAttributeImpl<String>(attr, null_value);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
}
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void DirectDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
{
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
HashMap<Key, OutputType> value_by_key;
|
||||
for (const auto row : ext::range(0, rows))
|
||||
value_by_key[ids[row]] = get_default(row);
|
||||
|
||||
std::vector<Key> to_load;
|
||||
to_load.reserve(value_by_key.size());
|
||||
for (auto it = value_by_key.begin(); it != value_by_key.end(); ++it)
|
||||
to_load.emplace_back(static_cast<Key>(it->getKey()));
|
||||
|
||||
auto stream = source_ptr->loadIds(to_load);
|
||||
stream->readPrefix();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
const IColumn & id_column = *block.safeGetByPosition(0).column;
|
||||
|
||||
for (const size_t attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column;
|
||||
|
||||
for (const auto row_idx : ext::range(0, id_column.size()))
|
||||
{
|
||||
const auto key = id_column[row_idx].get<UInt64>();
|
||||
|
||||
if (value_by_key.find(key) != value_by_key.end() && attribute.name == attribute_name_by_index.at(attribute_idx))
|
||||
getItemsImpl<String, String>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t row, const String value, bool is_null)
|
||||
{
|
||||
if (attribute.type == AttributeUnderlyingType::utFloat32)
|
||||
{
|
||||
value_by_key[key] = static_cast<Float32>(attribute_column[row_idx].get<Float64>());
|
||||
}
|
||||
else
|
||||
{
|
||||
value_by_key[key] = static_cast<OutputType>(attribute_column[row_idx].get<AttributeType>());
|
||||
}
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
}
|
||||
}
|
||||
const auto ref = StringRef{value};
|
||||
out->insertData(ref.data, ref.size);
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
set_value(row, value_by_key[ids[row]]);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void DirectDictionary::getItemsStringImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
{
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
HashMap<Key, String> value_by_key;
|
||||
for (const auto row : ext::range(0, rows))
|
||||
value_by_key[ids[row]] = get_default(row);
|
||||
|
||||
std::vector<Key> to_load;
|
||||
to_load.reserve(value_by_key.size());
|
||||
for (auto it = value_by_key.begin(); it != value_by_key.end(); ++it)
|
||||
to_load.emplace_back(static_cast<Key>(it->getKey()));
|
||||
|
||||
auto stream = source_ptr->loadIds(to_load);
|
||||
stream->readPrefix();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
const IColumn & id_column = *block.safeGetByPosition(0).column;
|
||||
|
||||
for (const size_t attribute_idx : ext::range(0, attributes.size()))
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column;
|
||||
|
||||
for (const auto row_idx : ext::range(0, id_column.size()))
|
||||
{
|
||||
const auto key = id_column[row_idx].get<UInt64>();
|
||||
if (value_by_key.find(key) != value_by_key.end() && attribute.name == attribute_name_by_index.at(attribute_idx))
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t row, const auto value, bool is_null)
|
||||
{
|
||||
const String from_source = attribute_column[row_idx].get<String>();
|
||||
value_by_key[key] = from_source;
|
||||
}
|
||||
}
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out[row] = value;
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
}
|
||||
stream->readSuffix();
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
set_value(row, value_by_key[ids[row]]);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
const DirectDictionary::Attribute & DirectDictionary::getAttribute(const std::string & attribute_name) const
|
||||
ColumnUInt8::Ptr DirectDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
|
||||
{
|
||||
const auto it = attribute_index_by_name.find(attribute_name);
|
||||
if (it == std::end(attribute_index_by_name))
|
||||
throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
return attributes[it->second];
|
||||
}
|
||||
auto result = ColumnUInt8::create(ext::size(ids));
|
||||
auto& out = result->getData();
|
||||
|
||||
|
||||
template <typename T>
|
||||
void DirectDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
HashMap<Key, UInt8> has_key;
|
||||
@ -548,6 +259,137 @@ void DirectDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, P
|
||||
out[row] = has_key[ids[row]];
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void DirectDictionary::createAttributes()
|
||||
{
|
||||
const auto size = dict_struct.attributes.size();
|
||||
attributes.reserve(size);
|
||||
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attribute_name_by_index.emplace(attributes.size(), attribute.name);
|
||||
attributes.push_back(createAttribute(attribute, attribute.null_value, attribute.name));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
{
|
||||
hierarchical_attribute = &attributes.back();
|
||||
|
||||
if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64)
|
||||
throw Exception{full_name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void DirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
|
||||
}
|
||||
|
||||
template <>
|
||||
void DirectDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
const String & string = null_value.get<String>();
|
||||
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
|
||||
}
|
||||
|
||||
|
||||
DirectDictionary::Attribute DirectDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value, const std::string & attr_name)
|
||||
{
|
||||
Attribute attr{attribute.underlying_type, attribute.is_nullable, {}, {}, attr_name};
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void DirectDictionary::getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
HashMap<Key, OutputType> value_by_key;
|
||||
HashSet<Key> value_is_null;
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
auto key = ids[row];
|
||||
value_by_key[key] = static_cast<AttributeType>(default_value_extractor[row]);
|
||||
}
|
||||
|
||||
std::vector<Key> to_load;
|
||||
to_load.reserve(value_by_key.size());
|
||||
for (auto it = value_by_key.begin(); it != value_by_key.end(); ++it)
|
||||
to_load.emplace_back(static_cast<Key>(it->getKey()));
|
||||
|
||||
auto stream = source_ptr->loadIds(to_load);
|
||||
stream->readPrefix();
|
||||
|
||||
const auto it = attribute_index_by_name.find(attribute.name);
|
||||
if (it == std::end(attribute_index_by_name))
|
||||
throw Exception{full_name + ": no such attribute '" + attribute.name + "'", ErrorCodes::BAD_ARGUMENTS};
|
||||
|
||||
auto attribute_index = it->second;
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
const IColumn & id_column = *block.safeGetByPosition(0).column;
|
||||
|
||||
const IColumn & attribute_column = *block.safeGetByPosition(attribute_index + 1).column;
|
||||
|
||||
for (const auto row_idx : ext::range(0, id_column.size()))
|
||||
{
|
||||
const auto key = id_column[row_idx].get<UInt64>();
|
||||
|
||||
if (value_by_key.find(key) != value_by_key.end())
|
||||
{
|
||||
auto value = attribute_column[row_idx];
|
||||
|
||||
if (value.isNull())
|
||||
value_is_null.insert(key);
|
||||
else
|
||||
value_by_key[key] = static_cast<OutputType>(value.get<NearestFieldType<AttributeType>>());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
auto key = ids[row];
|
||||
set_value(row, value_by_key[key], value_is_null.find(key) != nullptr);
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
const DirectDictionary::Attribute & DirectDictionary::getAttribute(const std::string & attribute_name) const
|
||||
{
|
||||
const auto it = attribute_index_by_name.find(attribute_name);
|
||||
if (it == std::end(attribute_index_by_name))
|
||||
throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
|
||||
|
||||
return attributes[it->second];
|
||||
}
|
||||
|
||||
|
||||
|
@ -13,11 +13,10 @@
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
|
||||
class DirectDictionary final : public IDictionary
|
||||
{
|
||||
@ -65,76 +64,16 @@ public:
|
||||
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
|
||||
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void
|
||||
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
|
||||
const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
|
||||
|
||||
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -142,6 +81,7 @@ private:
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
bool is_nullable;
|
||||
std::variant<
|
||||
UInt8,
|
||||
UInt16,
|
||||
@ -168,23 +108,17 @@ private:
|
||||
template <typename T>
|
||||
void addAttributeSize(const Attribute & attribute);
|
||||
|
||||
void calculateBytesAllocated();
|
||||
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & name);
|
||||
static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value, const std::string & name);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void getItemsStringImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
|
||||
template <typename T>
|
||||
void resize(Attribute & attribute, const Key id);
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);
|
||||
@ -193,9 +127,6 @@ private:
|
||||
|
||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
||||
|
||||
template <typename T>
|
||||
void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
Key getValueOrNullByKey(const Key & to_find) const;
|
||||
|
||||
template <typename ChildType, typename AncestorType>
|
||||
|
@ -1,9 +1,14 @@
|
||||
#include "FlatDictionary.h"
|
||||
|
||||
#include <Core/Defines.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
#include <Core/Defines.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -44,12 +49,13 @@ FlatDictionary::FlatDictionary(
|
||||
void FlatDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
|
||||
{
|
||||
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
|
||||
DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
|
||||
|
||||
getItemsImpl<UInt64, UInt64>(
|
||||
*hierarchical_attribute,
|
||||
ids,
|
||||
[&](const size_t row, const UInt64 value) { out[row] = value; },
|
||||
[&](const size_t) { return null_value; });
|
||||
extractor);
|
||||
}
|
||||
|
||||
|
||||
@ -102,186 +108,103 @@ void FlatDictionary::isInConstantVector(const Key child_id, const PaddedPODArray
|
||||
isInImpl(child_id, ancestor_ids, out);
|
||||
}
|
||||
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void FlatDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void FlatDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
|
||||
ColumnPtr FlatDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
auto size = ids.size();
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
const auto & null_value = std::get<StringRef>(attribute.null_values);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void FlatDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void FlatDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
|
||||
{
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void FlatDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void FlatDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
|
||||
{
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
FlatDictionary::getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return StringRef{def}; });
|
||||
}
|
||||
|
||||
|
||||
void FlatDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
has<UInt8>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
has<UInt16>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
has<UInt32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
has<UInt64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
has<UInt128>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
has<Int8>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
has<Int16>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
has<Int32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
has<Int64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
has<Float32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
has<Float64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
has<String>(attribute, ids, out);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
has<Decimal32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
has<Decimal64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
has<Decimal128>(attribute, ids, out);
|
||||
break;
|
||||
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
|
||||
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
|
||||
|
||||
if constexpr (std::is_same_v<ValueType, StringRef>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<ValueType, ValueType>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<ValueType, ValueType>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t row, const auto value) { out[row] = value; },
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.nullable_set)
|
||||
{
|
||||
ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, false);
|
||||
ColumnUInt8::Container& vec_null_map_to = col_null_map_to->getData();
|
||||
|
||||
for (size_t row = 0; row < ids.size(); ++row)
|
||||
{
|
||||
auto id = ids[row];
|
||||
|
||||
if (attribute.nullable_set->find(id) != nullptr)
|
||||
vec_null_map_to[row] = true;
|
||||
}
|
||||
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
ColumnUInt8::Ptr FlatDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
|
||||
{
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
auto result = ColumnUInt8::create(ext::size(ids));
|
||||
auto& out = result->getData();
|
||||
|
||||
const auto ids_count = ext::size(ids);
|
||||
|
||||
for (const auto i : ext::range(0, ids_count))
|
||||
{
|
||||
const auto id = ids[i];
|
||||
out[i] = id < loaded_ids.size() && loaded_ids[id];
|
||||
}
|
||||
|
||||
query_count.fetch_add(ids_count, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void FlatDictionary::createAttributes()
|
||||
{
|
||||
const auto size = dict_struct.attributes.size();
|
||||
@ -290,7 +213,7 @@ void FlatDictionary::createAttributes()
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
|
||||
attributes.push_back(createAttribute(attribute, attribute.null_value));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
{
|
||||
@ -416,6 +339,14 @@ void FlatDictionary::addAttributeSize(const Attribute & attribute)
|
||||
bucket_count = array_ref.capacity();
|
||||
}
|
||||
|
||||
template <>
|
||||
void FlatDictionary::addAttributeSize<String>(const Attribute & attribute)
|
||||
{
|
||||
const auto & array_ref = std::get<ContainerType<StringRef>>(attribute.arrays);
|
||||
bytes_allocated += sizeof(PaddedPODArray<StringRef>) + array_ref.allocated_bytes();
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
bucket_count = array_ref.capacity();
|
||||
}
|
||||
|
||||
void FlatDictionary::calculateBytesAllocated()
|
||||
{
|
||||
@ -423,60 +354,15 @@ void FlatDictionary::calculateBytesAllocated()
|
||||
|
||||
for (const auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
addAttributeSize<UInt8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
addAttributeSize<UInt16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
addAttributeSize<UInt32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
addAttributeSize<UInt64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
addAttributeSize<UInt128>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
addAttributeSize<Int8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
addAttributeSize<Int16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
addAttributeSize<Int32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
addAttributeSize<Int64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
addAttributeSize<Float32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
addAttributeSize<Float64>(attribute);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
addAttributeSize<Decimal32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
addAttributeSize<Decimal64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
addAttributeSize<Decimal128>(attribute);
|
||||
break;
|
||||
addAttributeSize<AttributeType>(attribute);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
@ -500,67 +386,31 @@ void FlatDictionary::createAttributeImpl<String>(Attribute & attribute, const Fi
|
||||
}
|
||||
|
||||
|
||||
FlatDictionary::Attribute FlatDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}, {}};
|
||||
auto nullable_set = attribute.is_nullable ? std::make_optional<NullableSet>() : std::optional<NullableSet>{};
|
||||
Attribute attr{attribute.underlying_type, std::move(nullable_set), {}, {}, {}};
|
||||
|
||||
switch (type)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
createAttributeImpl<String>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
}
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void FlatDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto & attr = std::get<ContainerType<AttributeType>>(attribute.arrays);
|
||||
const auto rows = ext::size(ids);
|
||||
@ -568,7 +418,7 @@ void FlatDictionary::getItemsImpl(
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
set_value(row, id < ext::size(attr) && loaded_ids[id] ? static_cast<OutputType>(attr[id]) : get_default(row));
|
||||
set_value(row, id < ext::size(attr) && loaded_ids[id] ? static_cast<OutputType>(attr[id]) : default_value_extractor[row]);
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
@ -592,7 +442,6 @@ void FlatDictionary::resize(Attribute & attribute, const Key id)
|
||||
template <typename T>
|
||||
void FlatDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const T & value)
|
||||
{
|
||||
resize<T>(attribute, id);
|
||||
auto & array = std::get<ContainerType<T>>(attribute.arrays);
|
||||
array[id] = value;
|
||||
loaded_ids[id] = true;
|
||||
@ -601,64 +450,38 @@ void FlatDictionary::setAttributeValueImpl(Attribute & attribute, const Key id,
|
||||
template <>
|
||||
void FlatDictionary::setAttributeValueImpl<String>(Attribute & attribute, const Key id, const String & value)
|
||||
{
|
||||
resize<StringRef>(attribute, id);
|
||||
const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
|
||||
auto & array = std::get<ContainerType<StringRef>>(attribute.arrays);
|
||||
array[id] = StringRef{string_in_arena, value.size()};
|
||||
loaded_ids[id] = true;
|
||||
setAttributeValueImpl(attribute, id, StringRef{string_in_arena, value.size()});
|
||||
}
|
||||
|
||||
void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
setAttributeValueImpl<String>(attribute, id, value.get<String>());
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ResizeType = std::conditional_t<std::is_same_v<AttributeType, String>, StringRef, AttributeType>;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
|
||||
break;
|
||||
}
|
||||
resize<ResizeType>(attribute, id);
|
||||
|
||||
if (attribute.nullable_set)
|
||||
{
|
||||
if (value.isNull())
|
||||
{
|
||||
attribute.nullable_set->insert(id);
|
||||
loaded_ids[id] = true;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
attribute.nullable_set->erase(id);
|
||||
}
|
||||
}
|
||||
|
||||
setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
|
||||
@ -671,27 +494,13 @@ const FlatDictionary::Attribute & FlatDictionary::getAttribute(const std::string
|
||||
return attributes[it->second];
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void FlatDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
const auto ids_count = ext::size(ids);
|
||||
|
||||
for (const auto i : ext::range(0, ids_count))
|
||||
{
|
||||
const auto id = ids[i];
|
||||
out[i] = id < loaded_ids.size() && loaded_ids[id];
|
||||
}
|
||||
|
||||
query_count.fetch_add(ids_count, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
|
||||
{
|
||||
const auto ids_count = ext::size(loaded_ids);
|
||||
|
||||
PaddedPODArray<Key> ids;
|
||||
ids.reserve(ids_count);
|
||||
|
||||
for (auto idx : ext::range(0, ids_count))
|
||||
if (loaded_ids[idx])
|
||||
ids.push_back(idx);
|
||||
@ -700,7 +509,7 @@ PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
|
||||
|
||||
BlockInputStreamPtr FlatDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<FlatDictionary, Key>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getIds(), column_names);
|
||||
}
|
||||
|
||||
|
@ -3,20 +3,25 @@
|
||||
#include <atomic>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
#include <optional>
|
||||
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <Core/Block.h>
|
||||
#include <ext/range.h>
|
||||
#include <ext/size.h>
|
||||
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
|
||||
class FlatDictionary final : public IDictionary
|
||||
{
|
||||
@ -66,76 +71,16 @@ public:
|
||||
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
|
||||
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void
|
||||
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
|
||||
const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
|
||||
|
||||
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -143,9 +88,13 @@ private:
|
||||
template <typename Value>
|
||||
using ContainerType = PaddedPODArray<Value>;
|
||||
|
||||
using NullableSet = HashSet<Key, DefaultHash<Key>>;
|
||||
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
std::optional<NullableSet> nullable_set;
|
||||
|
||||
std::variant<
|
||||
UInt8,
|
||||
UInt16,
|
||||
@ -180,6 +129,7 @@ private:
|
||||
ContainerType<Float64>,
|
||||
ContainerType<StringRef>>
|
||||
arrays;
|
||||
|
||||
std::unique_ptr<Arena> string_arena;
|
||||
};
|
||||
|
||||
@ -194,13 +144,16 @@ private:
|
||||
void calculateBytesAllocated();
|
||||
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
void resize(Attribute & attribute, const Key id);
|
||||
@ -212,9 +165,6 @@ private:
|
||||
|
||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
||||
|
||||
template <typename T>
|
||||
void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
template <typename ChildType, typename AncestorType>
|
||||
void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
|
@ -4,7 +4,10 @@
|
||||
#include "DictionaryFactory.h"
|
||||
#include "ClickHouseDictionarySource.h"
|
||||
#include <Core/Defines.h>
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
@ -57,12 +60,13 @@ HashedDictionary::HashedDictionary(
|
||||
void HashedDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
|
||||
{
|
||||
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
|
||||
DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
|
||||
|
||||
getItemsImpl<UInt64, UInt64>(
|
||||
*hierarchical_attribute,
|
||||
ids,
|
||||
[&](const size_t row, const UInt64 value) { out[row] = value; },
|
||||
[&](const size_t) { return null_value; });
|
||||
extractor);
|
||||
}
|
||||
|
||||
|
||||
@ -125,183 +129,105 @@ void HashedDictionary::isInConstantVector(const Key child_id, const PaddedPODArr
|
||||
isInImpl(child_id, ancestor_ids, out);
|
||||
}
|
||||
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void HashedDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) \
|
||||
const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void HashedDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
|
||||
ColumnPtr HashedDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
auto size = ids.size();
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return null_value; });
|
||||
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
|
||||
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t row, const auto value) { return out[row] = value; },
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.nullable_set)
|
||||
{
|
||||
ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, false);
|
||||
ColumnUInt8::Container& vec_null_map_to = col_null_map_to->getData();
|
||||
|
||||
for (size_t row = 0; row < ids.size(); ++row)
|
||||
{
|
||||
auto id = ids[row];
|
||||
|
||||
if (attribute.nullable_set->find(id) != nullptr)
|
||||
vec_null_map_to[row] = true;
|
||||
}
|
||||
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void HashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void HashedDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
|
||||
ColumnUInt8::Ptr HashedDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
|
||||
{
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
size_t ids_count = ext::size(ids);
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void HashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE & def, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
auto result = ColumnUInt8::create(ext::size(ids));
|
||||
auto& out = result->getData();
|
||||
|
||||
void HashedDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
|
||||
{
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return StringRef{def}; });
|
||||
}
|
||||
|
||||
void HashedDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
has<UInt8>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
has<UInt16>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
has<UInt32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
has<UInt64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
has<UInt128>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
has<Int8>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
has<Int16>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
has<Int32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
has<Int64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
has<Float32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
has<Float64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
has<StringRef>(attribute, ids, out);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
has<AttributeType>(attribute, ids, out);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
has<Decimal32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
has<Decimal64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
has<Decimal128>(attribute, ids, out);
|
||||
break;
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
query_count.fetch_add(ids_count, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void HashedDictionary::createAttributes()
|
||||
@ -312,7 +238,7 @@ void HashedDictionary::createAttributes()
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
|
||||
attributes.push_back(createAttribute(attribute, attribute.null_value));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
{
|
||||
@ -429,6 +355,13 @@ void HashedDictionary::resize(Attribute & attribute, size_t added_rows)
|
||||
map_ref->resize(added_rows);
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void HashedDictionary::resize<String>(Attribute & attribute, size_t added_rows)
|
||||
{
|
||||
resize<StringRef>(attribute, added_rows);
|
||||
}
|
||||
|
||||
void HashedDictionary::resize(size_t added_rows)
|
||||
{
|
||||
if (!added_rows)
|
||||
@ -436,56 +369,14 @@ void HashedDictionary::resize(size_t added_rows)
|
||||
|
||||
for (auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
resize<UInt8>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
resize<UInt16>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
resize<UInt32>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
resize<UInt64>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
resize<UInt128>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
resize<Int8>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
resize<Int16>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
resize<Int32>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
resize<Int64>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
resize<Float32>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
resize<Float64>(attribute, added_rows);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
resize<AttributeType>(attribute, added_rows);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
resize<Decimal32>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
resize<Decimal64>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
resize<Decimal128>(attribute, added_rows);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
resize<StringRef>(attribute, added_rows);
|
||||
break;
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
@ -562,66 +453,27 @@ void HashedDictionary::addAttributeSize(const Attribute & attribute)
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void HashedDictionary::addAttributeSize<String>(const Attribute & attribute)
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
}
|
||||
|
||||
void HashedDictionary::calculateBytesAllocated()
|
||||
{
|
||||
bytes_allocated += attributes.size() * sizeof(attributes.front());
|
||||
|
||||
for (const auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
addAttributeSize<UInt8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
addAttributeSize<UInt16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
addAttributeSize<UInt32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
addAttributeSize<UInt64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
addAttributeSize<UInt128>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
addAttributeSize<Int8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
addAttributeSize<Int16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
addAttributeSize<Int32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
addAttributeSize<Int64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
addAttributeSize<Float32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
addAttributeSize<Float64>(attribute);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
addAttributeSize<AttributeType>(attribute);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
addAttributeSize<Decimal32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
addAttributeSize<Decimal64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
addAttributeSize<Decimal128>(attribute);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
@ -635,93 +487,66 @@ void HashedDictionary::createAttributeImpl(Attribute & attribute, const Field &
|
||||
attribute.sparse_maps = std::make_unique<SparseCollectionType<T>>();
|
||||
}
|
||||
|
||||
HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
template <>
|
||||
void HashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}, {}, {}};
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
const String & string = null_value.get<String>();
|
||||
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
|
||||
|
||||
switch (type)
|
||||
if (!sparse)
|
||||
attribute.maps = std::make_unique<CollectionType<StringRef>>();
|
||||
else
|
||||
attribute.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>();
|
||||
}
|
||||
|
||||
HashedDictionary::Attribute HashedDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
|
||||
{
|
||||
auto nullable_set = attribute.is_nullable ? std::make_optional<NullableSet>() : std::optional<NullableSet>{};
|
||||
Attribute attr{attribute.underlying_type, std::move(nullable_set), {}, {}, {}, {}};
|
||||
|
||||
auto type_call = [&, this](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
attr.null_values = null_value.get<String>();
|
||||
if (!sparse)
|
||||
attr.maps = std::make_unique<CollectionType<StringRef>>();
|
||||
else
|
||||
attr.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>();
|
||||
attr.string_arena = std::make_unique<Arena>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename MapType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void HashedDictionary::getItemsAttrImpl(
|
||||
const AttrType & attr, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const MapType & attr,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
for (const auto i : ext::range(0, rows))
|
||||
{
|
||||
const auto it = attr.find(ids[i]);
|
||||
set_value(i, it != attr.end() ? static_cast<OutputType>(second(*it)) : get_default(i));
|
||||
set_value(i, it != attr.end() ? static_cast<OutputType>(second(*it)) : default_value_extractor[i]);
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void HashedDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
if (!sparse)
|
||||
return getItemsAttrImpl<OutputType>(*std::get<CollectionPtrType<AttributeType>>(attribute.maps), ids, set_value, get_default);
|
||||
return getItemsAttrImpl<OutputType>(*std::get<SparseCollectionPtrType<AttributeType>>(attribute.sparse_maps), ids, set_value, get_default);
|
||||
return getItemsAttrImpl<AttributeType, OutputType>(*std::get<CollectionPtrType<AttributeType>>(attribute.maps), ids, set_value, default_value_extractor);
|
||||
return getItemsAttrImpl<AttributeType, OutputType>(*std::get<SparseCollectionPtrType<AttributeType>>(attribute.sparse_maps), ids, set_value, default_value_extractor);
|
||||
}
|
||||
|
||||
|
||||
@ -740,58 +565,41 @@ bool HashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const Key id, const String value)
|
||||
{
|
||||
const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
|
||||
return setAttributeValueImpl<StringRef>(attribute, id, StringRef{string_in_arena, value.size()});
|
||||
}
|
||||
|
||||
bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
|
||||
{
|
||||
switch (attribute.type)
|
||||
bool result = false;
|
||||
|
||||
auto type_call = [&, this](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
return setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
return setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
return setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
return setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
return setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
return setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
return setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
return setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
return setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
return setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
return setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
return setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
return setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
return setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
if (attribute.nullable_set)
|
||||
{
|
||||
const auto & string = value.get<String>();
|
||||
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
if (!sparse)
|
||||
if (value.isNull())
|
||||
{
|
||||
auto & map = *std::get<CollectionPtrType<StringRef>>(attribute.maps);
|
||||
return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
|
||||
result = attribute.nullable_set->insert(id).second;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & map = *std::get<SparseCollectionPtrType<StringRef>>(attribute.sparse_maps);
|
||||
return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
|
||||
attribute.nullable_set->erase(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw Exception{"Invalid attribute type", ErrorCodes::BAD_ARGUMENTS};
|
||||
result = setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
const HashedDictionary::Attribute & HashedDictionary::getAttribute(const std::string & attribute_name) const
|
||||
@ -810,9 +618,18 @@ void HashedDictionary::has(const Attribute & attribute, const PaddedPODArray<Key
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
for (const auto i : ext::range(0, rows))
|
||||
{
|
||||
out[i] = attr.find(ids[i]) != nullptr;
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
if (attribute.nullable_set && !out[i])
|
||||
out[i] = attribute.nullable_set->find(ids[i]) != nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void HashedDictionary::has<String>(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
has<StringRef>(attribute, ids, out);
|
||||
}
|
||||
|
||||
template <typename T, typename AttrType>
|
||||
@ -833,50 +650,39 @@ PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds(const Attribute &
|
||||
return getIdsAttrImpl<T>(*std::get<SparseCollectionPtrType<T>>(attribute.sparse_maps));
|
||||
}
|
||||
|
||||
template <>
|
||||
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds<String>(const Attribute & attribute) const
|
||||
{
|
||||
return getIds<StringRef>(attribute);
|
||||
}
|
||||
|
||||
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
|
||||
{
|
||||
const auto & attribute = attributes.front();
|
||||
PaddedPODArray<HashedDictionary::Key> result;
|
||||
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
return getIds<UInt8>(attribute);
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
return getIds<UInt16>(attribute);
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
return getIds<UInt32>(attribute);
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
return getIds<UInt64>(attribute);
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
return getIds<UInt128>(attribute);
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
return getIds<Int8>(attribute);
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
return getIds<Int16>(attribute);
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
return getIds<Int32>(attribute);
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
return getIds<Int64>(attribute);
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
return getIds<Float32>(attribute);
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
return getIds<Float64>(attribute);
|
||||
case AttributeUnderlyingType::utString:
|
||||
return getIds<StringRef>(attribute);
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
/// TODO: Check if order is satisfied
|
||||
result = getIds<AttributeType>(attribute);
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
return getIds<Decimal32>(attribute);
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
return getIds<Decimal64>(attribute);
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
return getIds<Decimal128>(attribute);
|
||||
}
|
||||
return PaddedPODArray<Key>();
|
||||
if (attribute.nullable_set)
|
||||
{
|
||||
for (const auto& value: *attribute.nullable_set)
|
||||
result.push_back(value.getKey());
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BlockInputStreamPtr HashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<HashedDictionary, Key>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getIds(), column_names);
|
||||
}
|
||||
|
||||
|
@ -3,15 +3,18 @@
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <variant>
|
||||
#include <optional>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <sparsehash/sparse_hash_map>
|
||||
#include <ext/range.h>
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
/** This dictionary stores all content in a hash table in memory
|
||||
* (a separate Key -> Value map for each attribute)
|
||||
@ -20,7 +23,6 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
|
||||
class HashedDictionary final : public IDictionary
|
||||
{
|
||||
@ -66,77 +68,16 @@ public:
|
||||
|
||||
void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void
|
||||
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
|
||||
const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE & def, ResultArrayType<TYPE> & out) \
|
||||
const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
|
||||
|
||||
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
void isInVectorVector(
|
||||
const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
|
||||
@ -162,9 +103,13 @@ private:
|
||||
template <typename Value>
|
||||
using SparseCollectionPtrType = std::unique_ptr<SparseCollectionType<Value>>;
|
||||
|
||||
using NullableSet = HashSet<Key, DefaultHash<Key>>;
|
||||
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
std::optional<NullableSet> nullable_set;
|
||||
|
||||
std::variant<
|
||||
UInt8,
|
||||
UInt16,
|
||||
@ -180,7 +125,7 @@ private:
|
||||
Decimal128,
|
||||
Float32,
|
||||
Float64,
|
||||
String>
|
||||
StringRef>
|
||||
null_values;
|
||||
std::variant<
|
||||
CollectionPtrType<UInt8>,
|
||||
@ -235,14 +180,21 @@ private:
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
|
||||
|
||||
template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename MapType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsAttrImpl(
|
||||
const AttrType & attr, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
const MapType & attr,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
bool setAttributeValueImpl(Attribute & attribute, const Key id, const T value);
|
||||
|
@ -10,6 +10,8 @@
|
||||
#include <common/StringRef.h>
|
||||
#include "IDictionarySource.h"
|
||||
#include <Dictionaries/DictionaryStructure.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <memory>
|
||||
@ -20,15 +22,31 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int TYPE_MISMATCH;
|
||||
}
|
||||
|
||||
struct IDictionaryBase;
|
||||
using DictionaryPtr = std::unique_ptr<IDictionaryBase>;
|
||||
|
||||
struct DictionaryStructure;
|
||||
class ColumnString;
|
||||
/** DictionaryKeyType provides IDictionary client information about
|
||||
* which key type is supported by dictionary.
|
||||
*
|
||||
* Simple is for dictionaries that support UInt64 key column.
|
||||
*
|
||||
* Complex is for dictionaries that support any combination of key columns.
|
||||
*
|
||||
* Range is for dictionary that support combination of UInt64 key column,
|
||||
* and numeric representable range key column.
|
||||
*/
|
||||
enum class DictionaryKeyType
|
||||
{
|
||||
simple,
|
||||
complex,
|
||||
range
|
||||
};
|
||||
|
||||
/**
|
||||
* Base class for Dictionaries implementation.
|
||||
*/
|
||||
struct IDictionaryBase : public IExternalLoadable
|
||||
{
|
||||
using Key = UInt64;
|
||||
@ -85,6 +103,33 @@ struct IDictionaryBase : public IExternalLoadable
|
||||
|
||||
virtual bool isInjective(const std::string & attribute_name) const = 0;
|
||||
|
||||
/** Subclass must provide key type that is supported by dictionary.
|
||||
* Client will use that key type to provide valid key columns for `getColumn` and `has` functions.
|
||||
*/
|
||||
virtual DictionaryKeyType getKeyType() const = 0;
|
||||
|
||||
/** Subclass must validate key columns and keys types
|
||||
* and return column representation of dictionary attribute.
|
||||
*
|
||||
* Parameter default_values_column must be used to provide default values
|
||||
* for keys that are not in dictionary. If null pointer is passed,
|
||||
* then default attribute value must be used.
|
||||
*/
|
||||
virtual ColumnPtr getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const = 0;
|
||||
|
||||
/** Subclass must validate key columns and key types and return ColumnUInt8 that
|
||||
* is bitmask representation of is key in dictionary or not.
|
||||
* If key is in dictionary then value of associated row will be 1, otherwise 0.
|
||||
*/
|
||||
virtual ColumnUInt8::Ptr hasKeys(
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types) const = 0;
|
||||
|
||||
virtual BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const = 0;
|
||||
|
||||
bool supportUpdates() const override { return true; }
|
||||
@ -115,7 +160,6 @@ protected:
|
||||
const String full_name;
|
||||
};
|
||||
|
||||
|
||||
struct IDictionary : IDictionaryBase
|
||||
{
|
||||
IDictionary(const StorageID & dict_id_) : IDictionaryBase(dict_id_) {}
|
||||
@ -124,8 +168,7 @@ struct IDictionary : IDictionaryBase
|
||||
|
||||
virtual void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const = 0;
|
||||
|
||||
virtual void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const = 0;
|
||||
|
||||
/// TODO: Rewrite
|
||||
/// Methods for hierarchy.
|
||||
|
||||
virtual void isInVectorVector(
|
||||
@ -157,14 +200,4 @@ struct IDictionary : IDictionaryBase
|
||||
}
|
||||
};
|
||||
|
||||
/// Implicit conversions in dictGet functions is disabled.
|
||||
inline void checkAttributeType(const IDictionaryBase * dictionary, const std::string & attribute_name,
|
||||
AttributeUnderlyingType attribute_type, AttributeUnderlyingType to)
|
||||
{
|
||||
if (attribute_type != to)
|
||||
throw Exception{ErrorCodes::TYPE_MISMATCH, "{}: type mismatch: attribute {} has type {}, expected {}",
|
||||
dictionary->getDictionaryID().getNameForLogs(),
|
||||
attribute_name, toString(attribute_type), toString(to)};
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <IO/WriteIntText.h>
|
||||
#include <Poco/ByteOrder.h>
|
||||
#include <Common/formatIPv6.h>
|
||||
@ -16,6 +17,7 @@
|
||||
#include <ext/range.h>
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -266,167 +268,75 @@ IPAddressDictionary::IPAddressDictionary(
|
||||
calculateBytesAllocated();
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void IPAddressDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void IPAddressDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
|
||||
{
|
||||
validateKeyTypes(key_types);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void IPAddressDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void IPAddressDictionary::getString(
|
||||
ColumnPtr IPAddressDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
validateKeyTypes(key_types);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void IPAddressDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void IPAddressDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
validateKeyTypes(key_types);
|
||||
ColumnPtr result;
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return StringRef{def}; });
|
||||
auto size = key_columns.front()->size();
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto & null_value = std::get<AttributeType>(attribute.null_values);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
|
||||
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<ValueType, ValueType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<ValueType, ValueType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value) { return out[row] = value; },
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void IPAddressDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
|
||||
|
||||
ColumnUInt8::Ptr IPAddressDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
{
|
||||
validateKeyTypes(key_types);
|
||||
|
||||
const auto first_column = key_columns.front();
|
||||
const auto rows = first_column->size();
|
||||
|
||||
auto result = ColumnUInt8::create(rows);
|
||||
auto& out = result->getData();
|
||||
|
||||
if (first_column->isNumeric())
|
||||
{
|
||||
uint8_t addrv6_buf[IPV6_BINARY_LENGTH];
|
||||
@ -451,6 +361,8 @@ void IPAddressDictionary::has(const Columns & key_columns, const DataTypes & key
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void IPAddressDictionary::createAttributes()
|
||||
@ -652,6 +564,13 @@ void IPAddressDictionary::addAttributeSize(const Attribute & attribute)
|
||||
bucket_count = vec.size();
|
||||
}
|
||||
|
||||
template <>
|
||||
void IPAddressDictionary::addAttributeSize<String>(const Attribute & attribute)
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
}
|
||||
|
||||
void IPAddressDictionary::calculateBytesAllocated()
|
||||
{
|
||||
if (auto * ipv4_col = std::get_if<IPv4Container>(&ip_column))
|
||||
@ -669,64 +588,18 @@ void IPAddressDictionary::calculateBytesAllocated()
|
||||
|
||||
for (const auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
addAttributeSize<UInt8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
addAttributeSize<UInt16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
addAttributeSize<UInt32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
addAttributeSize<UInt64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
addAttributeSize<UInt128>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
addAttributeSize<Int8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
addAttributeSize<Int16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
addAttributeSize<Int32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
addAttributeSize<Int64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
addAttributeSize<Float32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
addAttributeSize<Float64>(attribute);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
addAttributeSize<Decimal32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
addAttributeSize<Decimal64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
addAttributeSize<Decimal128>(attribute);
|
||||
break;
|
||||
addAttributeSize<AttributeType>(attribute);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
@ -734,65 +607,27 @@ void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field
|
||||
attribute.maps.emplace<ContainerType<T>>();
|
||||
}
|
||||
|
||||
template <>
|
||||
void IPAddressDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
attribute.null_values = null_value.isNull() ? String() : null_value.get<String>();
|
||||
attribute.maps.emplace<ContainerType<StringRef>>();
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
}
|
||||
|
||||
IPAddressDictionary::Attribute IPAddressDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}, {}};
|
||||
|
||||
switch (type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
|
||||
attr.null_values = null_value.isNull() ? String() : null_value.get<String>();
|
||||
attr.maps.emplace<ContainerType<StringRef>>();
|
||||
attr.string_arena = std::make_unique<Arena>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
@ -802,9 +637,12 @@ const uint8_t * IPAddressDictionary::getIPv6FromOffset(const IPAddressDictionary
|
||||
return reinterpret_cast<const uint8_t *>(&ipv6_col[i * IPV6_BINARY_LENGTH]);
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto first_column = key_columns.front();
|
||||
const auto rows = first_column->size();
|
||||
@ -841,7 +679,7 @@ void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
|
||||
set_value(i, static_cast<OutputType>(vec[row_idx[*found_it]]));
|
||||
}
|
||||
else
|
||||
set_value(i, get_default(i));
|
||||
set_value(i, default_value_extractor[i]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -876,13 +714,16 @@ void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
|
||||
mask_column[*found_it] == mask))
|
||||
set_value(i, static_cast<OutputType>(vec[row_idx[*found_it]]));
|
||||
else
|
||||
set_value(i, get_default(i));
|
||||
set_value(i, default_value_extractor[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void IPAddressDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto first_column = key_columns.front();
|
||||
const auto rows = first_column->size();
|
||||
@ -891,7 +732,7 @@ void IPAddressDictionary::getItemsImpl(
|
||||
if (unlikely(key_columns.size() == 2))
|
||||
{
|
||||
getItemsByTwoKeyColumnsImpl<AttributeType, OutputType>(
|
||||
attribute, key_columns, std::forward<ValueSetter>(set_value), std::forward<DefaultGetter>(get_default));
|
||||
attribute, key_columns, std::forward<ValueSetter>(set_value), default_value_extractor);
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
return;
|
||||
}
|
||||
@ -909,7 +750,7 @@ void IPAddressDictionary::getItemsImpl(
|
||||
if (found != ipNotFound())
|
||||
set_value(i, static_cast<OutputType>(vec[*found]));
|
||||
else
|
||||
set_value(i, get_default(i));
|
||||
set_value(i, default_value_extractor[i]);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -924,7 +765,7 @@ void IPAddressDictionary::getItemsImpl(
|
||||
if (found != ipNotFound())
|
||||
set_value(i, static_cast<OutputType>(vec[*found]));
|
||||
else
|
||||
set_value(i, get_default(i));
|
||||
set_value(i, default_value_extractor[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -940,45 +781,24 @@ void IPAddressDictionary::setAttributeValueImpl(Attribute & attribute, const T v
|
||||
|
||||
void IPAddressDictionary::setAttributeValue(Attribute & attribute, const Field & value)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
return setAttributeValueImpl<UInt8>(attribute, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
return setAttributeValueImpl<UInt16>(attribute, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
return setAttributeValueImpl<UInt32>(attribute, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
return setAttributeValueImpl<UInt64>(attribute, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
return setAttributeValueImpl<UInt128>(attribute, value.get<UInt128>());
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
return setAttributeValueImpl<Int8>(attribute, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
return setAttributeValueImpl<Int16>(attribute, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
return setAttributeValueImpl<Int32>(attribute, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
return setAttributeValueImpl<Int64>(attribute, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
return setAttributeValueImpl<Float32>(attribute, value.get<Float64>());
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
return setAttributeValueImpl<Float64>(attribute, value.get<Float64>());
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
return setAttributeValueImpl<Decimal32>(attribute, value.get<Decimal32>());
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
return setAttributeValueImpl<Decimal64>(attribute, value.get<Decimal64>());
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
return setAttributeValueImpl<Decimal128>(attribute, value.get<Decimal128>());
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
const auto & string = value.get<String>();
|
||||
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
return setAttributeValueImpl<StringRef>(attribute, StringRef{string_in_arena, string.size()});
|
||||
setAttributeValueImpl<StringRef>(attribute, StringRef{string_in_arena, string.size()});
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
setAttributeValueImpl<AttributeType>(attribute, value.get<NearestFieldType<AttributeType>>());
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
const IPAddressDictionary::Attribute & IPAddressDictionary::getAttribute(const std::string & attribute_name) const
|
||||
@ -1045,7 +865,7 @@ static auto keyViewGetter()
|
||||
|
||||
BlockInputStreamPtr IPAddressDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<IPAddressDictionary, UInt64>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
|
||||
|
||||
|
||||
const bool is_ipv4 = std::get_if<IPv4Container>(&ip_column) != nullptr;
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -61,91 +62,16 @@ public:
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const;
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -211,17 +137,23 @@ private:
|
||||
void calculateBytesAllocated();
|
||||
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
static Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsByTwoKeyColumnsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void
|
||||
getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
void setAttributeValueImpl(Attribute & attribute, const T value);
|
||||
|
@ -5,6 +5,8 @@
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
#include <numeric>
|
||||
|
||||
@ -92,6 +94,61 @@ bool IPolygonDictionary::isInjective(const std::string &) const
|
||||
return false;
|
||||
}
|
||||
|
||||
ColumnPtr IPolygonDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto & null_value = std::get<AttributeType>(null_values[index]);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto column_string = ColumnString::create();
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<String, StringRef>(
|
||||
index,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
index,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value) { return out[row] = value; },
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_t) const
|
||||
{
|
||||
// TODO: In order for this to work one would first have to support retrieving arrays from dictionaries.
|
||||
@ -255,8 +312,12 @@ std::vector<IPolygonDictionary::Point> IPolygonDictionary::extractPoints(const C
|
||||
return result;
|
||||
}
|
||||
|
||||
void IPolygonDictionary::has(const Columns & key_columns, const DataTypes &, PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr IPolygonDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
|
||||
{
|
||||
auto size = key_columns.front()->size();
|
||||
auto result = ColumnUInt8::create(size);
|
||||
auto& out = result->getData();
|
||||
|
||||
size_t row = 0;
|
||||
for (const auto & pt : extractPoints(key_columns))
|
||||
{
|
||||
@ -266,6 +327,8 @@ void IPolygonDictionary::has(const Columns & key_columns, const DataTypes &, Pad
|
||||
}
|
||||
|
||||
query_count.fetch_add(row, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) const
|
||||
@ -276,152 +339,12 @@ size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name)
|
||||
return it->second;
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void IPolygonDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto ind = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(null_values[ind]); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
ind, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void IPolygonDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const
|
||||
{
|
||||
const auto ind = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto & null_value = StringRef{std::get<String>(null_values[ind])};
|
||||
|
||||
getItemsImpl<String, StringRef>(
|
||||
ind,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void IPolygonDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes &, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto ind = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
ind, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void IPolygonDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
const auto ind = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsImpl<String, StringRef>(
|
||||
ind,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void IPolygonDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes &, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto ind = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
ind, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void IPolygonDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const String & def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
const auto ind = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsImpl<String, StringRef>(
|
||||
ind,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return StringRef{def}; });
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void IPolygonDictionary::getItemsImpl(
|
||||
size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
size_t attribute_ind,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto points = extractPoints(key_columns);
|
||||
|
||||
@ -437,7 +360,7 @@ void IPolygonDictionary::getItemsImpl(
|
||||
id = ids[id];
|
||||
if (!found)
|
||||
{
|
||||
set_value(i, static_cast<OutputType>(get_default(i)));
|
||||
set_value(i, static_cast<OutputType>(default_value_extractor[i]));
|
||||
continue;
|
||||
}
|
||||
if constexpr (std::is_same<AttributeType, String>::value)
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -78,101 +79,19 @@ public:
|
||||
|
||||
bool isInjective(const std::string & attribute_name) const override;
|
||||
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
|
||||
/** Functions used to retrieve attributes of specific type by key. */
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes &, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes &, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const;
|
||||
|
||||
/** Checks whether or not a point can be found in one of the polygons in the dictionary.
|
||||
* The check is performed for multiple points represented by columns of their x and y coordinates.
|
||||
* The boolean result is written to out.
|
||||
*/
|
||||
// TODO: Refactor the whole dictionary design to perform stronger checks, i.e. make this an override.
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
/** Single coordinate type. */
|
||||
using Coord = Float32;
|
||||
/** A two-dimensional point in Euclidean coordinates. */
|
||||
@ -224,8 +143,12 @@ private:
|
||||
void appendNullValue(AttributeUnderlyingType type, const Field & value);
|
||||
|
||||
/** Helper function for retrieving the value of an attribute by key. */
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void getItemsImpl(size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
size_t attribute_ind,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
/** A mapping from the names of the attributes to their index in the two vectors defined below. */
|
||||
std::map<std::string, size_t> attribute_index_by_name;
|
||||
|
@ -37,26 +37,6 @@ protected:
|
||||
Block getBlock(size_t start, size_t length) const override;
|
||||
|
||||
private:
|
||||
template <typename Type>
|
||||
using DictionaryGetter = void (DictionaryType::*)(
|
||||
const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, PaddedPODArray<Type> &) const;
|
||||
|
||||
template <typename Type>
|
||||
using DictionaryDecimalGetter = void (DictionaryType::*)(
|
||||
const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, DecimalPaddedPODArray<Type> &) const;
|
||||
|
||||
template <typename AttributeType, typename Getter>
|
||||
ColumnPtr getColumnFromAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const PaddedPODArray<Int64> & dates,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & concrete_dictionary) const;
|
||||
ColumnPtr getColumnFromAttributeString(
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const PaddedPODArray<Int64> & dates,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & concrete_dictionary) const;
|
||||
template <typename T>
|
||||
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
|
||||
|
||||
@ -122,41 +102,6 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getBlock(
|
||||
return fillBlock(block_ids, block_start_dates, block_end_dates);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename RangeType, typename Key>
|
||||
template <typename AttributeType, typename Getter>
|
||||
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const PaddedPODArray<Int64> & dates,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & concrete_dictionary) const
|
||||
{
|
||||
if constexpr (IsDecimalNumber<AttributeType>)
|
||||
{
|
||||
auto column = ColumnDecimal<AttributeType>::create(ids_to_fill.size(), 0); /// NOTE: There's wrong scale here, but it's unused.
|
||||
(concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column->getData());
|
||||
return column;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto column_vector = ColumnVector<AttributeType>::create(ids_to_fill.size());
|
||||
(concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column_vector->getData());
|
||||
return column_vector;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename RangeType, typename Key>
|
||||
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttributeString(
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const PaddedPODArray<Int64> & dates,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & concrete_dictionary) const
|
||||
{
|
||||
auto column_string = ColumnString::create();
|
||||
concrete_dictionary.getString(attribute.name, ids_to_fill, dates, column_string.get());
|
||||
return column_string;
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename RangeType, typename Key>
|
||||
template <typename T>
|
||||
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
|
||||
@ -168,7 +113,6 @@ ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getCo
|
||||
return column_vector;
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename RangeType, typename Key>
|
||||
template <typename DictionarySpecialAttributeType, typename T>
|
||||
void RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::addSpecialColumn(
|
||||
@ -216,68 +160,24 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::fillBlock
|
||||
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
|
||||
|
||||
addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids_to_fill, columns);
|
||||
auto ids_column = columns.back().column;
|
||||
addSpecialColumn(structure.range_min, structure.range_max->type, "Range Start", names, block_start_dates, columns);
|
||||
addSpecialColumn(structure.range_max, structure.range_max->type, "Range End", names, block_end_dates, columns);
|
||||
|
||||
auto date_key = makeDateKey(block_start_dates, block_end_dates);
|
||||
auto date_column = getColumnFromPODArray(date_key);
|
||||
|
||||
for (const auto idx : ext::range(0, structure.attributes.size()))
|
||||
{
|
||||
const DictionaryAttribute & attribute = structure.attributes[idx];
|
||||
if (names.find(attribute.name) != names.end())
|
||||
{
|
||||
ColumnPtr column;
|
||||
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
|
||||
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids_to_fill, date_key, attribute, *dictionary)
|
||||
switch (attribute.underlying_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt128);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int8);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int16);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Decimal32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Decimal64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Decimal128);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
column = getColumnFromAttributeString(ids_to_fill, date_key, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
#undef GET_COLUMN_FORM_ATTRIBUTE
|
||||
ColumnPtr column = dictionary->getColumn(
|
||||
attribute.name,
|
||||
attribute.type,
|
||||
{ids_column, date_column},
|
||||
{std::make_shared<DataTypeUInt64>(), std::make_shared<DataTypeInt64>()},
|
||||
nullptr);
|
||||
columns.emplace_back(column, attribute.type, attribute.name);
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,8 @@
|
||||
#include <ext/range.h>
|
||||
#include "DictionaryFactory.h"
|
||||
#include "RangeDictionaryBlockInputStream.h"
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
@ -50,6 +52,7 @@ namespace ErrorCodes
|
||||
extern const int DICTIONARY_IS_EMPTY;
|
||||
extern const int TYPE_MISMATCH;
|
||||
extern const int UNSUPPORTED_METHOD;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
bool RangeHashedDictionary::Range::isCorrectDate(const RangeStorageType & date)
|
||||
@ -85,66 +88,101 @@ RangeHashedDictionary::RangeHashedDictionary(
|
||||
calculateBytesAllocated();
|
||||
}
|
||||
|
||||
|
||||
#define DECLARE_MULTIPLE_GETTER(TYPE) \
|
||||
void RangeHashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<RangeStorageType> & dates, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItems<TYPE>(attribute, ids, dates, out); \
|
||||
}
|
||||
DECLARE_MULTIPLE_GETTER(UInt8)
|
||||
DECLARE_MULTIPLE_GETTER(UInt16)
|
||||
DECLARE_MULTIPLE_GETTER(UInt32)
|
||||
DECLARE_MULTIPLE_GETTER(UInt64)
|
||||
DECLARE_MULTIPLE_GETTER(UInt128)
|
||||
DECLARE_MULTIPLE_GETTER(Int8)
|
||||
DECLARE_MULTIPLE_GETTER(Int16)
|
||||
DECLARE_MULTIPLE_GETTER(Int32)
|
||||
DECLARE_MULTIPLE_GETTER(Int64)
|
||||
DECLARE_MULTIPLE_GETTER(Float32)
|
||||
DECLARE_MULTIPLE_GETTER(Float64)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal32)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal64)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal128)
|
||||
#undef DECLARE_MULTIPLE_GETTER
|
||||
|
||||
void RangeHashedDictionary::getString(
|
||||
ColumnPtr RangeHashedDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
ColumnString * out) const
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::utString);
|
||||
const auto & attr = *std::get<Ptr<StringRef>>(attribute.maps);
|
||||
const auto & null_value = std::get<String>(attribute.null_values);
|
||||
ColumnPtr result;
|
||||
|
||||
for (const auto i : ext::range(0, ids.size()))
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
/// Cast second column to storage type
|
||||
Columns modified_key_columns = key_columns;
|
||||
|
||||
auto range_storage_column = key_columns[1];
|
||||
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};
|
||||
|
||||
auto range_column_storage_type = std::make_shared<DataTypeInt64>();
|
||||
modified_key_columns[1] = castColumnAccurate(column_to_cast, range_column_storage_type);
|
||||
|
||||
ColumnUInt8::MutablePtr col_null_map_to;
|
||||
ColumnUInt8::Container * vec_null_map_to = nullptr;
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
const auto * it = attr.find(ids[i]);
|
||||
if (it)
|
||||
{
|
||||
const auto date = dates[i];
|
||||
const auto & ranges_and_values = it->getMapped();
|
||||
const auto val_it
|
||||
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<StringRef> & v)
|
||||
{
|
||||
return v.range.contains(date);
|
||||
});
|
||||
col_null_map_to = ColumnUInt8::create(keys_size, false);
|
||||
vec_null_map_to = &col_null_map_to->getData();
|
||||
}
|
||||
|
||||
const auto string_ref = val_it != std::end(ranges_and_values) ? val_it->value : StringRef{null_value};
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
|
||||
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<ValueType, ValueType>(
|
||||
attribute,
|
||||
modified_key_columns,
|
||||
[&](const size_t row, const StringRef value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
out->insertData(null_value.data(), null_value.size());
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<ValueType, ValueType>(
|
||||
attribute,
|
||||
modified_key_columns,
|
||||
[&](const size_t row, const auto value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out[row] = value;
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
}
|
||||
|
||||
query_count.fetch_add(ids.size(), std::memory_order_relaxed);
|
||||
return result;
|
||||
}
|
||||
|
||||
ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns &, const DataTypes &) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
|
||||
"Has not supported", getDictionaryID().getNameForLogs());
|
||||
}
|
||||
|
||||
void RangeHashedDictionary::createAttributes()
|
||||
{
|
||||
@ -154,7 +192,7 @@ void RangeHashedDictionary::createAttributes()
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
|
||||
attributes.push_back(createAttribute(attribute, attribute.null_value));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
throw Exception{ErrorCodes::BAD_ARGUMENTS, "Hierarchical attributes not supported by {} dictionary.",
|
||||
@ -220,66 +258,27 @@ void RangeHashedDictionary::addAttributeSize(const Attribute & attribute)
|
||||
bucket_count = map_ref->getBufferSizeInCells();
|
||||
}
|
||||
|
||||
template <>
|
||||
void RangeHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
}
|
||||
|
||||
void RangeHashedDictionary::calculateBytesAllocated()
|
||||
{
|
||||
bytes_allocated += attributes.size() * sizeof(attributes.front());
|
||||
|
||||
for (const auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
addAttributeSize<UInt8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
addAttributeSize<UInt16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
addAttributeSize<UInt32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
addAttributeSize<UInt64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
addAttributeSize<UInt128>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
addAttributeSize<Int8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
addAttributeSize<Int16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
addAttributeSize<Int32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
addAttributeSize<Int64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
addAttributeSize<Float32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
addAttributeSize<Float64>(attribute);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
addAttributeSize<AttributeType>(attribute);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
addAttributeSize<Decimal32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
addAttributeSize<Decimal64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
addAttributeSize<Decimal128>(attribute);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
@ -290,125 +289,80 @@ void RangeHashedDictionary::createAttributeImpl(Attribute & attribute, const Fie
|
||||
attribute.maps = std::make_unique<Collection<T>>();
|
||||
}
|
||||
|
||||
RangeHashedDictionary::Attribute
|
||||
RangeHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
template <>
|
||||
void RangeHashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}, {}};
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
const String & string = null_value.get<String>();
|
||||
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
|
||||
attribute.maps = std::make_unique<Collection<StringRef>>();
|
||||
}
|
||||
|
||||
switch (type)
|
||||
RangeHashedDictionary::Attribute
|
||||
RangeHashedDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
|
||||
{
|
||||
Attribute attr{attribute.underlying_type, attribute.is_nullable, {}, {}, {}};
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
attr.null_values = null_value.get<String>();
|
||||
attr.maps = std::make_unique<Collection<StringRef>>();
|
||||
attr.string_arena = std::make_unique<Arena>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
template <typename OutputType>
|
||||
void RangeHashedDictionary::getItems(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
PaddedPODArray<OutputType> & out) const
|
||||
{
|
||||
if (false) {} // NOLINT
|
||||
#define DISPATCH(TYPE) else if (attribute.type == AttributeUnderlyingType::ut##TYPE) getItemsImpl<TYPE, OutputType>(attribute, ids, dates, out);
|
||||
DISPATCH(UInt8)
|
||||
DISPATCH(UInt16)
|
||||
DISPATCH(UInt32)
|
||||
DISPATCH(UInt64)
|
||||
DISPATCH(UInt128)
|
||||
DISPATCH(Int8)
|
||||
DISPATCH(Int16)
|
||||
DISPATCH(Int32)
|
||||
DISPATCH(Int64)
|
||||
DISPATCH(Float32)
|
||||
DISPATCH(Float64)
|
||||
DISPATCH(Decimal32)
|
||||
DISPATCH(Decimal64)
|
||||
DISPATCH(Decimal128)
|
||||
#undef DISPATCH
|
||||
else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void RangeHashedDictionary::getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
PaddedPODArray<OutputType> & out) const
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
|
||||
const auto null_value = std::get<AttributeType>(attribute.null_values);
|
||||
PaddedPODArray<Key> key_backup_storage;
|
||||
PaddedPODArray<RangeStorageType> range_backup_storage;
|
||||
|
||||
for (const auto i : ext::range(0, ids.size()))
|
||||
const PaddedPODArray<Key> & ids = getColumnVectorData(this, key_columns[0], key_backup_storage);
|
||||
const PaddedPODArray<RangeStorageType> & dates = getColumnVectorData(this, key_columns[1], range_backup_storage);
|
||||
|
||||
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
|
||||
|
||||
for (const auto row : ext::range(0, ids.size()))
|
||||
{
|
||||
const auto it = attr.find(ids[i]);
|
||||
const auto it = attr.find(ids[row]);
|
||||
if (it)
|
||||
{
|
||||
const auto date = dates[i];
|
||||
const auto date = dates[row];
|
||||
const auto & ranges_and_values = it->getMapped();
|
||||
const auto val_it
|
||||
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v)
|
||||
{
|
||||
return v.range.contains(date);
|
||||
});
|
||||
const auto val_it = std::find_if(
|
||||
std::begin(ranges_and_values),
|
||||
std::end(ranges_and_values),
|
||||
[date](const Value<AttributeType> & v)
|
||||
{
|
||||
return v.range.contains(date);
|
||||
});
|
||||
|
||||
out[i] = static_cast<OutputType>(val_it != std::end(ranges_and_values) ? val_it->value : null_value); // NOLINT
|
||||
if (val_it != std::end(ranges_and_values))
|
||||
{
|
||||
auto& value = val_it->value;
|
||||
|
||||
if (value)
|
||||
set_value(row, static_cast<OutputType>(*value), false); // NOLINT
|
||||
else
|
||||
set_value(row, default_value_extractor[row], true);
|
||||
}
|
||||
else
|
||||
{
|
||||
set_value(row, default_value_extractor[row], false);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
out[i] = static_cast<OutputType>(null_value); // NOLINT
|
||||
set_value(row, default_value_extractor[row], false);
|
||||
}
|
||||
}
|
||||
|
||||
@ -417,9 +371,32 @@ void RangeHashedDictionary::getItemsImpl(
|
||||
|
||||
|
||||
template <typename T>
|
||||
void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value)
|
||||
void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value)
|
||||
{
|
||||
auto & map = *std::get<Ptr<T>>(attribute.maps);
|
||||
using ValueType = std::conditional_t<std::is_same_v<T, String>, StringRef, T>;
|
||||
auto & map = *std::get<Ptr<ValueType>>(attribute.maps);
|
||||
|
||||
Value<ValueType> value_to_insert;
|
||||
|
||||
if (attribute.is_nullable && value.isNull())
|
||||
{
|
||||
value_to_insert = { range, {} };
|
||||
}
|
||||
else
|
||||
{
|
||||
if constexpr (std::is_same_v<T, String>)
|
||||
{
|
||||
const auto & string = value.get<String>();
|
||||
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
const StringRef string_ref{string_in_arena, string.size()};
|
||||
value_to_insert = Value<ValueType>{ range, { string_ref }};
|
||||
}
|
||||
else
|
||||
{
|
||||
value_to_insert = Value<ValueType>{ range, { value.get<NearestFieldType<ValueType>>() }};
|
||||
}
|
||||
}
|
||||
|
||||
const auto it = map.find(id);
|
||||
|
||||
if (it)
|
||||
@ -427,92 +404,28 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K
|
||||
auto & values = it->getMapped();
|
||||
|
||||
const auto insert_it
|
||||
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<T> & lhs, const Range & rhs_range)
|
||||
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<ValueType> & lhs, const Range & rhs_range)
|
||||
{
|
||||
return lhs.range < rhs_range;
|
||||
});
|
||||
|
||||
values.insert(insert_it, Value<T>{range, value});
|
||||
values.insert(insert_it, std::move(value_to_insert));
|
||||
}
|
||||
else
|
||||
map.insert({id, Values<T>{Value<T>{range, value}}});
|
||||
map.insert({id, Values<ValueType>{std::move(value_to_insert)}});
|
||||
}
|
||||
|
||||
void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
setAttributeValueImpl<UInt8>(attribute, id, range, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
setAttributeValueImpl<UInt16>(attribute, id, range, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
setAttributeValueImpl<UInt32>(attribute, id, range, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
setAttributeValueImpl<UInt64>(attribute, id, range, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
setAttributeValueImpl<UInt128>(attribute, id, range, value.get<UInt128>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
setAttributeValueImpl<Int8>(attribute, id, range, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
setAttributeValueImpl<Int16>(attribute, id, range, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
setAttributeValueImpl<Int32>(attribute, id, range, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
setAttributeValueImpl<Int64>(attribute, id, range, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
setAttributeValueImpl<Float32>(attribute, id, range, value.get<Float64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
setAttributeValueImpl<Float64>(attribute, id, range, value.get<Float64>());
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
setAttributeValueImpl<Decimal32>(attribute, id, range, value.get<Decimal32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
setAttributeValueImpl<Decimal64>(attribute, id, range, value.get<Decimal64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
setAttributeValueImpl<Decimal128>(attribute, id, range, value.get<Decimal128>());
|
||||
break;
|
||||
setAttributeValueImpl<AttributeType>(attribute, id, range, value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
auto & map = *std::get<Ptr<StringRef>>(attribute.maps);
|
||||
const auto & string = value.get<String>();
|
||||
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
const StringRef string_ref{string_in_arena, string.size()};
|
||||
|
||||
auto * it = map.find(id);
|
||||
|
||||
if (it)
|
||||
{
|
||||
auto & values = it->getMapped();
|
||||
|
||||
const auto insert_it = std::lower_bound(
|
||||
std::begin(values), std::end(values), range, [](const Value<StringRef> & lhs, const Range & rhs_range)
|
||||
{
|
||||
return lhs.range < rhs_range;
|
||||
});
|
||||
|
||||
values.insert(insert_it, Value<StringRef>{range, string_ref});
|
||||
}
|
||||
else
|
||||
map.insert({id, Values<StringRef>{Value<StringRef>{range, string_ref}}});
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttribute(const std::string & attribute_name) const
|
||||
@ -541,55 +454,18 @@ void RangeHashedDictionary::getIdsAndDates(
|
||||
{
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
getIdsAndDates<UInt8>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
getIdsAndDates<UInt16>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
getIdsAndDates<UInt32>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
getIdsAndDates<UInt64>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
getIdsAndDates<UInt128>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
getIdsAndDates<Int8>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
getIdsAndDates<Int16>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
getIdsAndDates<Int32>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
getIdsAndDates<Int64>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
getIdsAndDates<Float32>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
getIdsAndDates<Float64>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
getIdsAndDates<Decimal32>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
getIdsAndDates<Decimal64>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
getIdsAndDates<Decimal128>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
}
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates);
|
||||
else
|
||||
getIdsAndDates<AttributeType>(attribute, ids, start_dates, end_dates);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
template <typename T, typename RangeType>
|
||||
|
@ -1,16 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <variant>
|
||||
#include <optional>
|
||||
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -52,38 +54,18 @@ public:
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
}
|
||||
|
||||
typedef Int64 RangeStorageType;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::range; }
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
#define DECLARE_MULTIPLE_GETTER(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<RangeStorageType> & dates, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE_MULTIPLE_GETTER(UInt8)
|
||||
DECLARE_MULTIPLE_GETTER(UInt16)
|
||||
DECLARE_MULTIPLE_GETTER(UInt32)
|
||||
DECLARE_MULTIPLE_GETTER(UInt64)
|
||||
DECLARE_MULTIPLE_GETTER(UInt128)
|
||||
DECLARE_MULTIPLE_GETTER(Int8)
|
||||
DECLARE_MULTIPLE_GETTER(Int16)
|
||||
DECLARE_MULTIPLE_GETTER(Int32)
|
||||
DECLARE_MULTIPLE_GETTER(Int64)
|
||||
DECLARE_MULTIPLE_GETTER(Float32)
|
||||
DECLARE_MULTIPLE_GETTER(Float64)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal32)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal64)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal128)
|
||||
#undef DECLARE_MULTIPLE_GETTER
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
ColumnString * out) const;
|
||||
using RangeStorageType = Int64;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -101,7 +83,7 @@ private:
|
||||
struct Value final
|
||||
{
|
||||
Range range;
|
||||
T value;
|
||||
std::optional<T> value;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
@ -111,10 +93,14 @@ private:
|
||||
template <typename T>
|
||||
using Ptr = std::unique_ptr<Collection<T>>;
|
||||
|
||||
using NullableSet = HashSet<Key, DefaultHash<Key>>;
|
||||
|
||||
struct Attribute final
|
||||
{
|
||||
public:
|
||||
AttributeUnderlyingType type;
|
||||
bool is_nullable;
|
||||
|
||||
std::variant<
|
||||
UInt8,
|
||||
UInt16,
|
||||
@ -130,7 +116,7 @@ private:
|
||||
Decimal128,
|
||||
Float32,
|
||||
Float64,
|
||||
String>
|
||||
StringRef>
|
||||
null_values;
|
||||
std::variant<
|
||||
Ptr<UInt8>,
|
||||
@ -162,30 +148,21 @@ private:
|
||||
void calculateBytesAllocated();
|
||||
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
|
||||
|
||||
|
||||
template <typename OutputType>
|
||||
void getItems(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
PaddedPODArray<OutputType> & out) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
PaddedPODArray<OutputType> & out) const;
|
||||
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value);
|
||||
static void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value);
|
||||
|
||||
void setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value);
|
||||
static void setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value);
|
||||
|
||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
||||
|
||||
|
@ -22,7 +22,8 @@
|
||||
#include <filesystem>
|
||||
#include <city.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -445,7 +446,7 @@ void SSDCachePartition::flush()
|
||||
|
||||
template <typename Out, typename GetDefault>
|
||||
void SSDCachePartition::getValue(const size_t attribute_index, const PaddedPODArray<UInt64> & ids,
|
||||
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
|
||||
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
|
||||
std::chrono::system_clock::time_point now) const
|
||||
{
|
||||
auto set_value = [&](const size_t index, ReadBuffer & buf)
|
||||
@ -456,7 +457,7 @@ void SSDCachePartition::getValue(const size_t attribute_index, const PaddedPODAr
|
||||
if (metadata.expiresAt() > now)
|
||||
{
|
||||
if (metadata.isDefault())
|
||||
out[index] = get_default(index);
|
||||
out[index] = default_value_extractor[index];
|
||||
else
|
||||
{
|
||||
ignoreFromBufferToAttributeIndex(attribute_index, buf);
|
||||
@ -939,14 +940,14 @@ SSDCacheStorage::~SSDCacheStorage()
|
||||
template <typename Out, typename GetDefault>
|
||||
void SSDCacheStorage::getValue(const size_t attribute_index, const PaddedPODArray<UInt64> & ids,
|
||||
ResultArrayType<Out> & out, std::unordered_map<Key, std::vector<size_t>> & not_found,
|
||||
GetDefault & get_default, std::chrono::system_clock::time_point now) const
|
||||
GetDefault & default_value_extractor, std::chrono::system_clock::time_point now) const
|
||||
{
|
||||
std::vector<bool> found(ids.size(), false);
|
||||
|
||||
{
|
||||
std::shared_lock lock(rw_lock);
|
||||
for (const auto & partition : partitions)
|
||||
partition->getValue<Out>(attribute_index, ids, out, found, get_default, now);
|
||||
partition->getValue<Out>(attribute_index, ids, out, found, default_value_extractor, now);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < ids.size(); ++i)
|
||||
@ -1327,102 +1328,62 @@ SSDCacheDictionary::SSDCacheDictionary(
|
||||
createAttributes();
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void SSDCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto index = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
const auto null_value = std::get<TYPE>(null_values[index]); /* NOLINT */ \
|
||||
getItemsNumberImpl<TYPE, TYPE>(index, ids, out, [&](const size_t) { return null_value; }); /* NOLINT */ \
|
||||
}
|
||||
ColumnPtr SSDCacheDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
auto keys_size = ids.size();
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void SSDCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto index = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>( \
|
||||
index, \
|
||||
ids, \
|
||||
out, \
|
||||
[&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void SSDCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto index = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>( \
|
||||
index, \
|
||||
ids, \
|
||||
out, \
|
||||
[&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto & null_value = std::get<AttributeType>(null_values[index]);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
getItemsStringImpl(index, ids, column.get(), default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
getItemsNumberImpl<AttributeType, AttributeType>(index, ids, out, default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
void SSDCacheDictionary::getItemsNumberImpl(
|
||||
const size_t attribute_index, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
|
||||
const size_t attribute_index,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ResultArrayType<OutputType> & out,
|
||||
DefaultGetter & default_value_extractor) const
|
||||
{
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
std::unordered_map<Key, std::vector<size_t>> not_found_ids;
|
||||
storage.getValue<OutputType>(attribute_index, ids, out, not_found_ids, get_default, now);
|
||||
storage.getValue<OutputType>(attribute_index, ids, out, not_found_ids, default_value_extractor, now);
|
||||
if (not_found_ids.empty())
|
||||
return;
|
||||
|
||||
@ -1440,42 +1401,17 @@ void SSDCacheDictionary::getItemsNumberImpl(
|
||||
[&](const size_t id)
|
||||
{
|
||||
for (const size_t row : not_found_ids[id])
|
||||
out[row] = get_default(row);
|
||||
out[row] = default_value_extractor[row];
|
||||
},
|
||||
getLifetime());
|
||||
}
|
||||
|
||||
void SSDCacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
|
||||
{
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto null_value = StringRef{std::get<String>(null_values[index])};
|
||||
|
||||
getItemsStringImpl(index, ids, out, [&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
void SSDCacheDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
|
||||
{
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsStringImpl(index, ids, out, [&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
|
||||
void SSDCacheDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
|
||||
{
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsStringImpl(index, ids, out, [&](const size_t) { return StringRef{def}; });
|
||||
}
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const PaddedPODArray<Key> & ids,
|
||||
ColumnString * out, DefaultGetter && get_default) const
|
||||
void SSDCacheDictionary::getItemsStringImpl(
|
||||
const size_t attribute_index,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ColumnString * out,
|
||||
DefaultGetter & default_value_extractor) const
|
||||
{
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
@ -1494,7 +1430,7 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
|
||||
{
|
||||
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
|
||||
{
|
||||
auto to_insert = get_default(row);
|
||||
auto to_insert = default_value_extractor[row];
|
||||
out->insertData(to_insert.data, to_insert.size);
|
||||
++default_index;
|
||||
}
|
||||
@ -1525,7 +1461,7 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
|
||||
const auto & id = ids[row];
|
||||
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
|
||||
{
|
||||
auto to_insert = get_default(row);
|
||||
auto to_insert = default_value_extractor[row];
|
||||
out->insertData(to_insert.data, to_insert.size);
|
||||
++default_index;
|
||||
}
|
||||
@ -1539,20 +1475,30 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
|
||||
}
|
||||
else
|
||||
{
|
||||
auto to_insert = get_default(row);
|
||||
auto to_insert = default_value_extractor[row];
|
||||
out->insertData(to_insert.data, to_insert.size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SSDCacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr SSDCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
|
||||
{
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
auto result = ColumnUInt8::create(ext::size(ids));
|
||||
auto& out = result->getData();
|
||||
|
||||
const auto rows = ext::size(ids);
|
||||
for (const auto row : ext::range(0, rows))
|
||||
out[row] = false;
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
std::unordered_map<Key, std::vector<size_t>> not_found_ids;
|
||||
storage.has(ids, out, not_found_ids, now);
|
||||
if (not_found_ids.empty())
|
||||
return;
|
||||
return result;
|
||||
|
||||
std::vector<Key> required_ids(not_found_ids.size());
|
||||
std::transform(std::begin(not_found_ids), std::end(not_found_ids), std::begin(required_ids), [](const auto & pair) { return pair.first; });
|
||||
@ -1571,11 +1517,13 @@ void SSDCacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UIn
|
||||
out[row] = false;
|
||||
},
|
||||
getLifetime());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BlockInputStreamPtr SSDCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<SSDCacheDictionary, Key>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, storage.getCachedIds(), column_names);
|
||||
}
|
||||
|
||||
|
@ -2,11 +2,15 @@
|
||||
|
||||
#if defined(__linux__) || defined(__FreeBSD__)
|
||||
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <list>
|
||||
#include <shared_mutex>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include <Poco/Logger.h>
|
||||
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/ArenaWithFreeLists.h>
|
||||
@ -16,12 +20,11 @@
|
||||
#include <Core/Block.h>
|
||||
#include <Dictionaries/BucketCache.h>
|
||||
#include <IO/HashingWriteBuffer.h>
|
||||
#include <list>
|
||||
#include <pcg_random.hpp>
|
||||
#include <Poco/Logger.h>
|
||||
#include <shared_mutex>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -109,7 +112,7 @@ public:
|
||||
|
||||
template <typename Out, typename GetDefault>
|
||||
void getValue(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
|
||||
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
|
||||
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
|
||||
std::chrono::system_clock::time_point now) const;
|
||||
|
||||
void getString(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
|
||||
@ -232,7 +235,7 @@ public:
|
||||
template <typename Out, typename GetDefault>
|
||||
void getValue(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
|
||||
ResultArrayType<Out> & out, std::unordered_map<Key, std::vector<size_t>> & not_found,
|
||||
GetDefault & get_default, std::chrono::system_clock::time_point now) const;
|
||||
GetDefault & default_value_extractor, std::chrono::system_clock::time_point now) const;
|
||||
|
||||
void getString(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
|
||||
StringRefs & refs, ArenaWithFreeLists & arena, std::unordered_map<Key, std::vector<size_t>> & not_found,
|
||||
@ -351,77 +354,20 @@ public:
|
||||
|
||||
std::exception_ptr getLastException() const override { return storage.getLastException(); }
|
||||
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
|
||||
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = SSDCacheStorage::ResultArrayType<T>;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void
|
||||
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * def, ColumnString * out)
|
||||
const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * out) const;
|
||||
|
||||
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
@ -434,11 +380,17 @@ private:
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
void getItemsNumberImpl(
|
||||
size_t attribute_index, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
|
||||
size_t attribute_index,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ResultArrayType<OutputType> & out,
|
||||
DefaultGetter & default_value_extractor) const;
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void getItemsStringImpl(size_t attribute_index, const PaddedPODArray<Key> & ids,
|
||||
ColumnString * out, DefaultGetter && get_default) const;
|
||||
void getItemsStringImpl(
|
||||
size_t attribute_index,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ColumnString * out,
|
||||
DefaultGetter & default_value_extractor) const;
|
||||
|
||||
const std::string name;
|
||||
const DictionaryStructure dict_struct;
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <Common/ProfilingScopedRWLock.h>
|
||||
#include <Common/MemorySanitizer.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
#include <IO/AIO.h>
|
||||
@ -23,7 +24,7 @@
|
||||
#include <filesystem>
|
||||
#include <city.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -461,8 +462,12 @@ void SSDComplexKeyCachePartition::flush()
|
||||
|
||||
template <typename Out, typename GetDefault>
|
||||
void SSDComplexKeyCachePartition::getValue(
|
||||
const size_t attribute_index, const Columns & key_columns, const DataTypes & key_types,
|
||||
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
|
||||
const size_t attribute_index,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
ResultArrayType<Out> & out,
|
||||
std::vector<bool> & found,
|
||||
GetDefault & default_value_extractor,
|
||||
std::chrono::system_clock::time_point now) const
|
||||
{
|
||||
auto set_value = [&](const size_t index, ReadBuffer & buf)
|
||||
@ -474,7 +479,7 @@ void SSDComplexKeyCachePartition::getValue(
|
||||
if (metadata.expiresAt() > now)
|
||||
{
|
||||
if (metadata.isDefault())
|
||||
out[index] = get_default(index);
|
||||
out[index] = default_value_extractor[index];
|
||||
else
|
||||
{
|
||||
ignoreFromBufferToAttributeIndex(attribute_index, buf);
|
||||
@ -520,7 +525,7 @@ void SSDComplexKeyCachePartition::getString(const size_t attribute_index,
|
||||
getImpl(key_columns, key_types, set_value, found);
|
||||
}
|
||||
|
||||
void SSDComplexKeyCachePartition::has(
|
||||
void SSDComplexKeyCachePartition::hasKeys(
|
||||
const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
|
||||
std::vector<bool> & found, std::chrono::system_clock::time_point now) const
|
||||
{
|
||||
@ -1018,7 +1023,7 @@ void SSDComplexKeyCacheStorage::getString(
|
||||
hit_count.fetch_add(n - count_not_found, std::memory_order_release);
|
||||
}
|
||||
|
||||
void SSDComplexKeyCacheStorage::has(
|
||||
void SSDComplexKeyCacheStorage::hasKeys(
|
||||
const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
|
||||
std::unordered_map<KeyRef, std::vector<size_t>> & not_found,
|
||||
TemporalComplexKeysPool & not_found_pool, std::chrono::system_clock::time_point now) const
|
||||
@ -1031,7 +1036,7 @@ void SSDComplexKeyCacheStorage::has(
|
||||
{
|
||||
std::shared_lock lock(rw_lock);
|
||||
for (const auto & partition : partitions)
|
||||
partition->has(key_columns, key_types, out, found, now);
|
||||
partition->hasKeys(key_columns, key_types, out, found, now);
|
||||
}
|
||||
|
||||
size_t count_not_found = 0;
|
||||
@ -1376,96 +1381,64 @@ SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary(
|
||||
createAttributes();
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void SSDComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto index = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
const auto null_value = std::get<TYPE>(null_values[index]); /* NOLINT */ \
|
||||
getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t) { return null_value; }); /* NOLINT */ \
|
||||
}
|
||||
ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void SSDComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto index = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t row) { return def[row]; }); /* NOLINT */ \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void SSDComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto index = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t) { return def; }); /* NOLINT */ \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto & null_value = std::get<AttributeType>(null_values[index]);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
getItemsStringImpl(index, key_columns, key_types, out, default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
getItemsNumberImpl<AttributeType, AttributeType>(
|
||||
index,
|
||||
key_columns,
|
||||
key_types,
|
||||
out,
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
|
||||
const size_t attribute_index,
|
||||
const Columns & key_columns, const DataTypes & key_types,
|
||||
ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
ResultArrayType<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
assert(dict_struct.key);
|
||||
assert(key_columns.size() == key_types.size());
|
||||
@ -1476,7 +1449,7 @@ void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
|
||||
|
||||
TemporalComplexKeysPool not_found_pool;
|
||||
std::unordered_map<KeyRef, std::vector<size_t>> not_found_keys;
|
||||
storage.getValue<OutputType>(attribute_index, key_columns, key_types, out, not_found_keys, not_found_pool, get_default, now);
|
||||
storage.getValue<OutputType>(attribute_index, key_columns, key_types, out, not_found_keys, not_found_pool, default_value_extractor, now);
|
||||
if (not_found_keys.empty())
|
||||
return;
|
||||
|
||||
@ -1503,54 +1476,17 @@ void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
|
||||
[&](const auto key)
|
||||
{
|
||||
for (const size_t row : not_found_keys[key])
|
||||
out[row] = get_default(row);
|
||||
out[row] = default_value_extractor[row];
|
||||
},
|
||||
getLifetime());
|
||||
}
|
||||
|
||||
void SSDComplexKeyCacheDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
|
||||
{
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto null_value = StringRef{std::get<String>(null_values[index])};
|
||||
|
||||
getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
void SSDComplexKeyCacheDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns, const DataTypes & key_types,
|
||||
const ColumnString * const def, ColumnString * const out) const
|
||||
{
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
|
||||
void SSDComplexKeyCacheDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return StringRef{def}; });
|
||||
}
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void SSDComplexKeyCacheDictionary::getItemsStringImpl(
|
||||
const size_t attribute_index,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
ColumnString * out,
|
||||
DefaultGetter && get_default) const
|
||||
DictionaryDefaultValueExtractor<String> & default_value_extractor) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
@ -1576,7 +1512,7 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
|
||||
{
|
||||
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
|
||||
{
|
||||
auto to_insert = get_default(row);
|
||||
auto to_insert = default_value_extractor[row];
|
||||
out->insertData(to_insert.data, to_insert.size);
|
||||
++default_index;
|
||||
}
|
||||
@ -1619,7 +1555,7 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
|
||||
SCOPE_EXIT(tmp_keys_pool.rollback(key));
|
||||
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
|
||||
{
|
||||
auto to_insert = get_default(row);
|
||||
auto to_insert = default_value_extractor[row];
|
||||
out->insertData(to_insert.data, to_insert.size);
|
||||
++default_index;
|
||||
}
|
||||
@ -1633,26 +1569,31 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
|
||||
}
|
||||
else
|
||||
{
|
||||
auto to_insert = get_default(row);
|
||||
auto to_insert = default_value_extractor[row];
|
||||
out->insertData(to_insert.data, to_insert.size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SSDComplexKeyCacheDictionary::has(
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr SSDComplexKeyCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
|
||||
auto result = ColumnUInt8::create(rows_num);
|
||||
auto& out = result->getData();
|
||||
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
out[row] = false;
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
std::unordered_map<KeyRef, std::vector<size_t>> not_found_keys;
|
||||
TemporalComplexKeysPool not_found_pool;
|
||||
storage.has(key_columns, key_types, out, not_found_keys, not_found_pool, now);
|
||||
storage.hasKeys(key_columns, key_types, out, not_found_keys, not_found_pool, now);
|
||||
if (not_found_keys.empty())
|
||||
return;
|
||||
return result;
|
||||
|
||||
std::vector<KeyRef> required_keys(not_found_keys.size());
|
||||
std::transform(std::begin(not_found_keys), std::end(not_found_keys), std::begin(required_keys), [](const auto & pair) { return pair.first; });
|
||||
@ -1681,6 +1622,8 @@ void SSDComplexKeyCacheDictionary::has(
|
||||
out[row] = false;
|
||||
},
|
||||
getLifetime());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BlockInputStreamPtr SSDComplexKeyCacheDictionary::getBlockInputStream(
|
||||
|
@ -2,11 +2,13 @@
|
||||
|
||||
#if defined(OS_LINUX) || defined(__FreeBSD__)
|
||||
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <list>
|
||||
#include <shared_mutex>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/Arena.h>
|
||||
@ -19,13 +21,11 @@
|
||||
#include <Dictionaries/BucketCache.h>
|
||||
#include <ext/scope_guard.h>
|
||||
#include <IO/HashingWriteBuffer.h>
|
||||
#include <list>
|
||||
#include <pcg_random.hpp>
|
||||
#include <Poco/Logger.h>
|
||||
#include <shared_mutex>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryStructure.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -313,7 +313,7 @@ public:
|
||||
template <typename Out, typename GetDefault>
|
||||
void getValue(const size_t attribute_index,
|
||||
const Columns & key_columns, const DataTypes & key_types,
|
||||
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
|
||||
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
|
||||
std::chrono::system_clock::time_point now) const;
|
||||
|
||||
void getString(const size_t attribute_index,
|
||||
@ -321,7 +321,7 @@ public:
|
||||
StringRefs & refs, ArenaWithFreeLists & arena, std::vector<bool> & found,
|
||||
std::vector<size_t> & default_ids, std::chrono::system_clock::time_point now) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types,
|
||||
void hasKeys(const Columns & key_columns, const DataTypes & key_types,
|
||||
ResultArrayType<UInt8> & out, std::vector<bool> & found,
|
||||
std::chrono::system_clock::time_point now) const;
|
||||
|
||||
@ -459,7 +459,7 @@ public:
|
||||
TemporalComplexKeysPool & not_found_pool,
|
||||
std::vector<size_t> & default_ids, std::chrono::system_clock::time_point now) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
|
||||
void hasKeys(const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
|
||||
std::unordered_map<KeyRef, std::vector<size_t>> & not_found,
|
||||
TemporalComplexKeysPool & not_found_pool, std::chrono::system_clock::time_point now) const;
|
||||
|
||||
@ -569,88 +569,20 @@ public:
|
||||
|
||||
std::exception_ptr getLastException() const override { return storage.getLastException(); }
|
||||
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = SSDComplexKeyCacheStorage::ResultArrayType<T>;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns,
|
||||
const DataTypes & key_types, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns,
|
||||
const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns,
|
||||
const DataTypes & key_types, const String & def, ColumnString * const out) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
@ -661,17 +593,20 @@ private:
|
||||
AttributeValueVariant createAttributeNullValueWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
void createAttributes();
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void getItemsNumberImpl(
|
||||
const size_t attribute_index,
|
||||
const Columns & key_columns, const DataTypes & key_types,
|
||||
ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
ResultArrayType<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void getItemsStringImpl(
|
||||
const size_t attribute_index,
|
||||
const Columns & key_columns, const DataTypes & key_types,
|
||||
ColumnString * out, DefaultGetter && get_default) const;
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
ColumnString * out,
|
||||
DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
|
||||
|
||||
const std::string name;
|
||||
const DictionaryStructure dict_struct;
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include "registerDictionaries.h"
|
||||
|
||||
#if USE_ODBC
|
||||
# include <Poco/Data/ODBC/Connector.h>
|
||||
# include <Poco/Data/ODBC/Connector.h> // Y_IGNORE
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
|
@ -6,31 +6,25 @@ LIBRARY()
|
||||
PEERDIR(
|
||||
clickhouse/src/Common
|
||||
contrib/libs/poco/Data
|
||||
contrib/libs/poco/Data/ODBC
|
||||
contrib/libs/poco/MongoDB
|
||||
contrib/libs/poco/Redis
|
||||
contrib/libs/sparsehash
|
||||
)
|
||||
|
||||
IF (USE_ODBC)
|
||||
PEERDIR(contrib/libs/poco/Data/ODBC)
|
||||
ENDIF ()
|
||||
|
||||
NO_COMPILER_WARNINGS()
|
||||
|
||||
|
||||
SRCS(
|
||||
CacheDictionary.cpp
|
||||
CacheDictionary_generate1.cpp
|
||||
CacheDictionary_generate2.cpp
|
||||
CacheDictionary_generate3.cpp
|
||||
CassandraBlockInputStream.cpp
|
||||
CassandraDictionarySource.cpp
|
||||
CassandraHelpers.cpp
|
||||
ClickHouseDictionarySource.cpp
|
||||
ComplexKeyCacheDictionary.cpp
|
||||
ComplexKeyCacheDictionary_createAttributeWithType.cpp
|
||||
ComplexKeyCacheDictionary_generate1.cpp
|
||||
ComplexKeyCacheDictionary_generate2.cpp
|
||||
ComplexKeyCacheDictionary_generate3.cpp
|
||||
ComplexKeyCacheDictionary_setAttributeValue.cpp
|
||||
ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp
|
||||
ComplexKeyDirectDictionary.cpp
|
||||
ComplexKeyHashedDictionary.cpp
|
||||
DictionaryBlockInputStreamBase.cpp
|
||||
|
@ -5,12 +5,15 @@ LIBRARY()
|
||||
PEERDIR(
|
||||
clickhouse/src/Common
|
||||
contrib/libs/poco/Data
|
||||
contrib/libs/poco/Data/ODBC
|
||||
contrib/libs/poco/MongoDB
|
||||
contrib/libs/poco/Redis
|
||||
contrib/libs/sparsehash
|
||||
)
|
||||
|
||||
IF (USE_ODBC)
|
||||
PEERDIR(contrib/libs/poco/Data/ODBC)
|
||||
ENDIF ()
|
||||
|
||||
NO_COMPILER_WARNINGS()
|
||||
|
||||
|
||||
|
@ -146,20 +146,32 @@ Block MySQLBlockInputStream::readImpl()
|
||||
const auto value = row[position_mapping[index]];
|
||||
const auto & sample = description.sample_block.getByPosition(index);
|
||||
|
||||
bool is_type_nullable = description.types[index].second;
|
||||
|
||||
if (!value.isNull())
|
||||
{
|
||||
if (description.types[index].second)
|
||||
if (is_type_nullable)
|
||||
{
|
||||
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[index]);
|
||||
const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
|
||||
insertValue(*data_type.getNestedType(), column_nullable.getNestedColumn(), description.types[index].first, value);
|
||||
column_nullable.getNullMapData().emplace_back(0);
|
||||
column_nullable.getNullMapData().emplace_back(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
insertValue(*sample.type, *columns[index], description.types[index].first, value);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
insertDefaultValue(*columns[index], *sample.column);
|
||||
|
||||
if (is_type_nullable)
|
||||
{
|
||||
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[index]);
|
||||
column_nullable.getNullMapData().back() = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
++num_rows;
|
||||
|
@ -504,7 +504,7 @@ private:
|
||||
using namespace traits_;
|
||||
using namespace impl_;
|
||||
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
|
||||
class FunctionBinaryArithmetic : public IFunction
|
||||
{
|
||||
static constexpr const bool is_plus = IsOperation<Op>::plus;
|
||||
@ -542,16 +542,54 @@ class FunctionBinaryArithmetic : public IFunction
|
||||
>(type, std::forward<F>(f));
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
static bool castTypeNoFloats(const IDataType * type, F && f)
|
||||
{
|
||||
return castTypeToEither<
|
||||
DataTypeUInt8,
|
||||
DataTypeUInt16,
|
||||
DataTypeUInt32,
|
||||
DataTypeUInt64,
|
||||
DataTypeUInt256,
|
||||
DataTypeInt8,
|
||||
DataTypeInt16,
|
||||
DataTypeInt32,
|
||||
DataTypeInt64,
|
||||
DataTypeInt128,
|
||||
DataTypeInt256,
|
||||
DataTypeDate,
|
||||
DataTypeDateTime,
|
||||
DataTypeDecimal<Decimal32>,
|
||||
DataTypeDecimal<Decimal64>,
|
||||
DataTypeDecimal<Decimal128>,
|
||||
DataTypeDecimal<Decimal256>,
|
||||
DataTypeFixedString
|
||||
>(type, std::forward<F>(f));
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
static bool castBothTypes(const IDataType * left, const IDataType * right, F && f)
|
||||
{
|
||||
return castType(left, [&](const auto & left_)
|
||||
if constexpr (valid_on_float_arguments)
|
||||
{
|
||||
return castType(right, [&](const auto & right_)
|
||||
return castType(left, [&](const auto & left_)
|
||||
{
|
||||
return f(left_, right_);
|
||||
return castType(right, [&](const auto & right_)
|
||||
{
|
||||
return f(left_, right_);
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
return castTypeNoFloats(left, [&](const auto & left_)
|
||||
{
|
||||
return castTypeNoFloats(right, [&](const auto & right_)
|
||||
{
|
||||
return f(left_, right_);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
static FunctionOverloadResolverPtr
|
||||
@ -1319,11 +1357,11 @@ public:
|
||||
};
|
||||
|
||||
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
|
||||
class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
|
||||
class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>
|
||||
{
|
||||
public:
|
||||
using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>;
|
||||
using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>;
|
||||
using Monotonicity = typename Base::Monotonicity;
|
||||
|
||||
static FunctionPtr create(
|
||||
@ -1488,7 +1526,7 @@ private:
|
||||
DataTypePtr return_type;
|
||||
};
|
||||
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
|
||||
class BinaryArithmeticOverloadResolver : public IFunctionOverloadResolverImpl
|
||||
{
|
||||
public:
|
||||
@ -1512,14 +1550,14 @@ public:
|
||||
|| (arguments[1].column && isColumnConst(*arguments[1].column))))
|
||||
{
|
||||
return std::make_unique<DefaultFunction>(
|
||||
FunctionBinaryArithmeticWithConstants<Op, Name, valid_on_default_arguments>::create(
|
||||
FunctionBinaryArithmeticWithConstants<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::create(
|
||||
arguments[0], arguments[1], return_type, context),
|
||||
ext::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
|
||||
return_type);
|
||||
}
|
||||
|
||||
return std::make_unique<DefaultFunction>(
|
||||
FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>::create(context),
|
||||
FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::create(context),
|
||||
ext::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
|
||||
return_type);
|
||||
}
|
||||
@ -1530,7 +1568,7 @@ public:
|
||||
throw Exception(
|
||||
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be 2",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
return FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>::getReturnTypeImplStatic(arguments, context);
|
||||
return FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::getReturnTypeImplStatic(arguments, context);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -212,4 +212,12 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments)
|
||||
return {nested_columns, offsets->data()};
|
||||
}
|
||||
|
||||
bool areTypesEqual(const DataTypePtr & lhs, const DataTypePtr & rhs)
|
||||
{
|
||||
const auto & lhs_name = lhs->getName();
|
||||
const auto & rhs_name = rhs->getName();
|
||||
|
||||
return lhs_name == rhs_name;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -152,4 +152,8 @@ void validateFunctionArgumentTypes(const IFunction & func, const ColumnsWithType
|
||||
std::pair<std::vector<const IColumn *>, const ColumnArray::Offset *>
|
||||
checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments);
|
||||
|
||||
|
||||
/// Check if two types are equal
|
||||
bool areTypesEqual(const DataTypePtr & lhs, const DataTypePtr & rhs);
|
||||
|
||||
}
|
||||
|
@ -38,8 +38,8 @@ void registerFunctionsExternalDictionaries(FunctionFactory & factory)
|
||||
factory.registerFunction<FunctionDictGetDateTimeOrDefault>();
|
||||
factory.registerFunction<FunctionDictGetUUIDOrDefault>();
|
||||
factory.registerFunction<FunctionDictGetStringOrDefault>();
|
||||
factory.registerFunction<FunctionDictGetNoType>();
|
||||
factory.registerFunction<FunctionDictGetNoTypeOrDefault>();
|
||||
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::get>>();
|
||||
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::getOrDefault>>();
|
||||
}
|
||||
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -101,7 +101,8 @@ struct IntegerRoundingComputation
|
||||
return scale;
|
||||
}
|
||||
|
||||
static ALWAYS_INLINE T computeImpl(T x, T scale)
|
||||
/// Integer overflow is Ok.
|
||||
static ALWAYS_INLINE_NO_SANITIZE_UNDEFINED T computeImpl(T x, T scale)
|
||||
{
|
||||
switch (rounding_mode)
|
||||
{
|
||||
|
@ -21,7 +21,7 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline constexpr bool is_gcd_lcm_implemeted = !(is_big_int_v<T> || std::is_floating_point_v<T>);
|
||||
inline constexpr bool is_gcd_lcm_implemeted = !is_big_int_v<T>;
|
||||
|
||||
template <typename A, typename B, typename Impl, typename Name>
|
||||
struct GCDLCMImpl
|
||||
@ -33,7 +33,7 @@ struct GCDLCMImpl
|
||||
static inline std::enable_if_t<!is_gcd_lcm_implemeted<Result>, Result>
|
||||
apply(A, B)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers and floats", Name::name);
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers", Name::name);
|
||||
}
|
||||
|
||||
template <typename Result = ResultType>
|
||||
|
@ -353,6 +353,9 @@ bool FunctionArrayEnumerateExtended<Derived>::execute128bit(
|
||||
keys_bytes += key_sizes[j];
|
||||
}
|
||||
|
||||
if (keys_bytes > 16)
|
||||
return false;
|
||||
|
||||
executeMethod<MethodFixed>(offsets, columns, key_sizes, nullptr, res_values);
|
||||
return true;
|
||||
}
|
||||
|
@ -37,7 +37,7 @@ struct BitAndImpl
|
||||
};
|
||||
|
||||
struct NameBitAnd { static constexpr auto name = "bitAnd"; };
|
||||
using FunctionBitAnd = BinaryArithmeticOverloadResolver<BitAndImpl, NameBitAnd, true>;
|
||||
using FunctionBitAnd = BinaryArithmeticOverloadResolver<BitAndImpl, NameBitAnd, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,7 @@ struct BitOrImpl
|
||||
};
|
||||
|
||||
struct NameBitOr { static constexpr auto name = "bitOr"; };
|
||||
using FunctionBitOr = BinaryArithmeticOverloadResolver<BitOrImpl, NameBitOr, true>;
|
||||
using FunctionBitOr = BinaryArithmeticOverloadResolver<BitOrImpl, NameBitOr, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -43,7 +43,7 @@ struct BitRotateLeftImpl
|
||||
};
|
||||
|
||||
struct NameBitRotateLeft { static constexpr auto name = "bitRotateLeft"; };
|
||||
using FunctionBitRotateLeft = BinaryArithmeticOverloadResolver<BitRotateLeftImpl, NameBitRotateLeft>;
|
||||
using FunctionBitRotateLeft = BinaryArithmeticOverloadResolver<BitRotateLeftImpl, NameBitRotateLeft, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -42,7 +42,7 @@ struct BitRotateRightImpl
|
||||
};
|
||||
|
||||
struct NameBitRotateRight { static constexpr auto name = "bitRotateRight"; };
|
||||
using FunctionBitRotateRight = BinaryArithmeticOverloadResolver<BitRotateRightImpl, NameBitRotateRight>;
|
||||
using FunctionBitRotateRight = BinaryArithmeticOverloadResolver<BitRotateRightImpl, NameBitRotateRight, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -42,7 +42,7 @@ struct BitShiftLeftImpl
|
||||
};
|
||||
|
||||
struct NameBitShiftLeft { static constexpr auto name = "bitShiftLeft"; };
|
||||
using FunctionBitShiftLeft = BinaryArithmeticOverloadResolver<BitShiftLeftImpl, NameBitShiftLeft>;
|
||||
using FunctionBitShiftLeft = BinaryArithmeticOverloadResolver<BitShiftLeftImpl, NameBitShiftLeft, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -42,7 +42,7 @@ struct BitShiftRightImpl
|
||||
};
|
||||
|
||||
struct NameBitShiftRight { static constexpr auto name = "bitShiftRight"; };
|
||||
using FunctionBitShiftRight = BinaryArithmeticOverloadResolver<BitShiftRightImpl, NameBitShiftRight>;
|
||||
using FunctionBitShiftRight = BinaryArithmeticOverloadResolver<BitShiftRightImpl, NameBitShiftRight, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,7 @@ struct BitTestImpl
|
||||
};
|
||||
|
||||
struct NameBitTest { static constexpr auto name = "bitTest"; };
|
||||
using FunctionBitTest = BinaryArithmeticOverloadResolver<BitTestImpl, NameBitTest>;
|
||||
using FunctionBitTest = BinaryArithmeticOverloadResolver<BitTestImpl, NameBitTest, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,7 @@ struct BitXorImpl
|
||||
};
|
||||
|
||||
struct NameBitXor { static constexpr auto name = "bitXor"; };
|
||||
using FunctionBitXor = BinaryArithmeticOverloadResolver<BitXorImpl, NameBitXor, true>;
|
||||
using FunctionBitXor = BinaryArithmeticOverloadResolver<BitXorImpl, NameBitXor, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -23,7 +23,7 @@ struct GCDImpl : public GCDLCMImpl<A, B, GCDImpl<A, B>, NameGCD>
|
||||
}
|
||||
};
|
||||
|
||||
using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false>;
|
||||
using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -95,7 +95,7 @@ void geodistInit()
|
||||
|
||||
sphere_metric_meters_lut[i] = static_cast<float>(sqr((EARTH_DIAMETER * PI / 360) * cos(latitude)));
|
||||
|
||||
sphere_metric_lut[i] = cosf(latitude);
|
||||
sphere_metric_lut[i] = sqrf(cosf(latitude));
|
||||
}
|
||||
}
|
||||
|
||||
@ -182,7 +182,7 @@ float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg)
|
||||
/// (Remember how a plane flies from Moscow to New York)
|
||||
/// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line.
|
||||
|
||||
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, KTABLE] indexes
|
||||
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes
|
||||
size_t latitude_midpoint_index = floatToIndex(latitude_midpoint) & (METRIC_LUT_SIZE - 1);
|
||||
|
||||
/// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1".
|
||||
|
@ -54,7 +54,7 @@ struct LCMImpl : public GCDLCMImpl<A, B, LCMImpl<A, B>, NameLCM>
|
||||
}
|
||||
};
|
||||
|
||||
using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false>;
|
||||
using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -69,6 +69,10 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion()
|
||||
boost::algorithm::to_lower(matched_region);
|
||||
region = matched_region;
|
||||
}
|
||||
else
|
||||
{
|
||||
region = Aws::Region::AWS_GLOBAL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -609,10 +609,10 @@ bool ActionsDAG::hasStatefulFunctions() const
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ActionsDAG::empty() const
|
||||
bool ActionsDAG::trivial() const
|
||||
{
|
||||
for (const auto & node : nodes)
|
||||
if (node.type != ActionType::INPUT)
|
||||
if (node.type == ActionType::FUNCTION || node.type == ActionType::ARRAY_JOIN)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
@ -223,7 +223,7 @@ public:
|
||||
|
||||
bool hasArrayJoin() const;
|
||||
bool hasStatefulFunctions() const;
|
||||
bool empty() const; /// If actions only contain inputs.
|
||||
bool trivial() const; /// If actions has no functions or array join.
|
||||
|
||||
const ActionsSettings & getSettings() const { return settings; }
|
||||
|
||||
|
@ -50,7 +50,6 @@
|
||||
#include <Interpreters/SystemLog.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/DDLWorker.h>
|
||||
#include <Common/DNSResolver.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/UncompressedCache.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
|
@ -51,7 +51,6 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
|
||||
#include <Interpreters/DNSCacheUpdater.h>
|
||||
#include <Common/SensitiveDataMasker.h>
|
||||
|
||||
#include <Processors/Transforms/LimitsCheckingTransform.h>
|
||||
|
56
src/Processors/QueryPlan/Optimizations/Optimizations.h
Normal file
56
src/Processors/QueryPlan/Optimizations/Optimizations.h
Normal file
@ -0,0 +1,56 @@
|
||||
#pragma once
|
||||
#include <Processors/QueryPlan/QueryPlan.h>
|
||||
#include <array>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace QueryPlanOptimizations
|
||||
{
|
||||
|
||||
/// This is the main function which optimizes the whole QueryPlan tree.
|
||||
void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes);
|
||||
|
||||
/// Optimization is a function applied to QueryPlan::Node.
|
||||
/// It can read and update subtree of specified node.
|
||||
/// It return the number of updated layers of subtree if some change happened.
|
||||
/// It must guarantee that the structure of tree is correct.
|
||||
///
|
||||
/// New nodes should be added to QueryPlan::Nodes list.
|
||||
/// It is not needed to remove old nodes from the list.
|
||||
struct Optimization
|
||||
{
|
||||
using Function = size_t (*)(QueryPlan::Node *, QueryPlan::Nodes &);
|
||||
const Function apply = nullptr;
|
||||
const char * name;
|
||||
};
|
||||
|
||||
/// Move ARRAY JOIN up if possible.
|
||||
size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
|
||||
|
||||
/// Move LimitStep down if possible.
|
||||
size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
|
||||
|
||||
/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
|
||||
size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes);
|
||||
|
||||
/// Replace chain `ExpressionStep -> ExpressionStep` to single ExpressionStep
|
||||
/// Replace chain `FilterStep -> ExpressionStep` to single FilterStep
|
||||
size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
|
||||
|
||||
inline const auto & getOptimizations()
|
||||
{
|
||||
static const std::array<Optimization, 4> optimizations =
|
||||
{{
|
||||
{tryLiftUpArrayJoin, "liftUpArrayJoin"},
|
||||
{tryPushDownLimit, "pushDownLimit"},
|
||||
{trySplitFilter, "splitFilter"},
|
||||
{tryMergeExpressions, "mergeExpressions"},
|
||||
}};
|
||||
|
||||
return optimizations;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
85
src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
Normal file
85
src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
Normal file
@ -0,0 +1,85 @@
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <Processors/QueryPlan/FilterStep.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
#include <Processors/QueryPlan/ArrayJoinStep.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include <Interpreters/ArrayJoinAction.h>
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
|
||||
{
|
||||
if (parent_node->children.size() != 1)
|
||||
return 0;
|
||||
|
||||
QueryPlan::Node * child_node = parent_node->children.front();
|
||||
|
||||
auto & parent = parent_node->step;
|
||||
auto & child = child_node->step;
|
||||
auto * expression_step = typeid_cast<ExpressionStep *>(parent.get());
|
||||
auto * filter_step = typeid_cast<FilterStep *>(parent.get());
|
||||
auto * array_join_step = typeid_cast<ArrayJoinStep *>(child.get());
|
||||
|
||||
if (!(expression_step || filter_step) || !array_join_step)
|
||||
return 0;
|
||||
|
||||
const auto & array_join = array_join_step->arrayJoin();
|
||||
const auto & expression = expression_step ? expression_step->getExpression()
|
||||
: filter_step->getExpression();
|
||||
|
||||
auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns);
|
||||
|
||||
/// No actions can be moved before ARRAY JOIN.
|
||||
if (split_actions.first->trivial())
|
||||
return 0;
|
||||
|
||||
auto description = parent->getStepDescription();
|
||||
|
||||
/// All actions was moved before ARRAY JOIN. Swap Expression and ArrayJoin.
|
||||
if (split_actions.second->trivial())
|
||||
{
|
||||
auto expected_header = parent->getOutputStream().header;
|
||||
|
||||
/// Expression/Filter -> ArrayJoin
|
||||
std::swap(parent, child);
|
||||
/// ArrayJoin -> Expression/Filter
|
||||
|
||||
if (expression_step)
|
||||
child = std::make_unique<ExpressionStep>(child_node->children.at(0)->step->getOutputStream(),
|
||||
std::move(split_actions.first));
|
||||
else
|
||||
child = std::make_unique<FilterStep>(child_node->children.at(0)->step->getOutputStream(),
|
||||
std::move(split_actions.first),
|
||||
filter_step->getFilterColumnName(),
|
||||
filter_step->removesFilterColumn());
|
||||
|
||||
child->setStepDescription(std::move(description));
|
||||
|
||||
array_join_step->updateInputStream(child->getOutputStream(), expected_header);
|
||||
return 2;
|
||||
}
|
||||
|
||||
/// Add new expression step before ARRAY JOIN.
|
||||
/// Expression/Filter -> ArrayJoin -> Something
|
||||
auto & node = nodes.emplace_back();
|
||||
node.children.swap(child_node->children);
|
||||
child_node->children.emplace_back(&node);
|
||||
/// Expression/Filter -> ArrayJoin -> node -> Something
|
||||
|
||||
node.step = std::make_unique<ExpressionStep>(node.children.at(0)->step->getOutputStream(),
|
||||
std::move(split_actions.first));
|
||||
node.step->setStepDescription(description);
|
||||
array_join_step->updateInputStream(node.step->getOutputStream(), {});
|
||||
|
||||
if (expression_step)
|
||||
parent = std::make_unique<ExpressionStep>(array_join_step->getOutputStream(), split_actions.second);
|
||||
else
|
||||
parent = std::make_unique<FilterStep>(array_join_step->getOutputStream(), split_actions.second,
|
||||
filter_step->getFilterColumnName(), filter_step->removesFilterColumn());
|
||||
|
||||
parent->setStepDescription(description + " [split]");
|
||||
return 3;
|
||||
}
|
||||
|
||||
}
|
114
src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
Normal file
114
src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
Normal file
@ -0,0 +1,114 @@
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <Processors/QueryPlan/ITransformingStep.h>
|
||||
#include <Processors/QueryPlan/LimitStep.h>
|
||||
#include <Processors/QueryPlan/TotalsHavingStep.h>
|
||||
#include <Processors/QueryPlan/MergingSortedStep.h>
|
||||
#include <Processors/QueryPlan/FinishSortingStep.h>
|
||||
#include <Processors/QueryPlan/MergeSortingStep.h>
|
||||
#include <Processors/QueryPlan/PartialSortingStep.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
/// If plan looks like Limit -> Sorting, update limit for Sorting
|
||||
static bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit)
|
||||
{
|
||||
if (limit == 0)
|
||||
return false;
|
||||
|
||||
QueryPlanStepPtr & step = node->step;
|
||||
QueryPlan::Node * child = nullptr;
|
||||
bool updated = false;
|
||||
|
||||
if (auto * merging_sorted = typeid_cast<MergingSortedStep *>(step.get()))
|
||||
{
|
||||
/// TODO: remove LimitStep here.
|
||||
merging_sorted->updateLimit(limit);
|
||||
updated = true;
|
||||
child = node->children.front();
|
||||
}
|
||||
else if (auto * finish_sorting = typeid_cast<FinishSortingStep *>(step.get()))
|
||||
{
|
||||
/// TODO: remove LimitStep here.
|
||||
finish_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
}
|
||||
else if (auto * merge_sorting = typeid_cast<MergeSortingStep *>(step.get()))
|
||||
{
|
||||
merge_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
child = node->children.front();
|
||||
}
|
||||
else if (auto * partial_sorting = typeid_cast<PartialSortingStep *>(step.get()))
|
||||
{
|
||||
partial_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
}
|
||||
|
||||
/// We often have chain PartialSorting -> MergeSorting -> MergingSorted
|
||||
/// Try update limit for them also if possible.
|
||||
if (child)
|
||||
tryUpdateLimitForSortingSteps(child, limit);
|
||||
|
||||
return updated;
|
||||
}
|
||||
|
||||
size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
|
||||
{
|
||||
if (parent_node->children.size() != 1)
|
||||
return 0;
|
||||
|
||||
QueryPlan::Node * child_node = parent_node->children.front();
|
||||
|
||||
auto & parent = parent_node->step;
|
||||
auto & child = child_node->step;
|
||||
auto * limit = typeid_cast<LimitStep *>(parent.get());
|
||||
|
||||
if (!limit)
|
||||
return 0;
|
||||
|
||||
/// Skip LIMIT WITH TIES by now.
|
||||
if (limit->withTies())
|
||||
return 0;
|
||||
|
||||
const auto * transforming = dynamic_cast<const ITransformingStep *>(child.get());
|
||||
|
||||
/// Skip everything which is not transform.
|
||||
if (!transforming)
|
||||
return 0;
|
||||
|
||||
/// Special cases for sorting steps.
|
||||
if (tryUpdateLimitForSortingSteps(child_node, limit->getLimitForSorting()))
|
||||
return 0;
|
||||
|
||||
/// Special case for TotalsHaving. Totals may be incorrect if we push down limit.
|
||||
if (typeid_cast<const TotalsHavingStep *>(child.get()))
|
||||
return 0;
|
||||
|
||||
/// Now we should decide if pushing down limit possible for this step.
|
||||
|
||||
const auto & transform_traits = transforming->getTransformTraits();
|
||||
const auto & data_stream_traits = transforming->getDataStreamTraits();
|
||||
|
||||
/// Cannot push down if child changes the number of rows.
|
||||
if (!transform_traits.preserves_number_of_rows)
|
||||
return 0;
|
||||
|
||||
/// Cannot push down if data was sorted exactly by child stream.
|
||||
if (!child->getOutputStream().sort_description.empty() && !data_stream_traits.preserves_sorting)
|
||||
return 0;
|
||||
|
||||
/// Now we push down limit only if it doesn't change any stream properties.
|
||||
/// TODO: some of them may be changed and, probably, not important for following streams. We may add such info.
|
||||
if (!limit->getOutputStream().hasEqualPropertiesWith(transforming->getOutputStream()))
|
||||
return 0;
|
||||
|
||||
/// Input stream for Limit have changed.
|
||||
limit->updateInputStream(transforming->getInputStreams().front());
|
||||
|
||||
parent.swap(child);
|
||||
return 2;
|
||||
}
|
||||
|
||||
}
|
65
src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
Normal file
65
src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
Normal file
@ -0,0 +1,65 @@
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <Processors/QueryPlan/FilterStep.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
|
||||
{
|
||||
if (parent_node->children.size() != 1)
|
||||
return false;
|
||||
|
||||
QueryPlan::Node * child_node = parent_node->children.front();
|
||||
|
||||
auto & parent = parent_node->step;
|
||||
auto & child = child_node->step;
|
||||
|
||||
auto * parent_expr = typeid_cast<ExpressionStep *>(parent.get());
|
||||
auto * parent_filter = typeid_cast<FilterStep *>(parent.get());
|
||||
auto * child_expr = typeid_cast<ExpressionStep *>(child.get());
|
||||
|
||||
if (parent_expr && child_expr)
|
||||
{
|
||||
const auto & child_actions = child_expr->getExpression();
|
||||
const auto & parent_actions = parent_expr->getExpression();
|
||||
|
||||
/// We cannot combine actions with arrayJoin and stateful function because we not always can reorder them.
|
||||
/// Example: select rowNumberInBlock() from (select arrayJoin([1, 2]))
|
||||
/// Such a query will return two zeroes if we combine actions together.
|
||||
if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
|
||||
return 0;
|
||||
|
||||
auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
|
||||
|
||||
auto expr = std::make_unique<ExpressionStep>(child_expr->getInputStreams().front(), merged);
|
||||
expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
|
||||
|
||||
parent_node->step = std::move(expr);
|
||||
parent_node->children.swap(child_node->children);
|
||||
return 1;
|
||||
}
|
||||
else if (parent_filter && child_expr)
|
||||
{
|
||||
const auto & child_actions = child_expr->getExpression();
|
||||
const auto & parent_actions = parent_filter->getExpression();
|
||||
|
||||
if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
|
||||
return 0;
|
||||
|
||||
auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
|
||||
|
||||
auto filter = std::make_unique<FilterStep>(child_expr->getInputStreams().front(), merged,
|
||||
parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn());
|
||||
filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
|
||||
|
||||
parent_node->step = std::move(filter);
|
||||
parent_node->children.swap(child_node->children);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
75
src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
Normal file
75
src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
Normal file
@ -0,0 +1,75 @@
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <stack>
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
|
||||
{
|
||||
const auto & optimizations = getOptimizations();
|
||||
|
||||
struct Frame
|
||||
{
|
||||
QueryPlan::Node * node;
|
||||
|
||||
/// If not zero, traverse only depth_limit layers of tree (if no other optimizations happen).
|
||||
/// Otherwise, traverse all children.
|
||||
size_t depth_limit = 0;
|
||||
|
||||
/// Next child to process.
|
||||
size_t next_child = 0;
|
||||
};
|
||||
|
||||
std::stack<Frame> stack;
|
||||
stack.push(Frame{.node = &root});
|
||||
|
||||
while (!stack.empty())
|
||||
{
|
||||
auto & frame = stack.top();
|
||||
|
||||
/// If traverse_depth_limit == 0, then traverse without limit (first entrance)
|
||||
/// If traverse_depth_limit > 1, then traverse with (limit - 1)
|
||||
if (frame.depth_limit != 1)
|
||||
{
|
||||
/// Traverse all children first.
|
||||
if (frame.next_child < frame.node->children.size())
|
||||
{
|
||||
stack.push(Frame
|
||||
{
|
||||
.node = frame.node->children[frame.next_child],
|
||||
.depth_limit = frame.depth_limit ? (frame.depth_limit - 1) : 0,
|
||||
});
|
||||
|
||||
++frame.next_child;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
size_t max_update_depth = 0;
|
||||
|
||||
/// Apply all optimizations.
|
||||
for (const auto & optimization : optimizations)
|
||||
{
|
||||
/// Just in case, skip optimization if it is not initialized.
|
||||
if (!optimization.apply)
|
||||
continue;
|
||||
|
||||
/// Try to apply optimization.
|
||||
auto update_depth = optimization.apply(frame.node, nodes);
|
||||
max_update_depth = std::max<size_t>(max_update_depth, update_depth);
|
||||
}
|
||||
|
||||
/// Traverse `max_update_depth` layers of tree again.
|
||||
if (max_update_depth)
|
||||
{
|
||||
frame.depth_limit = max_update_depth;
|
||||
frame.next_child = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/// Nothing was applied.
|
||||
stack.pop();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
50
src/Processors/QueryPlan/Optimizations/splitFilter.cpp
Normal file
50
src/Processors/QueryPlan/Optimizations/splitFilter.cpp
Normal file
@ -0,0 +1,50 @@
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <Processors/QueryPlan/FilterStep.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
|
||||
size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
|
||||
{
|
||||
auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
|
||||
if (!filter_step)
|
||||
return 0;
|
||||
|
||||
const auto & expr = filter_step->getExpression();
|
||||
|
||||
/// Do not split if there are function like runningDifference.
|
||||
if (expr->hasStatefulFunctions())
|
||||
return 0;
|
||||
|
||||
auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName());
|
||||
|
||||
if (split.second->trivial())
|
||||
return 0;
|
||||
|
||||
if (filter_step->removesFilterColumn())
|
||||
split.second->removeUnusedInput(filter_step->getFilterColumnName());
|
||||
|
||||
auto description = filter_step->getStepDescription();
|
||||
|
||||
auto & filter_node = nodes.emplace_back();
|
||||
node->children.swap(filter_node.children);
|
||||
node->children.push_back(&filter_node);
|
||||
|
||||
filter_node.step = std::make_unique<FilterStep>(
|
||||
filter_node.children.at(0)->step->getOutputStream(),
|
||||
std::move(split.first),
|
||||
filter_step->getFilterColumnName(),
|
||||
filter_step->removesFilterColumn());
|
||||
|
||||
node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));
|
||||
|
||||
filter_node.step->setStepDescription("(" + description + ")[split]");
|
||||
node->step->setStepDescription(description);
|
||||
|
||||
return 2;
|
||||
}
|
||||
|
||||
}
|
@ -6,15 +6,7 @@
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include <Interpreters/ArrayJoinAction.h>
|
||||
#include <stack>
|
||||
#include <Processors/QueryPlan/LimitStep.h>
|
||||
#include "MergingSortedStep.h"
|
||||
#include "FinishSortingStep.h"
|
||||
#include "MergeSortingStep.h"
|
||||
#include "PartialSortingStep.h"
|
||||
#include "TotalsHavingStep.h"
|
||||
#include "ExpressionStep.h"
|
||||
#include "ArrayJoinStep.h"
|
||||
#include "FilterStep.h"
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -341,318 +333,9 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio
|
||||
}
|
||||
}
|
||||
|
||||
/// If plan looks like Limit -> Sorting, update limit for Sorting
|
||||
bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit)
|
||||
{
|
||||
if (limit == 0)
|
||||
return false;
|
||||
|
||||
QueryPlanStepPtr & step = node->step;
|
||||
QueryPlan::Node * child = nullptr;
|
||||
bool updated = false;
|
||||
|
||||
if (auto * merging_sorted = typeid_cast<MergingSortedStep *>(step.get()))
|
||||
{
|
||||
/// TODO: remove LimitStep here.
|
||||
merging_sorted->updateLimit(limit);
|
||||
updated = true;
|
||||
child = node->children.front();
|
||||
}
|
||||
else if (auto * finish_sorting = typeid_cast<FinishSortingStep *>(step.get()))
|
||||
{
|
||||
/// TODO: remove LimitStep here.
|
||||
finish_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
}
|
||||
else if (auto * merge_sorting = typeid_cast<MergeSortingStep *>(step.get()))
|
||||
{
|
||||
merge_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
child = node->children.front();
|
||||
}
|
||||
else if (auto * partial_sorting = typeid_cast<PartialSortingStep *>(step.get()))
|
||||
{
|
||||
partial_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
}
|
||||
|
||||
/// We often have chain PartialSorting -> MergeSorting -> MergingSorted
|
||||
/// Try update limit for them also if possible.
|
||||
if (child)
|
||||
tryUpdateLimitForSortingSteps(child, limit);
|
||||
|
||||
return updated;
|
||||
}
|
||||
|
||||
/// Move LimitStep down if possible.
|
||||
static void tryPushDownLimit(QueryPlanStepPtr & parent, QueryPlan::Node * child_node)
|
||||
{
|
||||
auto & child = child_node->step;
|
||||
auto * limit = typeid_cast<LimitStep *>(parent.get());
|
||||
|
||||
if (!limit)
|
||||
return;
|
||||
|
||||
/// Skip LIMIT WITH TIES by now.
|
||||
if (limit->withTies())
|
||||
return;
|
||||
|
||||
const auto * transforming = dynamic_cast<const ITransformingStep *>(child.get());
|
||||
|
||||
/// Skip everything which is not transform.
|
||||
if (!transforming)
|
||||
return;
|
||||
|
||||
/// Special cases for sorting steps.
|
||||
if (tryUpdateLimitForSortingSteps(child_node, limit->getLimitForSorting()))
|
||||
return;
|
||||
|
||||
/// Special case for TotalsHaving. Totals may be incorrect if we push down limit.
|
||||
if (typeid_cast<const TotalsHavingStep *>(child.get()))
|
||||
return;
|
||||
|
||||
/// Now we should decide if pushing down limit possible for this step.
|
||||
|
||||
const auto & transform_traits = transforming->getTransformTraits();
|
||||
const auto & data_stream_traits = transforming->getDataStreamTraits();
|
||||
|
||||
/// Cannot push down if child changes the number of rows.
|
||||
if (!transform_traits.preserves_number_of_rows)
|
||||
return;
|
||||
|
||||
/// Cannot push down if data was sorted exactly by child stream.
|
||||
if (!child->getOutputStream().sort_description.empty() && !data_stream_traits.preserves_sorting)
|
||||
return;
|
||||
|
||||
/// Now we push down limit only if it doesn't change any stream properties.
|
||||
/// TODO: some of them may be changed and, probably, not important for following streams. We may add such info.
|
||||
if (!limit->getOutputStream().hasEqualPropertiesWith(transforming->getOutputStream()))
|
||||
return;
|
||||
|
||||
/// Input stream for Limit have changed.
|
||||
limit->updateInputStream(transforming->getInputStreams().front());
|
||||
|
||||
parent.swap(child);
|
||||
}
|
||||
|
||||
/// Move ARRAY JOIN up if possible.
|
||||
static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * child_node, QueryPlan::Nodes & nodes)
|
||||
{
|
||||
auto & parent = parent_node->step;
|
||||
auto & child = child_node->step;
|
||||
auto * expression_step = typeid_cast<ExpressionStep *>(parent.get());
|
||||
auto * filter_step = typeid_cast<FilterStep *>(parent.get());
|
||||
auto * array_join_step = typeid_cast<ArrayJoinStep *>(child.get());
|
||||
|
||||
if (!(expression_step || filter_step) || !array_join_step)
|
||||
return;
|
||||
|
||||
const auto & array_join = array_join_step->arrayJoin();
|
||||
const auto & expression = expression_step ? expression_step->getExpression()
|
||||
: filter_step->getExpression();
|
||||
|
||||
auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns);
|
||||
|
||||
/// No actions can be moved before ARRAY JOIN.
|
||||
if (split_actions.first->empty())
|
||||
return;
|
||||
|
||||
auto description = parent->getStepDescription();
|
||||
|
||||
/// All actions was moved before ARRAY JOIN. Swap Expression and ArrayJoin.
|
||||
if (split_actions.second->empty())
|
||||
{
|
||||
auto expected_header = parent->getOutputStream().header;
|
||||
|
||||
/// Expression/Filter -> ArrayJoin
|
||||
std::swap(parent, child);
|
||||
/// ArrayJoin -> Expression/Filter
|
||||
|
||||
if (expression_step)
|
||||
child = std::make_unique<ExpressionStep>(child_node->children.at(0)->step->getOutputStream(),
|
||||
std::move(split_actions.first));
|
||||
else
|
||||
child = std::make_unique<FilterStep>(child_node->children.at(0)->step->getOutputStream(),
|
||||
std::move(split_actions.first),
|
||||
filter_step->getFilterColumnName(),
|
||||
filter_step->removesFilterColumn());
|
||||
|
||||
child->setStepDescription(std::move(description));
|
||||
|
||||
array_join_step->updateInputStream(child->getOutputStream(), expected_header);
|
||||
return;
|
||||
}
|
||||
|
||||
/// Add new expression step before ARRAY JOIN.
|
||||
/// Expression/Filter -> ArrayJoin -> Something
|
||||
auto & node = nodes.emplace_back();
|
||||
node.children.swap(child_node->children);
|
||||
child_node->children.emplace_back(&node);
|
||||
/// Expression/Filter -> ArrayJoin -> node -> Something
|
||||
|
||||
node.step = std::make_unique<ExpressionStep>(node.children.at(0)->step->getOutputStream(),
|
||||
std::move(split_actions.first));
|
||||
node.step->setStepDescription(description);
|
||||
array_join_step->updateInputStream(node.step->getOutputStream(), {});
|
||||
|
||||
if (expression_step)
|
||||
parent = std::make_unique<ExpressionStep>(array_join_step->getOutputStream(), split_actions.second);
|
||||
else
|
||||
parent = std::make_unique<FilterStep>(array_join_step->getOutputStream(), split_actions.second,
|
||||
filter_step->getFilterColumnName(), filter_step->removesFilterColumn());
|
||||
|
||||
parent->setStepDescription(description + " [split]");
|
||||
}
|
||||
|
||||
/// Replace chain `ExpressionStep -> ExpressionStep` to single ExpressionStep
|
||||
/// Replace chain `FilterStep -> ExpressionStep` to single FilterStep
|
||||
static bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * child_node)
|
||||
{
|
||||
auto & parent = parent_node->step;
|
||||
auto & child = child_node->step;
|
||||
|
||||
auto * parent_expr = typeid_cast<ExpressionStep *>(parent.get());
|
||||
auto * parent_filter = typeid_cast<FilterStep *>(parent.get());
|
||||
auto * child_expr = typeid_cast<ExpressionStep *>(child.get());
|
||||
|
||||
if (parent_expr && child_expr)
|
||||
{
|
||||
const auto & child_actions = child_expr->getExpression();
|
||||
const auto & parent_actions = parent_expr->getExpression();
|
||||
|
||||
/// We cannot combine actions with arrayJoin and stateful function because we not always can reorder them.
|
||||
/// Example: select rowNumberInBlock() from (select arrayJoin([1, 2]))
|
||||
/// Such a query will return two zeroes if we combine actions together.
|
||||
if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
|
||||
return false;
|
||||
|
||||
auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
|
||||
|
||||
auto expr = std::make_unique<ExpressionStep>(child_expr->getInputStreams().front(), merged);
|
||||
expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
|
||||
|
||||
parent_node->step = std::move(expr);
|
||||
parent_node->children.swap(child_node->children);
|
||||
return true;
|
||||
}
|
||||
else if (parent_filter && child_expr)
|
||||
{
|
||||
const auto & child_actions = child_expr->getExpression();
|
||||
const auto & parent_actions = parent_filter->getExpression();
|
||||
|
||||
if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
|
||||
return false;
|
||||
|
||||
auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
|
||||
|
||||
auto filter = std::make_unique<FilterStep>(child_expr->getInputStreams().front(), merged,
|
||||
parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn());
|
||||
filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
|
||||
|
||||
parent_node->step = std::move(filter);
|
||||
parent_node->children.swap(child_node->children);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
|
||||
static bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
|
||||
{
|
||||
auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
|
||||
if (!filter_step)
|
||||
return false;
|
||||
|
||||
const auto & expr = filter_step->getExpression();
|
||||
|
||||
/// Do not split if there are function like runningDifference.
|
||||
if (expr->hasStatefulFunctions())
|
||||
return false;
|
||||
|
||||
auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName());
|
||||
|
||||
if (split.second->empty())
|
||||
return false;
|
||||
|
||||
if (filter_step->removesFilterColumn())
|
||||
split.second->removeUnusedInput(filter_step->getFilterColumnName());
|
||||
|
||||
auto description = filter_step->getStepDescription();
|
||||
|
||||
auto & filter_node = nodes.emplace_back();
|
||||
node->children.swap(filter_node.children);
|
||||
node->children.push_back(&filter_node);
|
||||
|
||||
filter_node.step = std::make_unique<FilterStep>(
|
||||
filter_node.children.at(0)->step->getOutputStream(),
|
||||
std::move(split.first),
|
||||
filter_step->getFilterColumnName(),
|
||||
filter_step->removesFilterColumn());
|
||||
|
||||
node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));
|
||||
|
||||
filter_node.step->setStepDescription("(" + description + ")[split]");
|
||||
node->step->setStepDescription(description);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void QueryPlan::optimize()
|
||||
{
|
||||
/* Stack contains info for every nodes in the path from tree root to the current node.
|
||||
* Every optimization changes only current node and it's children.
|
||||
* Optimization may change QueryPlanStep, but not QueryPlan::Node (only add a new one).
|
||||
* So, QueryPlan::Node::children will be always valid.
|
||||
*/
|
||||
|
||||
struct Frame
|
||||
{
|
||||
Node * node;
|
||||
size_t next_child = 0;
|
||||
};
|
||||
|
||||
std::stack<Frame> stack;
|
||||
stack.push(Frame{.node = root});
|
||||
|
||||
while (!stack.empty())
|
||||
{
|
||||
auto & frame = stack.top();
|
||||
|
||||
if (frame.next_child == 0)
|
||||
{
|
||||
if (frame.node->children.size() == 1)
|
||||
{
|
||||
tryPushDownLimit(frame.node->step, frame.node->children.front());
|
||||
|
||||
while (tryMergeExpressions(frame.node, frame.node->children.front()));
|
||||
|
||||
if (frame.node->children.size() == 1)
|
||||
tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes);
|
||||
|
||||
trySplitFilter(frame.node, nodes);
|
||||
}
|
||||
}
|
||||
|
||||
if (frame.next_child < frame.node->children.size())
|
||||
{
|
||||
stack.push(Frame{frame.node->children[frame.next_child]});
|
||||
++frame.next_child;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (frame.node->children.size() == 1)
|
||||
{
|
||||
while (tryMergeExpressions(frame.node, frame.node->children.front()));
|
||||
|
||||
trySplitFilter(frame.node, nodes);
|
||||
|
||||
tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes);
|
||||
}
|
||||
|
||||
stack.pop();
|
||||
}
|
||||
}
|
||||
QueryPlanOptimizations::optimizeTree(*root, nodes);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -116,6 +116,11 @@ SRCS(
|
||||
QueryPlan/MergingFinal.cpp
|
||||
QueryPlan/MergingSortedStep.cpp
|
||||
QueryPlan/OffsetStep.cpp
|
||||
QueryPlan/Optimizations/liftUpArrayJoin.cpp
|
||||
QueryPlan/Optimizations/limitPushDown.cpp
|
||||
QueryPlan/Optimizations/mergeExpressions.cpp
|
||||
QueryPlan/Optimizations/optimizeTree.cpp
|
||||
QueryPlan/Optimizations/splitFilter.cpp
|
||||
QueryPlan/PartialSortingStep.cpp
|
||||
QueryPlan/QueryPlan.cpp
|
||||
QueryPlan/ReadFromPreparedSource.cpp
|
||||
|
@ -414,18 +414,19 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con
|
||||
end -= 64;
|
||||
const auto * pos = end;
|
||||
UInt64 val =
|
||||
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos)),
|
||||
zero16)))
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 16)),
|
||||
zero16))) << 16u)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 32)),
|
||||
zero16))) << 32u)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 48)),
|
||||
zero16))) << 48u);
|
||||
val = ~val;
|
||||
if (val == 0)
|
||||
count += 64;
|
||||
else
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user