mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Merge branch 'master' into clickhouse-test-log-comment
This commit is contained in:
commit
a923b94bdb
2
.gitmodules
vendored
2
.gitmodules
vendored
@ -184,7 +184,7 @@
|
||||
url = https://github.com/ClickHouse-Extras/krb5
|
||||
[submodule "contrib/cyrus-sasl"]
|
||||
path = contrib/cyrus-sasl
|
||||
url = https://github.com/cyrusimap/cyrus-sasl
|
||||
url = https://github.com/ClickHouse-Extras/cyrus-sasl
|
||||
branch = cyrus-sasl-2.1
|
||||
[submodule "contrib/croaring"]
|
||||
path = contrib/croaring
|
||||
|
@ -84,10 +84,12 @@
|
||||
# define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined")))
|
||||
# define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address")))
|
||||
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
|
||||
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined")))
|
||||
#else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it.
|
||||
# define NO_SANITIZE_UNDEFINED
|
||||
# define NO_SANITIZE_ADDRESS
|
||||
# define NO_SANITIZE_THREAD
|
||||
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE
|
||||
#endif
|
||||
|
||||
/// A template function for suppressing warnings about unused variables or function results.
|
||||
|
@ -31,7 +31,7 @@ static void *volatile vdso_func = (void *)getcpu_init;
|
||||
int sched_getcpu(void)
|
||||
{
|
||||
int r;
|
||||
unsigned cpu;
|
||||
unsigned cpu = 0;
|
||||
|
||||
#ifdef VDSO_GETCPU_SYM
|
||||
getcpu_f f = (getcpu_f)vdso_func;
|
||||
|
@ -62,6 +62,7 @@ RUN python3 -m pip install \
|
||||
avro \
|
||||
cassandra-driver \
|
||||
confluent-kafka \
|
||||
dict2xml \
|
||||
dicttoxml \
|
||||
docker \
|
||||
docker-compose==1.22.0 \
|
||||
|
@ -53,10 +53,12 @@ function run_tests()
|
||||
if [ "$NUM_TRIES" -gt "1" ]; then
|
||||
ADDITIONAL_OPTIONS+=('--skip')
|
||||
ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip')
|
||||
ADDITIONAL_OPTIONS+=('--jobs')
|
||||
ADDITIONAL_OPTIONS+=('4')
|
||||
fi
|
||||
|
||||
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
|
||||
--test-runs "$NUM_TRIES" --jobs 4 \
|
||||
--test-runs "$NUM_TRIES" \
|
||||
"$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
|
||||
| ts '%Y-%m-%d %H:%M:%S' \
|
||||
| tee -a test_output/test_result.txt
|
||||
|
@ -7,8 +7,6 @@ toc_title: EmbeddedRocksDB
|
||||
|
||||
This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/).
|
||||
|
||||
`EmbeddedRocksDB` lets you:
|
||||
|
||||
## Creating a Table {#table_engine-EmbeddedRocksDB-creating-a-table}
|
||||
|
||||
``` sql
|
||||
@ -23,6 +21,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
Required parameters:
|
||||
|
||||
- `primary_key_name` – any column name in the column list.
|
||||
- `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a `rocksdb key`.
|
||||
- columns other than the primary key will be serialized in binary as `rocksdb` value in corresponding order.
|
||||
- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from `rocksdb`.
|
||||
|
||||
Example:
|
||||
|
||||
@ -38,8 +39,4 @@ ENGINE = EmbeddedRocksDB
|
||||
PRIMARY KEY key
|
||||
```
|
||||
|
||||
## Description {#description}
|
||||
|
||||
- `primary key` must be specified, it only supports one column in primary key. The primary key will serialized in binary as rocksdb key.
|
||||
- columns other than the primary key will be serialized in binary as rocksdb value in corresponding order.
|
||||
- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from rocksdb.
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/embedded-rocksdb/) <!--hide-->
|
||||
|
@ -428,7 +428,7 @@ Possible values:
|
||||
|
||||
- `'basic'` — Use basic parser.
|
||||
|
||||
ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `'2019-08-20 10:18:56'` or `2019-08-20`.
|
||||
ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`.
|
||||
|
||||
Default value: `'basic'`.
|
||||
|
||||
@ -443,19 +443,19 @@ Allows choosing different output formats of the text representation of date and
|
||||
|
||||
Possible values:
|
||||
|
||||
- `'simple'` - Simple output format.
|
||||
- `simple` - Simple output format.
|
||||
|
||||
Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `'2019-08-20 10:18:56'`. Calculation is performed according to the data type's time zone (if present) or server time zone.
|
||||
ClickHouse outputs date and time in `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone.
|
||||
|
||||
- `'iso'` - ISO output format.
|
||||
- `iso` - ISO output format.
|
||||
|
||||
Clickhouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `'2019-08-20T10:18:56Z'`. Note that output is in UTC (`Z` means UTC).
|
||||
ClickHouse outputs date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC).
|
||||
|
||||
- `'unix_timestamp'` - Unix timestamp output format.
|
||||
- `unix_timestamp` - Unix timestamp output format.
|
||||
|
||||
Clickhouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `'1566285536'`.
|
||||
ClickHouse outputs date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example, `1566285536`.
|
||||
|
||||
Default value: `'simple'`.
|
||||
Default value: `simple`.
|
||||
|
||||
See also:
|
||||
|
||||
|
@ -241,7 +241,7 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `window` — Length of the sliding window in seconds.
|
||||
- `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`.
|
||||
- `mode` - It is an optional argument.
|
||||
- `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values.
|
||||
- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, its value can’t exceed the Int64 maximum, which is 2^63 - 1).
|
||||
|
@ -11,7 +11,7 @@ Key length depends on encryption mode. It is 16, 24, and 32 bytes long for `-128
|
||||
|
||||
Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored).
|
||||
|
||||
Note that these functions work slowly.
|
||||
Note that these functions worked slowly in versions before ClickHouse 21.1.
|
||||
|
||||
## encrypt {#encrypt}
|
||||
|
||||
@ -41,7 +41,7 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
|
||||
- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Examples**
|
||||
|
||||
@ -52,57 +52,38 @@ Query:
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
`comment` String,
|
||||
`secret` String
|
||||
)
|
||||
ENGINE = Memory
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
Insert some data (please avoid storing the keys/ivs in the database, as this undermines the whole concept of encryption). Storing 'hints' is also unsafe and is done here only for illustrative purposes:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\
|
||||
('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\
|
||||
('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\
|
||||
('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212'));
|
||||
```
|
||||
|
||||
Example without `iv`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
|
||||
SELECT comment, hex(secret) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
|
||||
│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
|
||||
│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
|
||||
│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
|
||||
└─────────────┴──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `iv`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
|
||||
│ aes-256-ctr │ │
|
||||
│ aes-256-ctr │ 7FB039F7 │
|
||||
│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
|
||||
└─────────────┴───────────────────────────────────────────────┘
|
||||
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
|
||||
│ aes-256-cfb128 no IV │ B4972BDC4459 │
|
||||
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
|
||||
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
|
||||
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
|
||||
└─────────────────────────────────────┴──────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `-gcm`:
|
||||
@ -110,40 +91,26 @@ Example with `-gcm`:
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
|
||||
INSERT INTO encryption_test VALUES('aes-256-gcm', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')), \
|
||||
('aes-256-gcm with AAD', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv', 'aad'));
|
||||
|
||||
SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%';
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
|
||||
│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
|
||||
│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
|
||||
│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
|
||||
└─────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `-gcm` mode and with `aad`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
|
||||
│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
|
||||
│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
|
||||
│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
|
||||
└─────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
|
||||
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
|
||||
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
|
||||
└──────────────────────┴──────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## aes_encrypt_mysql {#aes_encrypt_mysql}
|
||||
|
||||
Compatible with mysql encryption and can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
|
||||
Compatible with mysql encryption and resulting ciphertext can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
|
||||
|
||||
Will produce the same ciphertext as `encrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_encrypt_mysql` will stick to what MySQL's `aes_encrypt` does: 'fold' `key` and ignore excess bits of `IV`.
|
||||
|
||||
Supported encryption modes:
|
||||
|
||||
@ -156,7 +123,7 @@ Supported encryption modes:
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
``` sql
|
||||
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
|
||||
```
|
||||
|
||||
@ -164,78 +131,98 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
|
||||
|
||||
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Optional. Only the first 16 bytes are taken into account. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
|
||||
- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
Given equal input, `encrypt` and `aes_encrypt_mysql` produce the same ciphertext:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal;
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
Result:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
┌─ciphertexts_equal─┐
|
||||
│ 1 │
|
||||
└───────────────────┘
|
||||
```
|
||||
|
||||
Example without `iv`:
|
||||
|
||||
But `encrypt` fails when `key` or `iv` is longer than expected:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
|
||||
SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
|
||||
│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │
|
||||
│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │
|
||||
│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
|
||||
└─────────────┴──────────────────────────────────────────────────────────────────┘
|
||||
Received exception from server (version 21.1.2):
|
||||
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
|
||||
```
|
||||
|
||||
Example with `iv`:
|
||||
While `aes_encrypt_mysql` produces MySQL-compatible output:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
|
||||
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─ciphertext───┐
|
||||
│ 24E9E4966469 │
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
Notice how supplying an even longer `IV` produces the same result:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
|
||||
│ aes-256-cfb128 │ │
|
||||
│ aes-256-cfb128 │ 7FB039F7 │
|
||||
│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │
|
||||
└────────────────┴────────────────────────────────────────────────────────────┘
|
||||
┌─ciphertext───┐
|
||||
│ 24E9E4966469 │
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
Which is binary equal to what MySQL produces on same inputs:
|
||||
|
||||
``` sql
|
||||
mysql> SET block_encryption_mode='aes-256-cfb128';
|
||||
Query OK, 0 rows affected (0.00 sec)
|
||||
|
||||
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
|
||||
+------------------------+
|
||||
| ciphertext |
|
||||
+------------------------+
|
||||
| 0x24E9E4966469 |
|
||||
+------------------------+
|
||||
1 row in set (0.00 sec)
|
||||
```
|
||||
|
||||
## decrypt {#decrypt}
|
||||
|
||||
This function decrypts data using these modes:
|
||||
This function decrypts ciphertext into a plaintext using these modes:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
@ -247,7 +234,7 @@ This function decrypts data using these modes:
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
``` sql
|
||||
decrypt('mode', 'ciphertext', 'key' [, iv, aad])
|
||||
```
|
||||
|
||||
@ -265,51 +252,56 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad])
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
Re-using table from [encrypt](./encryption-functions.md#encrypt).
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
|
||||
SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
|
||||
SELECT comment, hex(secret) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
|
||||
│ aes-128-ecb │ │
|
||||
│ aes-128-ecb │ text │
|
||||
│ aes-128-ecb │ What Is ClickHouse? │
|
||||
└─────────────┴─────────────────────────────────────────────────────────────────────┘
|
||||
``` text
|
||||
┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
|
||||
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
|
||||
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
|
||||
└──────────────────────┴──────────────────────────────────────────────┘
|
||||
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
|
||||
│ aes-256-cfb128 no IV │ B4972BDC4459 │
|
||||
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
|
||||
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
|
||||
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
|
||||
└─────────────────────────────────────┴──────────────────────────────────┘
|
||||
```
|
||||
|
||||
Now let's try to decrypt all that data.
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test
|
||||
```
|
||||
|
||||
Result:
|
||||
``` text
|
||||
┌─comment─────────────────────────────┬─plaintext─┐
|
||||
│ aes-256-cfb128 no IV │ Secret │
|
||||
│ aes-256-cfb128 no IV, different key │ <20>4<EFBFBD>
|
||||
<20> │
|
||||
│ aes-256-cfb128 with IV │ <20><><EFBFBD>6<EFBFBD>~ │
|
||||
│aes-256-cbc no IV │ <20>2*4<>h3c<33>4w<34><77>@
|
||||
└─────────────────────────────────────┴───────────┘
|
||||
```
|
||||
|
||||
Notice how only a portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption.
|
||||
|
||||
## aes_decrypt_mysql {#aes_decrypt_mysql}
|
||||
|
||||
Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
|
||||
|
||||
Will produce the same plaintext as `decrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_decrypt_mysql` will stick to what MySQL's `aes_decrypt` does: 'fold' `key` and ignore excess bits of `IV`.
|
||||
|
||||
Supported decryption modes:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
@ -321,7 +313,7 @@ Supported decryption modes:
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
``` sql
|
||||
aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
|
||||
```
|
||||
|
||||
@ -338,44 +330,30 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
|
||||
Query:
|
||||
|
||||
Let's decrypt data we've previously encrypted with MySQL:
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
mysql> SET block_encryption_mode='aes-256-cfb128';
|
||||
Query OK, 0 rows affected (0.00 sec)
|
||||
|
||||
Insert this data:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
|
||||
+------------------------+
|
||||
| ciphertext |
|
||||
+------------------------+
|
||||
| 0x24E9E4966469 |
|
||||
+------------------------+
|
||||
1 row in set (0.00 sec)
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
|
||||
SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
|
||||
│ aes-128-cbc │ │
|
||||
│ aes-128-cbc │ text │
|
||||
│ aes-128-cbc │ What Is ClickHouse? │
|
||||
└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘
|
||||
┌─plaintext─┐
|
||||
│ Secret │
|
||||
└───────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) <!--hide-->
|
||||
|
@ -0,0 +1,44 @@
|
||||
---
|
||||
toc_priority: 6
|
||||
toc_title: EmbeddedRocksDB
|
||||
---
|
||||
|
||||
# Движок EmbeddedRocksDB {#EmbeddedRocksDB-engine}
|
||||
|
||||
Этот движок позволяет интегрировать ClickHouse с [rocksdb](http://rocksdb.org/).
|
||||
|
||||
## Создание таблицы {#table_engine-EmbeddedRocksDB-creating-a-table}
|
||||
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
|
||||
...
|
||||
) ENGINE = EmbeddedRocksDB
|
||||
PRIMARY KEY(primary_key_name);
|
||||
```
|
||||
|
||||
Обязательные параметры:
|
||||
|
||||
- `primary_key_name` может быть любое имя столбца из списка столбцов.
|
||||
- Указание первичного ключа `primary key` является обязательным. Он будет сериализован в двоичном формате как ключ `rocksdb`.
|
||||
- Поддерживается только один столбец в первичном ключе.
|
||||
- Столбцы, которые отличаются от первичного ключа, будут сериализованы в двоичном формате как значение `rocksdb` в соответствующем порядке.
|
||||
- Запросы с фильтрацией по ключу `equals` или `in` оптимизируются для поиска по нескольким ключам из `rocksdb`.
|
||||
|
||||
Пример:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test
|
||||
(
|
||||
`key` String,
|
||||
`v1` UInt32,
|
||||
`v2` String,
|
||||
`v3` Float32,
|
||||
)
|
||||
ENGINE = EmbeddedRocksDB
|
||||
PRIMARY KEY key;
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/embedded-rocksdb/) <!--hide-->
|
@ -406,21 +406,46 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- `'best_effort'` — включает расширенный парсинг.
|
||||
- `best_effort` — включает расширенный парсинг.
|
||||
|
||||
ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `'2018-06-08T01:02:03.000Z'`.
|
||||
ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `2018-06-08T01:02:03.000Z`.
|
||||
|
||||
- `'basic'` — используется базовый парсер.
|
||||
- `basic` — используется базовый парсер.
|
||||
|
||||
ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `'2019-08-20 10:18:56'` или `2019-08-20`.
|
||||
ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `2019-08-20 10:18:56` или `2019-08-20`.
|
||||
|
||||
Значение по умолчанию: `'basic'`.
|
||||
Значение по умолчанию: `basic`.
|
||||
|
||||
См. также:
|
||||
|
||||
- [Тип данных DateTime.](../../sql-reference/data-types/datetime.md)
|
||||
- [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md)
|
||||
|
||||
## date_time_output_format {#settings-date_time_output_format}
|
||||
|
||||
Позволяет выбрать разные выходные форматы текстового представления даты и времени.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- `simple` - простой выходной формат.
|
||||
|
||||
Выходные дата и время Clickhouse в формате `YYYY-MM-DD hh:mm:ss`. Например, `2019-08-20 10:18:56`. Расчет выполняется в соответствии с часовым поясом типа данных (если он есть) или часовым поясом сервера.
|
||||
|
||||
- `iso` - выходной формат ISO.
|
||||
|
||||
Выходные дата и время Clickhouse в формате [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ`. Например, `2019-08-20T10:18:56Z`. Обратите внимание, что выходные данные отображаются в формате UTC (`Z` означает UTC).
|
||||
|
||||
- `unix_timestamp` - выходной формат Unix.
|
||||
|
||||
Выходные дата и время в формате [Unix](https://en.wikipedia.org/wiki/Unix_time). Например `1566285536`.
|
||||
|
||||
Значение по умолчанию: `simple`.
|
||||
|
||||
См. также:
|
||||
|
||||
- [Тип данных DateTime](../../sql-reference/data-types/datetime.md)
|
||||
- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md)
|
||||
|
||||
## join_default_strictness {#settings-join_default_strictness}
|
||||
|
||||
Устанавливает строгость по умолчанию для [JOIN](../../sql-reference/statements/select/join.md#select-join).
|
||||
|
@ -27,7 +27,7 @@ DateTime([timezone])
|
||||
|
||||
Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`.
|
||||
|
||||
ClickHouse отображает значения типа `DateTime` в формате `YYYY-MM-DD hh:mm:ss`. Отображение можно поменять с помощью функции [formatDateTime](../../sql-reference/data-types/datetime.md#formatdatetime).
|
||||
ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime).
|
||||
|
||||
При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format).
|
||||
|
||||
|
@ -118,7 +118,7 @@ for (auto & stream : streams)
|
||||
stream.second->finalize();
|
||||
```
|
||||
|
||||
**18.** 行的某尾不应该包含空格。
|
||||
**18.** 行的末尾不应该包含空格。
|
||||
|
||||
**19.** 源文件应该用 UTF-8 编码。
|
||||
|
||||
|
@ -401,7 +401,7 @@ TTL date_time + INTERVAL 15 HOUR
|
||||
|
||||
### 列 TTL {#mergetree-column-ttl}
|
||||
|
||||
当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期,则ClickHouse 会从文件系统中的数据片段中此列。
|
||||
当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期,则ClickHouse 会从文件系统中的数据片段中删除此列。
|
||||
|
||||
`TTL`子句不能被用于主键字段。
|
||||
|
||||
|
@ -932,6 +932,10 @@ private:
|
||||
std::cerr << "Received exception from server (version "
|
||||
<< server_version << "):" << std::endl << "Code: "
|
||||
<< server_exception->code() << ". " << text << std::endl;
|
||||
if (is_interactive)
|
||||
{
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (client_exception)
|
||||
@ -939,6 +943,10 @@ private:
|
||||
fmt::print(stderr,
|
||||
"Error on processing query '{}':\n{}\n",
|
||||
full_query, client_exception->message());
|
||||
if (is_interactive)
|
||||
{
|
||||
fmt::print(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
// A debug check -- at least some exception must be set, if the error
|
||||
|
@ -190,6 +190,7 @@ add_object_library(clickhouse_processors_sources Processors/Sources)
|
||||
add_object_library(clickhouse_processors_merges Processors/Merges)
|
||||
add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms)
|
||||
add_object_library(clickhouse_processors_queryplan Processors/QueryPlan)
|
||||
add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations)
|
||||
|
||||
set (DBMS_COMMON_LIBRARIES)
|
||||
# libgcc_s does not provide an implementation of an atomics library. Instead,
|
||||
|
@ -0,0 +1,2 @@
|
||||
add_executable(test-connect test_connect.cpp)
|
||||
target_link_libraries (test-connect PRIVATE dbms)
|
99
src/Client/tests/test_connect.cpp
Normal file
99
src/Client/tests/test_connect.cpp
Normal file
@ -0,0 +1,99 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
#include <Poco/Net/StreamSocket.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
|
||||
/** In a loop it connects to the server and immediately breaks the connection.
|
||||
* Using the SO_LINGER option, we ensure that the connection is terminated by sending a RST packet (not FIN).
|
||||
* Long time ago this behavior caused a bug in the TCPServer implementation in the Poco library.
|
||||
*/
|
||||
int main(int argc, char ** argv)
|
||||
try
|
||||
{
|
||||
size_t num_iterations = 1;
|
||||
size_t num_threads = 1;
|
||||
std::string host = "localhost";
|
||||
uint16_t port = 9000;
|
||||
|
||||
if (argc >= 2)
|
||||
num_iterations = DB::parse<size_t>(argv[1]);
|
||||
|
||||
if (argc >= 3)
|
||||
num_threads = DB::parse<size_t>(argv[2]);
|
||||
|
||||
if (argc >= 4)
|
||||
host = argv[3];
|
||||
|
||||
if (argc >= 5)
|
||||
port = DB::parse<uint16_t>(argv[4]);
|
||||
|
||||
std::atomic_bool cancel{false};
|
||||
std::vector<std::thread> threads(num_threads);
|
||||
for (auto & thread : threads)
|
||||
{
|
||||
thread = std::thread([&]
|
||||
{
|
||||
for (size_t i = 0; i < num_iterations && !cancel.load(std::memory_order_relaxed); ++i)
|
||||
{
|
||||
std::cerr << ".";
|
||||
|
||||
Poco::Net::SocketAddress address(host, port);
|
||||
|
||||
int fd = socket(PF_INET, SOCK_STREAM, IPPROTO_IP);
|
||||
|
||||
if (fd < 0)
|
||||
DB::throwFromErrno("Cannot create socket", 0);
|
||||
|
||||
linger linger_value;
|
||||
linger_value.l_onoff = 1;
|
||||
linger_value.l_linger = 0;
|
||||
|
||||
if (0 != setsockopt(fd, SOL_SOCKET, SO_LINGER, &linger_value, sizeof(linger_value)))
|
||||
DB::throwFromErrno("Cannot set linger", 0);
|
||||
|
||||
try
|
||||
{
|
||||
Stopwatch watch;
|
||||
|
||||
int res = connect(fd, address.addr(), address.length());
|
||||
|
||||
if (res != 0 && errno != EINPROGRESS && errno != EWOULDBLOCK)
|
||||
{
|
||||
close(fd);
|
||||
DB::throwFromErrno("Cannot connect", 0);
|
||||
}
|
||||
|
||||
close(fd);
|
||||
|
||||
if (watch.elapsedSeconds() > 0.1)
|
||||
{
|
||||
std::cerr << watch.elapsedSeconds() << "\n";
|
||||
cancel = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
std::cerr << e.displayText() << "\n";
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
for (auto & thread : threads)
|
||||
thread.join();
|
||||
|
||||
std::cerr << "\n";
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
std::cerr << e.displayText() << "\n";
|
||||
}
|
@ -289,7 +289,8 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
|
||||
|
||||
while (filt_pos < filt_end_sse)
|
||||
{
|
||||
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
|
||||
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
|
||||
mask = ~mask;
|
||||
|
||||
if (0 == mask)
|
||||
{
|
||||
|
@ -356,7 +356,8 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
|
||||
|
||||
while (filt_pos < filt_end_sse)
|
||||
{
|
||||
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
|
||||
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
|
||||
mask = ~mask;
|
||||
|
||||
if (0 == mask)
|
||||
{
|
||||
|
@ -17,13 +17,17 @@ namespace DB
|
||||
static UInt64 toBits64(const Int8 * bytes64)
|
||||
{
|
||||
static const __m128i zero16 = _mm_setzero_si128();
|
||||
return static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16)))
|
||||
<< 16)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16)))
|
||||
<< 32)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16)))
|
||||
<< 48);
|
||||
UInt64 res =
|
||||
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16))) << 16)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16))) << 32)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16))) << 48);
|
||||
|
||||
return ~res;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -49,7 +53,7 @@ size_t countBytesInFilter(const UInt8 * filt, size_t sz)
|
||||
#endif
|
||||
|
||||
for (; pos < end; ++pos)
|
||||
count += *pos > 0;
|
||||
count += *pos != 0;
|
||||
|
||||
return count;
|
||||
}
|
||||
@ -82,7 +86,7 @@ size_t countBytesInFilterWithNull(const IColumn::Filter & filt, const UInt8 * nu
|
||||
#endif
|
||||
|
||||
for (; pos < end; ++pos)
|
||||
count += (*pos & ~*pos2) > 0;
|
||||
count += (*pos & ~*pos2) != 0;
|
||||
|
||||
return count;
|
||||
}
|
||||
@ -232,9 +236,10 @@ namespace
|
||||
|
||||
while (filt_pos < filt_end_aligned)
|
||||
{
|
||||
const auto mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)),
|
||||
zero_vec));
|
||||
mask = ~mask;
|
||||
|
||||
if (mask == 0)
|
||||
{
|
||||
|
@ -120,9 +120,10 @@ inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_siz
|
||||
|
||||
for (size_t offset = min_size; offset < max_size; offset += 16)
|
||||
{
|
||||
uint16_t mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
uint16_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(longest + offset)),
|
||||
zero16));
|
||||
mask = ~mask;
|
||||
|
||||
if (mask)
|
||||
{
|
||||
|
@ -163,6 +163,7 @@ private:
|
||||
friend class ActionsDAG;
|
||||
};
|
||||
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
using Blocks = std::vector<Block>;
|
||||
using BlocksList = std::list<Block>;
|
||||
using BlocksPtr = std::shared_ptr<Blocks>;
|
||||
|
@ -6,8 +6,10 @@
|
||||
#include <Core/MySQL/PacketsProtocolText.h>
|
||||
#include <Core/MySQL/PacketsReplication.h>
|
||||
#include <Core/MySQL/MySQLReplication.h>
|
||||
#include <Common/DNSResolver.h>
|
||||
#include <Poco/String.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using namespace Generic;
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Poco/Net/NetException.h>
|
||||
#include <Poco/Net/StreamSocket.h>
|
||||
#include <Common/DNSResolver.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/NetException.h>
|
||||
#include <Core/MySQL/IMySQLWritePacket.h>
|
||||
|
@ -139,6 +139,7 @@ class IColumn;
|
||||
\
|
||||
M(UInt64, min_bytes_to_use_direct_io, 0, "The minimum number of bytes for reading the data with O_DIRECT option during SELECT queries execution. 0 - disabled.", 0) \
|
||||
M(UInt64, min_bytes_to_use_mmap_io, 0, "The minimum number of bytes for reading the data with mmap option during SELECT queries execution. 0 - disabled.", 0) \
|
||||
M(Bool, checksum_on_read, true, "Validate checksums on reading. It is enabled by default and should be always enabled in production. Please do not expect any benefits in disabling this setting. It may only be used for experiments and benchmarks. The setting only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over network.", 0) \
|
||||
\
|
||||
M(Bool, force_index_by_date, 0, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
|
||||
M(Bool, force_primary_key, 0, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
|
||||
|
@ -885,15 +885,17 @@ MutableColumnUniquePtr DataTypeLowCardinality::createColumnUniqueImpl(const IDat
|
||||
if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(&keys_type))
|
||||
type = nullable_type->getNestedType().get();
|
||||
|
||||
if (isString(type))
|
||||
WhichDataType which(type);
|
||||
|
||||
if (which.isString())
|
||||
return creator(static_cast<ColumnString *>(nullptr));
|
||||
if (isFixedString(type))
|
||||
else if (which.isFixedString())
|
||||
return creator(static_cast<ColumnFixedString *>(nullptr));
|
||||
if (typeid_cast<const DataTypeDate *>(type))
|
||||
else if (which.isDate())
|
||||
return creator(static_cast<ColumnVector<UInt16> *>(nullptr));
|
||||
if (typeid_cast<const DataTypeDateTime *>(type))
|
||||
else if (which.isDateTime())
|
||||
return creator(static_cast<ColumnVector<UInt32> *>(nullptr));
|
||||
if (isColumnedAsNumber(type))
|
||||
else if (which.isInt() || which.isUInt() || which.isFloat())
|
||||
{
|
||||
MutableColumnUniquePtr column;
|
||||
TypeListNativeNumbers::forEach(CreateColumnVector(column, *type, creator));
|
||||
|
@ -31,6 +31,7 @@ public:
|
||||
|
||||
bool canBeUsedInBitOperations() const override { return true; }
|
||||
bool canBeInsideNullable() const override { return true; }
|
||||
bool canBeInsideLowCardinality() const override { return false; }
|
||||
|
||||
bool canBePromoted() const override { return false; }
|
||||
};
|
||||
|
@ -13,11 +13,13 @@
|
||||
#include <IO/WriteBufferFromOStream.h>
|
||||
#include <ext/range.h>
|
||||
#include <ext/size.h>
|
||||
#include <ext/map.h>
|
||||
#include <ext/chrono_io.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include "CacheDictionary.inc.h"
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -130,8 +132,8 @@ const IDictionarySource * CacheDictionary::getSource() const
|
||||
void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
|
||||
{
|
||||
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_value);
|
||||
|
||||
getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; });
|
||||
DictionaryDefaultValueExtractor<UInt64> default_value_extractor(null_value);
|
||||
getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, default_value_extractor);
|
||||
}
|
||||
|
||||
|
||||
@ -249,34 +251,384 @@ void CacheDictionary::isInConstantVector(const Key child_id, const PaddedPODArra
|
||||
out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end();
|
||||
}
|
||||
|
||||
void CacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
|
||||
ColumnPtr CacheDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto & keys = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
auto keys_size = keys.size();
|
||||
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
const auto null_value = StringRef{std::get<String>(attribute.null_value)};
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
getItemsString(attribute, ids, out, [&](const size_t) { return null_value; });
|
||||
const auto & null_value = std::get<AttributeType>(attribute.null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
getItemsString(attribute, keys, column.get(), default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
getItemsNumberImpl<AttributeType, AttributeType>(attribute, keys, out, default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void CacheDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void CacheDictionary::getItemsNumberImpl(
|
||||
Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ResultArrayType<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
/// First fill everything with default values
|
||||
const auto rows = ext::size(ids);
|
||||
for (const auto row : ext::range(0, rows))
|
||||
out[row] = default_value_extractor[row];
|
||||
|
||||
getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); });
|
||||
/// Maybe there are duplicate keys, so we remember their indices.
|
||||
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
|
||||
|
||||
size_t cache_hit = 0;
|
||||
size_t cache_not_found_count = 0;
|
||||
size_t cache_expired_cound = 0;
|
||||
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
auto insert_to_answer_routine = [&](size_t row, size_t idx)
|
||||
{
|
||||
auto & cell = cells[idx];
|
||||
if (!cell.isDefault())
|
||||
out[row] = static_cast<OutputType>(attribute_array[idx]);
|
||||
};
|
||||
|
||||
/// fetch up-to-date values, decide which ones require update
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
|
||||
/** cell should be updated if either:
|
||||
* 1. ids do not match,
|
||||
* 2. cell has expired,
|
||||
* 3. explicit defaults were specified and cell was set default. */
|
||||
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
++cache_hit;
|
||||
insert_to_answer_routine(row, cell_idx);
|
||||
}
|
||||
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
|
||||
{
|
||||
++cache_not_found_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
}
|
||||
else if (state == ResultState::FoundButExpired)
|
||||
{
|
||||
cache_expired_cound++;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
|
||||
if (allow_read_expired_keys)
|
||||
insert_to_answer_routine(row, cell_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_cound);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows - cache_not_found_count - cache_expired_cound, std::memory_order_release);
|
||||
|
||||
if (!cache_not_found_count)
|
||||
{
|
||||
/// Nothing to update - return
|
||||
if (!cache_expired_cound)
|
||||
return;
|
||||
|
||||
/// Update asynchronously only if allow_read_expired_keys is enabled. TODO: add condvar usage and better code.
|
||||
if (allow_read_expired_keys)
|
||||
{
|
||||
std::vector<Key> required_expired_ids;
|
||||
required_expired_ids.reserve(cache_expired_cound);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
/// request new values
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
|
||||
/// Nothing to do - return
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// From this point we have to update all keys sync.
|
||||
/// Maybe allow_read_expired_keys_from_cache_dictionary is disabled
|
||||
/// and there are no cache_not_found_ids but some cache_expired.
|
||||
|
||||
std::vector<Key> required_ids;
|
||||
required_ids.reserve(cache_not_found_count + cache_expired_cound);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
/// Request new values
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
waitForCurrentUpdateFinish(update_unit_ptr);
|
||||
|
||||
/// Add updated keys to answer.
|
||||
|
||||
const size_t attribute_index = getAttributeIndex(attribute.name);
|
||||
|
||||
for (auto & [key, value] : update_unit_ptr->found_ids)
|
||||
{
|
||||
if (value.found)
|
||||
{
|
||||
for (const size_t row : cache_expired_or_not_found_ids[key])
|
||||
out[row] = std::get<OutputType>(value.values[attribute_index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CacheDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
|
||||
void CacheDictionary::getItemsString(
|
||||
Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ColumnString * out,
|
||||
DictionaryDefaultValueExtractor<String> & default_value_extractor) const
|
||||
{
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; });
|
||||
/// Save on some allocations.
|
||||
out->getOffsets().reserve(rows);
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
|
||||
|
||||
auto found_outdated_values = false;
|
||||
|
||||
/// Perform optimistic version, fallback to pessimistic if failed.
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
/// Fetch up-to-date values, discard on fail.
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
}
|
||||
else
|
||||
{
|
||||
found_outdated_values = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Optimistic code completed successfully.
|
||||
if (!found_outdated_values)
|
||||
{
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows, std::memory_order_release);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, ids.size());
|
||||
return;
|
||||
}
|
||||
|
||||
/// Now onto the pessimistic one, discard possible partial results from the optimistic path.
|
||||
out->getChars().resize_assume_reserved(0);
|
||||
out->getOffsets().resize_assume_reserved(0);
|
||||
|
||||
/// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
|
||||
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
|
||||
/// we are going to store every string separately
|
||||
std::unordered_map<Key, String> local_cache;
|
||||
|
||||
size_t cache_not_found_count = 0;
|
||||
size_t cache_expired_count = 0;
|
||||
|
||||
size_t total_length = 0;
|
||||
size_t cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
auto insert_value_routine = [&](size_t row, size_t id, size_t cell_idx)
|
||||
{
|
||||
const auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
|
||||
|
||||
/// Do not store default, but count it in total length.
|
||||
if (!cell.isDefault())
|
||||
local_cache[id] = String{string_ref};
|
||||
|
||||
total_length += string_ref.size + 1;
|
||||
};
|
||||
|
||||
for (const auto row : ext::range(0, ids.size()))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
++cache_hit;
|
||||
insert_value_routine(row, id, cell_idx);
|
||||
}
|
||||
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
|
||||
{
|
||||
++cache_not_found_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
}
|
||||
else if (state == ResultState::FoundButExpired)
|
||||
{
|
||||
++cache_expired_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
|
||||
if (allow_read_expired_keys)
|
||||
insert_value_routine(row, id, cell_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows - cache_expired_count - cache_not_found_count, std::memory_order_release);
|
||||
|
||||
/// Async update of expired keys.
|
||||
if (!cache_not_found_count)
|
||||
{
|
||||
if (allow_read_expired_keys && cache_expired_count)
|
||||
{
|
||||
std::vector<Key> required_expired_ids;
|
||||
required_expired_ids.reserve(cache_expired_count);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
|
||||
/// Insert all found keys and defaults to output array.
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(ids)))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
StringRef value;
|
||||
|
||||
/// Previously we stored found keys in map.
|
||||
const auto it = local_cache.find(id);
|
||||
if (it != local_cache.end())
|
||||
value = StringRef(it->second);
|
||||
else
|
||||
value = default_value_extractor[row];
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
}
|
||||
|
||||
/// Nothing to do else.
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// We will request both cache_not_found_ids and cache_expired_ids sync.
|
||||
std::vector<Key> required_ids;
|
||||
required_ids.reserve(cache_not_found_count + cache_expired_count);
|
||||
std::transform(
|
||||
std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
waitForCurrentUpdateFinish(update_unit_ptr);
|
||||
|
||||
const size_t attribute_index = getAttributeIndex(attribute.name);
|
||||
|
||||
/// Only calculate the total length.
|
||||
for (auto & [key, value] : update_unit_ptr->found_ids)
|
||||
{
|
||||
if (value.found)
|
||||
{
|
||||
const auto found_value_ref = std::get<String>(value.values[attribute_index]);
|
||||
total_length += (found_value_ref.size() + 1) * cache_expired_or_not_found_ids[key].size();
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto row : cache_expired_or_not_found_ids[key])
|
||||
total_length += default_value_extractor[row].size + 1;
|
||||
}
|
||||
}
|
||||
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(ids)))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
StringRef value;
|
||||
|
||||
/// We have two maps: found in cache and found in source.
|
||||
const auto local_it = local_cache.find(id);
|
||||
if (local_it != local_cache.end())
|
||||
value = StringRef(local_it->second);
|
||||
else
|
||||
{
|
||||
const auto found_it = update_unit_ptr->found_ids.find(id);
|
||||
|
||||
/// Previously we didn't store defaults in local cache.
|
||||
if (found_it != update_unit_ptr->found_ids.end() && found_it->second.found)
|
||||
value = std::get<String>(found_it->second.values[attribute_index]);
|
||||
else
|
||||
value = default_value_extractor[row];
|
||||
}
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class... Ts>
|
||||
struct Overloaded : Ts... {using Ts::operator()...;};
|
||||
|
||||
@ -375,8 +727,14 @@ size_t CacheDictionary::findCellIdxForSet(const Key & id) const
|
||||
return oldest_id;
|
||||
}
|
||||
|
||||
void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr CacheDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
|
||||
{
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
auto result = ColumnUInt8::create(ext::size(ids));
|
||||
auto& out = result->getData();
|
||||
|
||||
/// There are three types of ids.
|
||||
/// - Valid ids. These ids are presented in local cache and their lifetime is not expired.
|
||||
/// - CacheExpired ids. Ids that are in local cache, but their values are rotted (lifetime is expired).
|
||||
@ -444,7 +802,7 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
|
||||
{
|
||||
/// Nothing to update - return;
|
||||
if (!cache_expired_count)
|
||||
return;
|
||||
return result;
|
||||
|
||||
if (allow_read_expired_keys)
|
||||
{
|
||||
@ -458,7 +816,7 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
/// Update is async - no need to wait.
|
||||
return;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
@ -483,6 +841,8 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
|
||||
for (const auto row : cache_expired_or_not_found_ids[key])
|
||||
out[row] = true;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@ -707,7 +1067,7 @@ PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const
|
||||
|
||||
BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<CacheDictionary, Key>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getCachedIds(), column_names);
|
||||
}
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
@ -119,77 +120,20 @@ public:
|
||||
|
||||
std::exception_ptr getLastException() const override;
|
||||
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
|
||||
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void
|
||||
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
|
||||
const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
|
||||
|
||||
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
@ -260,12 +204,18 @@ private:
|
||||
/* NOLINTNEXTLINE(readability-convert-member-functions-to-static) */
|
||||
Attribute createAttributeWithTypeAndName(const AttributeUnderlyingType type, const String & name, const Field & null_value);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void getItemsNumberImpl(
|
||||
Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
|
||||
Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ResultArrayType<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void getItemsString(Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const;
|
||||
void getItemsString(
|
||||
Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ColumnString * out,
|
||||
DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
|
||||
|
||||
PaddedPODArray<Key> getCachedIds() const;
|
||||
|
||||
@ -456,5 +406,6 @@ private:
|
||||
mutable std::condition_variable is_update_finished;
|
||||
|
||||
std::atomic<bool> finished{false};
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,368 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include "CacheDictionary.h"
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Common/ProfilingScopedRWLock.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <ext/chrono_io.h>
|
||||
#include <ext/map.h>
|
||||
#include <ext/range.h>
|
||||
#include <ext/size.h>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event DictCacheKeysRequested;
|
||||
extern const Event DictCacheKeysRequestedMiss;
|
||||
extern const Event DictCacheKeysRequestedFound;
|
||||
extern const Event DictCacheKeysExpired;
|
||||
extern const Event DictCacheKeysNotFound;
|
||||
extern const Event DictCacheKeysHit;
|
||||
extern const Event DictCacheRequestTimeNs;
|
||||
extern const Event DictCacheRequests;
|
||||
extern const Event DictCacheLockWriteNs;
|
||||
extern const Event DictCacheLockReadNs;
|
||||
}
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric DictCacheRequests;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
void CacheDictionary::getItemsNumberImpl(
|
||||
Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
|
||||
{
|
||||
/// First fill everything with default values
|
||||
const auto rows = ext::size(ids);
|
||||
for (const auto row : ext::range(0, rows))
|
||||
out[row] = get_default(row);
|
||||
|
||||
/// Maybe there are duplicate keys, so we remember their indices.
|
||||
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
|
||||
|
||||
size_t cache_hit = 0;
|
||||
size_t cache_not_found_count = 0;
|
||||
size_t cache_expired_cound = 0;
|
||||
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
auto insert_to_answer_routine = [&](size_t row, size_t idx)
|
||||
{
|
||||
auto & cell = cells[idx];
|
||||
if (!cell.isDefault())
|
||||
out[row] = static_cast<OutputType>(attribute_array[idx]);
|
||||
};
|
||||
|
||||
/// fetch up-to-date values, decide which ones require update
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
|
||||
/** cell should be updated if either:
|
||||
* 1. ids do not match,
|
||||
* 2. cell has expired,
|
||||
* 3. explicit defaults were specified and cell was set default. */
|
||||
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
++cache_hit;
|
||||
insert_to_answer_routine(row, cell_idx);
|
||||
}
|
||||
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
|
||||
{
|
||||
++cache_not_found_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
}
|
||||
else if (state == ResultState::FoundButExpired)
|
||||
{
|
||||
cache_expired_cound++;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
|
||||
if (allow_read_expired_keys)
|
||||
insert_to_answer_routine(row, cell_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_cound);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows - cache_not_found_count - cache_expired_cound, std::memory_order_release);
|
||||
|
||||
if (!cache_not_found_count)
|
||||
{
|
||||
/// Nothing to update - return
|
||||
if (!cache_expired_cound)
|
||||
return;
|
||||
|
||||
/// Update async only if allow_read_expired_keys_is_enabledadd condvar usage and better code
|
||||
if (allow_read_expired_keys)
|
||||
{
|
||||
std::vector<Key> required_expired_ids;
|
||||
required_expired_ids.reserve(cache_expired_cound);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
/// request new values
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
|
||||
/// Nothing to do - return
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// From this point we have to update all keys sync.
|
||||
/// Maybe allow_read_expired_keys_from_cache_dictionary is disabled
|
||||
/// and there no cache_not_found_ids but some cache_expired.
|
||||
|
||||
std::vector<Key> required_ids;
|
||||
required_ids.reserve(cache_not_found_count + cache_expired_cound);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
/// Request new values
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
waitForCurrentUpdateFinish(update_unit_ptr);
|
||||
|
||||
/// Add updated keys to answer.
|
||||
|
||||
const size_t attribute_index = getAttributeIndex(attribute.name);
|
||||
|
||||
for (auto & [key, value] : update_unit_ptr->found_ids)
|
||||
{
|
||||
if (value.found)
|
||||
{
|
||||
for (const size_t row : cache_expired_or_not_found_ids[key])
|
||||
out[row] = std::get<OutputType>(value.values[attribute_index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void CacheDictionary::getItemsString(
|
||||
Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const
|
||||
{
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
/// Save on some allocations.
|
||||
out->getOffsets().reserve(rows);
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
|
||||
|
||||
auto found_outdated_values = false;
|
||||
|
||||
/// Perform optimistic version, fallback to pessimistic if failed.
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
/// Fetch up-to-date values, discard on fail.
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
}
|
||||
else
|
||||
{
|
||||
found_outdated_values = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Optimistic code completed successfully.
|
||||
if (!found_outdated_values)
|
||||
{
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows, std::memory_order_release);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, ids.size());
|
||||
return;
|
||||
}
|
||||
|
||||
/// Now onto the pessimistic one, discard possible partial results from the optimistic path.
|
||||
out->getChars().resize_assume_reserved(0);
|
||||
out->getOffsets().resize_assume_reserved(0);
|
||||
|
||||
/// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
|
||||
std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
|
||||
/// we are going to store every string separately
|
||||
std::unordered_map<Key, String> local_cache;
|
||||
|
||||
size_t cache_not_found_count = 0;
|
||||
size_t cache_expired_count = 0;
|
||||
|
||||
size_t total_length = 0;
|
||||
size_t cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
auto insert_value_routine = [&](size_t row, size_t id, size_t cell_idx)
|
||||
{
|
||||
const auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
|
||||
|
||||
/// Do not store default, but count it in total length.
|
||||
if (!cell.isDefault())
|
||||
local_cache[id] = String{string_ref};
|
||||
|
||||
total_length += string_ref.size + 1;
|
||||
};
|
||||
|
||||
for (const auto row : ext::range(0, ids.size()))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
const auto [cell_idx, state] = findCellIdxForGet(id, now);
|
||||
|
||||
if (state == ResultState::FoundAndValid)
|
||||
{
|
||||
++cache_hit;
|
||||
insert_value_routine(row, id, cell_idx);
|
||||
}
|
||||
else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
|
||||
{
|
||||
++cache_not_found_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
}
|
||||
else if (state == ResultState::FoundButExpired)
|
||||
{
|
||||
++cache_expired_count;
|
||||
cache_expired_or_not_found_ids[id].push_back(row);
|
||||
|
||||
if (allow_read_expired_keys)
|
||||
insert_value_routine(row, id, cell_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows - cache_expired_count - cache_not_found_count, std::memory_order_release);
|
||||
|
||||
/// Async update of expired keys.
|
||||
if (!cache_not_found_count)
|
||||
{
|
||||
if (allow_read_expired_keys && cache_expired_count)
|
||||
{
|
||||
std::vector<Key> required_expired_ids;
|
||||
required_expired_ids.reserve(cache_expired_count);
|
||||
std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
|
||||
/// Insert all found keys and defaults to output array.
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(ids)))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
StringRef value;
|
||||
|
||||
/// Previously we stored found keys in map.
|
||||
const auto it = local_cache.find(id);
|
||||
if (it != local_cache.end())
|
||||
value = StringRef(it->second);
|
||||
else
|
||||
value = get_default(row);
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
}
|
||||
|
||||
/// Nothing to do else.
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// We will request both cache_not_found_ids and cache_expired_ids sync.
|
||||
std::vector<Key> required_ids;
|
||||
required_ids.reserve(cache_not_found_count + cache_expired_count);
|
||||
std::transform(
|
||||
std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
|
||||
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
|
||||
|
||||
auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
|
||||
|
||||
tryPushToUpdateQueueOrThrow(update_unit_ptr);
|
||||
waitForCurrentUpdateFinish(update_unit_ptr);
|
||||
|
||||
const size_t attribute_index = getAttributeIndex(attribute.name);
|
||||
|
||||
/// Only calculate the total length.
|
||||
for (auto & [key, value] : update_unit_ptr->found_ids)
|
||||
{
|
||||
if (value.found)
|
||||
{
|
||||
const auto found_value_ref = std::get<String>(value.values[attribute_index]);
|
||||
total_length += (found_value_ref.size() + 1) * cache_expired_or_not_found_ids[key].size();
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto row : cache_expired_or_not_found_ids[key])
|
||||
total_length += get_default(row).size + 1;
|
||||
}
|
||||
}
|
||||
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(ids)))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
StringRef value;
|
||||
|
||||
/// We have two maps: found in cache and found in source.
|
||||
const auto local_it = local_cache.find(id);
|
||||
if (local_it != local_cache.end())
|
||||
value = StringRef(local_it->second);
|
||||
else
|
||||
{
|
||||
const auto found_it = update_unit_ptr->found_ids.find(id);
|
||||
|
||||
/// Previously we didn't store defaults in local cache.
|
||||
if (found_it != update_unit_ptr->found_ids.end() && found_it->second.found)
|
||||
value = std::get<String>(found_it->second.values[attribute_index]);
|
||||
else
|
||||
value = get_default(row);
|
||||
}
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
#include <Dictionaries/CacheDictionary.h>
|
||||
#include <Dictionaries/CacheDictionary.inc.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// Expands to the definition of CacheDictionary::get<TYPE>(attribute_name, ids, out):
/// looks the attribute up by name, passes its type to checkAttributeType together with
/// AttributeUnderlyingType::ut<TYPE>, and delegates to getItemsNumberImpl with a default
/// getter that returns the attribute's own null_value for every row.
#define DEFINE(TYPE) \
|
||||
void CacheDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) \
|
||||
const \
|
||||
{ \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
const auto null_value = std::get<TYPE>(attribute.null_value); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,34 +0,0 @@
|
||||
#include <Dictionaries/CacheDictionary.h>
|
||||
#include <Dictionaries/CacheDictionary.inc.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// Expands to the definition of CacheDictionary::get<TYPE>(attribute_name, ids, def, out):
/// looks the attribute up by name, passes its type to checkAttributeType together with
/// AttributeUnderlyingType::ut<TYPE>, and delegates to getItemsNumberImpl with a default
/// getter that returns def[row], i.e. a per-row default taken from the `def` array.
#define DEFINE(TYPE) \
|
||||
void CacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
#include <Dictionaries/CacheDictionary.h>
|
||||
#include <Dictionaries/CacheDictionary.inc.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// Expands to the definition of CacheDictionary::get<TYPE>(attribute_name, ids, def, out):
/// looks the attribute up by name, passes its type to checkAttributeType together with
/// AttributeUnderlyingType::ut<TYPE>, and delegates to getItemsNumberImpl with a default
/// getter that returns the single constant `def` for every row.
#define DEFINE(TYPE) \
|
||||
void CacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t) { return def; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -10,7 +10,8 @@
|
||||
#include <ext/range.h>
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -70,48 +71,50 @@ ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(
|
||||
createAttributes();
|
||||
}
|
||||
|
||||
|
||||
void ComplexKeyCacheDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto null_value = StringRef{std::get<String>(attribute.null_values)};
|
||||
|
||||
getItemsString(attribute, key_columns, out, [&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
void ComplexKeyCacheDictionary::getString(
|
||||
ColumnPtr ComplexKeyCacheDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsString(attribute, key_columns, out, [&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
|
||||
void ComplexKeyCacheDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
ColumnPtr result;
|
||||
|
||||
auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; });
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto & null_value = std::get<AttributeType>(attribute.null_values);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
getItemsString(attribute, key_columns, out, default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
getItemsNumberImpl<AttributeType, AttributeType>(attribute, key_columns, out, default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag,
|
||||
@ -158,15 +161,21 @@ ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata
|
||||
return {oldest_id, false, false};
|
||||
}
|
||||
|
||||
void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr ComplexKeyCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
|
||||
auto result = ColumnUInt8::create(rows_num);
|
||||
auto& out = result->getData();
|
||||
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
out[row] = false;
|
||||
|
||||
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
|
||||
MapType<std::vector<size_t>> outdated_keys;
|
||||
|
||||
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
Arena temporary_keys_pool;
|
||||
@ -212,7 +221,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
|
||||
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
|
||||
|
||||
if (outdated_keys.empty())
|
||||
return;
|
||||
return result;
|
||||
|
||||
std::vector<size_t> required_rows(outdated_keys.size());
|
||||
std::transform(
|
||||
@ -233,8 +242,395 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
|
||||
for (const auto out_idx : outdated_keys[key])
|
||||
out[out_idx] = false;
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void ComplexKeyCacheDictionary::getItemsNumberImpl(
|
||||
Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
PaddedPODArray<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
|
||||
MapType<std::vector<size_t>> outdated_keys;
|
||||
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
|
||||
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
Arena temporary_keys_pool;
|
||||
PODArray<StringRef> keys_array(rows_num);
|
||||
|
||||
size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
/// fetch up-to-date values, decide which ones require update
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
keys_array[row] = key;
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
/** cell should be updated if either:
|
||||
* 1. keys (or hash) do not match,
|
||||
* 2. cell has expired,
|
||||
* 3. explicit defaults were specified and cell was set default. */
|
||||
|
||||
if (!find_result.valid)
|
||||
{
|
||||
outdated_keys[key].push_back(row);
|
||||
if (find_result.outdated)
|
||||
++cache_expired;
|
||||
else
|
||||
++cache_not_found;
|
||||
}
|
||||
else
|
||||
{
|
||||
++cache_hit;
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
const auto & cell = cells[cell_idx];
|
||||
out[row] = cell.isDefault() ? default_value_extractor[row] : static_cast<OutputType>(attribute_array[cell_idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
query_count.fetch_add(rows_num, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
|
||||
|
||||
if (outdated_keys.empty())
|
||||
return;
|
||||
|
||||
std::vector<size_t> required_rows(outdated_keys.size());
|
||||
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
|
||||
{
|
||||
return pair.getMapped().front();
|
||||
});
|
||||
|
||||
/// request new values
|
||||
update(
|
||||
key_columns,
|
||||
keys_array,
|
||||
required_rows,
|
||||
[&](const StringRef key, const size_t cell_idx)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
out[row] = static_cast<OutputType>(attribute_array[cell_idx]);
|
||||
},
|
||||
[&](const StringRef key, const size_t)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
out[row] = default_value_extractor[row];
|
||||
});
|
||||
}
|
||||
|
||||
/** Appends to `out` the value of a string `attribute` for every row of `key_columns`, in order.
  *
  * Optimistic pass: under the read lock, stream values straight into `out` while every
  * row's key has a valid cell (default cells contribute `default_value_extractor[row]`).
  * If some key has no valid cell, the partial output is discarded and the pessimistic pass
  * runs: strings of valid cells are copied into the arena and remembered per key, keys
  * without a valid cell are passed to update(), and the output is finally assembled from
  * the remembered strings, falling back to `default_value_extractor[row]`.
  *
  * `hit_count` and the DictCacheKeysHit profile event are both counted in rows.
  */
void ComplexKeyCacheDictionary::getItemsString(
    Attribute & attribute,
    const Columns & key_columns,
    ColumnString * out,
    DictionaryDefaultValueExtractor<String> & default_value_extractor) const
{
    const auto rows_num = key_columns.front()->size();
    /// save on some allocations
    out->getOffsets().reserve(rows_num);

    const auto keys_size = dict_struct.key->size();
    StringRefs keys(keys_size);
    Arena temporary_keys_pool;

    auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);

    auto found_outdated_values = false;

    /// perform optimistic version, fallback to pessimistic if failed
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        /// fetch up-to-date values, discard on fail
        for (const auto row : ext::range(0, rows_num))
        {
            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
            /// The key is needed only for this lookup, so give its arena memory back right away.
            SCOPE_EXIT(temporary_keys_pool.rollback(key.size));
            const auto find_result = findCellIdx(key, now);

            if (!find_result.valid)
            {
                found_outdated_values = true;
                break;
            }
            else
            {
                const auto & cell_idx = find_result.cell_idx;
                const auto & cell = cells[cell_idx];
                const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
                out->insertData(string_ref.data, string_ref.size);
            }
        }
    }

    /// optimistic code completed successfully
    if (!found_outdated_values)
    {
        query_count.fetch_add(rows_num, std::memory_order_relaxed);
        hit_count.fetch_add(rows_num, std::memory_order_release);
        /// Every row was a hit; report it, as CacheDictionary::getItemsString does on its fast path.
        ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, rows_num);
        return;
    }

    /// now onto the pessimistic one, discard possible partial results from the optimistic path
    out->getChars().resize_assume_reserved(0);
    out->getOffsets().resize_assume_reserved(0);

    /// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
    MapType<std::vector<size_t>> outdated_keys;
    /// we are going to store every string separately
    MapType<StringRef> map;
    PODArray<StringRef> keys_array(rows_num);

    size_t total_length = 0;
    size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        for (const auto row : ext::range(0, rows_num))
        {
            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
            keys_array[row] = key;
            const auto find_result = findCellIdx(key, now);

            if (!find_result.valid)
            {
                outdated_keys[key].push_back(row);
                if (find_result.outdated)
                    ++cache_expired;
                else
                    ++cache_not_found;
            }
            else
            {
                ++cache_hit;
                const auto & cell_idx = find_result.cell_idx;
                const auto & cell = cells[cell_idx];
                const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];

                /// Defaults are not remembered: the final loop falls back to the extractor for them.
                if (!cell.isDefault())
                    map[key] = copyIntoArena(string_ref, temporary_keys_pool);

                total_length += string_ref.size + 1;
            }
        }
    }
    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);

    query_count.fetch_add(rows_num, std::memory_order_relaxed);
    /// Count hits per row. `outdated_keys.size()` is the number of DISTINCT outdated keys,
    /// so subtracting it from the row count over-reports hits when a key repeats across rows.
    hit_count.fetch_add(cache_hit, std::memory_order_release);

    /// request new values
    if (!outdated_keys.empty())
    {
        /// One representative row per distinct outdated key.
        std::vector<size_t> required_rows(outdated_keys.size());
        std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
        {
            return pair.getMapped().front();
        });

        update(
            key_columns,
            keys_array,
            required_rows,
            [&](const StringRef key, const size_t cell_idx)
            {
                const StringRef attribute_value = attribute_array[cell_idx];

                /// We must copy key and value to own memory, because it may be replaced with another
                /// in next iterations of inner loop of update.
                const StringRef copied_key = copyIntoArena(key, temporary_keys_pool);
                const StringRef copied_value = copyIntoArena(attribute_value, temporary_keys_pool);

                map[copied_key] = copied_value;
                total_length += (attribute_value.size + 1) * outdated_keys[key].size();
            },
            [&](const StringRef key, const size_t)
            {
                for (const auto row : outdated_keys[key])
                    total_length += default_value_extractor[row].size + 1;
            });
    }

    out->getChars().reserve(total_length);

    for (const auto row : ext::range(0, ext::size(keys_array)))
    {
        const StringRef key = keys_array[row];
        auto * const it = map.find(key);
        const auto string_ref = it ? it->getMapped() : default_value_extractor[row];
        out->insertData(string_ref.data, string_ref.size);
    }
}
|
||||
|
||||
template <typename PresentKeyHandler, typename AbsentKeyHandler>
|
||||
void ComplexKeyCacheDictionary::update(
|
||||
const Columns & in_key_columns,
|
||||
const PODArray<StringRef> & in_keys,
|
||||
const std::vector<size_t> & in_requested_rows,
|
||||
PresentKeyHandler && on_cell_updated,
|
||||
AbsentKeyHandler && on_key_not_found) const
|
||||
{
|
||||
MapType<bool> remaining_keys{in_requested_rows.size()};
|
||||
for (const auto row : in_requested_rows)
|
||||
remaining_keys.insert({in_keys[row], false});
|
||||
|
||||
std::uniform_int_distribution<UInt64> distribution(dict_lifetime.min_sec, dict_lifetime.max_sec);
|
||||
|
||||
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
|
||||
{
|
||||
Stopwatch watch;
|
||||
auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows);
|
||||
stream->readPrefix();
|
||||
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
|
||||
const auto attributes_size = attributes.size();
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
/// cache column pointers
|
||||
const auto key_columns = ext::map<Columns>(
|
||||
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
|
||||
|
||||
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
|
||||
{
|
||||
return block.safeGetByPosition(keys_size + attribute_idx).column;
|
||||
});
|
||||
|
||||
const auto rows_num = block.rows();
|
||||
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
auto key = allocKey(row, key_columns, keys);
|
||||
const auto hash = StringRefHash{}(key);
|
||||
const auto find_result = findCellIdx(key, now, hash);
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
auto & cell = cells[cell_idx];
|
||||
|
||||
for (const auto attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
const auto & attribute_column = *attribute_columns[attribute_idx];
|
||||
auto & attribute = attributes[attribute_idx];
|
||||
|
||||
setAttributeValue(attribute, cell_idx, attribute_column[row]);
|
||||
}
|
||||
|
||||
/// if cell id is zero and zero does not map to this cell, then the cell is unused
|
||||
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
|
||||
element_count.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
/// handle memory allocated for old key
|
||||
if (key == cell.key)
|
||||
{
|
||||
freeKey(key);
|
||||
key = cell.key;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// new key is different from the old one
|
||||
if (cell.key.data)
|
||||
freeKey(cell.key);
|
||||
|
||||
cell.key = key;
|
||||
}
|
||||
|
||||
cell.hash = hash;
|
||||
|
||||
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
|
||||
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
|
||||
else
|
||||
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
|
||||
|
||||
/// inform caller
|
||||
on_cell_updated(key, cell_idx);
|
||||
/// mark corresponding id as found
|
||||
remaining_keys[key] = true;
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, in_requested_rows.size());
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed());
|
||||
}
|
||||
|
||||
size_t found_num = 0;
|
||||
size_t not_found_num = 0;
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
/// Check which ids have not been found and require setting null_value
|
||||
for (const auto & key_found_pair : remaining_keys)
|
||||
{
|
||||
if (key_found_pair.getMapped())
|
||||
{
|
||||
++found_num;
|
||||
continue;
|
||||
}
|
||||
|
||||
++not_found_num;
|
||||
|
||||
auto key = key_found_pair.getKey();
|
||||
const auto hash = StringRefHash{}(key);
|
||||
const auto find_result = findCellIdx(key, now, hash);
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
auto & cell = cells[cell_idx];
|
||||
|
||||
/// Set null_value for each attribute
|
||||
for (auto & attribute : attributes)
|
||||
setDefaultAttributeValue(attribute, cell_idx);
|
||||
|
||||
/// Check if cell had not been occupied before and increment element counter if it hadn't
|
||||
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
|
||||
element_count.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
if (key == cell.key)
|
||||
key = cell.key;
|
||||
else
|
||||
{
|
||||
if (cell.key.data)
|
||||
freeKey(cell.key);
|
||||
|
||||
/// copy key from temporary pool
|
||||
key = copyKey(key);
|
||||
cell.key = key;
|
||||
}
|
||||
|
||||
cell.hash = hash;
|
||||
|
||||
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
|
||||
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
|
||||
else
|
||||
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
|
||||
|
||||
cell.setDefault();
|
||||
|
||||
/// inform caller that the cell has not been found
|
||||
on_key_not_found(key, cell_idx);
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num);
|
||||
}
|
||||
|
||||
|
||||
void ComplexKeyCacheDictionary::createAttributes()
|
||||
{
|
||||
const auto attributes_size = dict_struct.attributes.size();
|
||||
@ -263,6 +659,102 @@ ComplexKeyCacheDictionary::Attribute & ComplexKeyCacheDictionary::getAttribute(c
|
||||
return attributes[it->second];
|
||||
}
|
||||
|
||||
/// Resets the value stored at position `idx` of `attribute` to the attribute's null value.
/// For string attributes the slot is pointed at the null value string itself; memory previously
/// held by the slot is returned to `string_arena`.
void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const
{
    auto reset_to_null_value = [&](const auto & dictionary_attribute_type)
    {
        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
        using AttributeType = typename Type::AttributeType;

        if constexpr (!std::is_same_v<AttributeType, String>)
        {
            std::get<ContainerPtrType<AttributeType>>(attribute.arrays)[idx] = std::get<AttributeType>(attribute.null_values);
        }
        else
        {
            const auto & null_value = std::get<String>(attribute.null_values);
            auto & stored = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];

            /// The slot already points at the null value: nothing to release or reassign.
            if (stored.data == null_value.data())
                return;

            if (stored.data)
                string_arena->free(const_cast<char *>(stored.data), stored.size);

            stored = StringRef{null_value};
        }
    };

    callOnDictionaryAttributeType(attribute.type, reset_to_null_value);
}
|
||||
|
||||
/// Builds an Attribute of the given underlying type: stores the null value taken from `null_value`,
/// allocates a container of `size` elements for the cached values and adds the container's
/// byte size to `bytes_allocated`. The string arena is created on first use by a string attribute.
ComplexKeyCacheDictionary::Attribute
ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{
    Attribute result{type, {}, {}};

    auto init_storage = [&](const auto & dictionary_attribute_type)
    {
        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
        using AttributeType = typename Type::AttributeType;

        if constexpr (!std::is_same_v<AttributeType, String>)
        {
            result.null_values = AttributeType(null_value.get<NearestFieldType<AttributeType>>()); /* NOLINT */
            result.arrays = std::make_unique<ContainerType<AttributeType>>(size); /* NOLINT */
            bytes_allocated += size * sizeof(AttributeType);
        }
        else
        {
            result.null_values = null_value.get<String>();
            result.arrays = std::make_unique<ContainerType<StringRef>>(size);
            bytes_allocated += size * sizeof(StringRef);

            if (!string_arena)
                string_arena = std::make_unique<ArenaWithFreeLists>();
        }
    };

    callOnDictionaryAttributeType(type, init_storage);

    return result;
}
|
||||
|
||||
/// Stores `value` at position `idx` of the attribute's container.
/// String values are copied into memory taken from `string_arena`. Memory previously held by the
/// slot is released first, unless the slot points at the attribute's null value.
/// An empty string is stored as an empty StringRef without allocating.
void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const
{
    auto store_value = [&](const auto & dictionary_attribute_type)
    {
        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
        using AttributeType = typename Type::AttributeType;

        if constexpr (!std::is_same_v<AttributeType, String>)
        {
            std::get<ContainerPtrType<AttributeType>>(attribute.arrays)[idx] = value.get<NearestFieldType<AttributeType>>();
        }
        else
        {
            const auto & new_string = value.get<String>();
            auto & stored = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
            const auto & null_value = std::get<String>(attribute.null_values);

            /// The null value string is not arena memory and must not be freed.
            const bool holds_arena_memory = stored.data && stored.data != null_value.data();
            if (holds_arena_memory)
                string_arena->free(const_cast<char *>(stored.data), stored.size);

            const auto length = new_string.size();
            if (length == 0)
            {
                stored = {};
                return;
            }

            auto * buffer = string_arena->alloc(length);
            std::copy(new_string.data(), new_string.data() + length, buffer);
            stored = StringRef{buffer, length};
        }
    };

    callOnDictionaryAttributeType(attribute.type, store_value);
}
|
||||
|
||||
StringRef ComplexKeyCacheDictionary::allocKey(const size_t row, const Columns & key_columns, StringRefs & keys) const
|
||||
{
|
||||
if (key_size_is_fixed)
|
||||
@ -388,7 +880,7 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names &
|
||||
keys.push_back(cells[idx].key);
|
||||
}
|
||||
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyCacheDictionary, UInt64>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, keys, column_names);
|
||||
}
|
||||
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -89,93 +89,16 @@ public:
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
/// In all functions below, key_columns must be full (non-constant) columns.
|
||||
/// See the requirement in IDataType.h for text-serialization functions.
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const;
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -252,227 +175,18 @@ private:
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void getItemsNumberImpl(
|
||||
Attribute & attribute, const Columns & key_columns, PaddedPODArray<OutputType> & out, DefaultGetter && get_default) const
|
||||
{
|
||||
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
|
||||
MapType<std::vector<size_t>> outdated_keys;
|
||||
auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
|
||||
Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
PaddedPODArray<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
Arena temporary_keys_pool;
|
||||
PODArray<StringRef> keys_array(rows_num);
|
||||
|
||||
size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
/// fetch up-to-date values, decide which ones require update
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
keys_array[row] = key;
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
/** cell should be updated if either:
|
||||
* 1. keys (or hash) do not match,
|
||||
* 2. cell has expired,
|
||||
* 3. explicit defaults were specified and cell was set default. */
|
||||
|
||||
if (!find_result.valid)
|
||||
{
|
||||
outdated_keys[key].push_back(row);
|
||||
if (find_result.outdated)
|
||||
++cache_expired;
|
||||
else
|
||||
++cache_not_found;
|
||||
}
|
||||
else
|
||||
{
|
||||
++cache_hit;
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
const auto & cell = cells[cell_idx];
|
||||
out[row] = cell.isDefault() ? get_default(row) : static_cast<OutputType>(attribute_array[cell_idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
query_count.fetch_add(rows_num, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
|
||||
|
||||
if (outdated_keys.empty())
|
||||
return;
|
||||
|
||||
std::vector<size_t> required_rows(outdated_keys.size());
|
||||
std::transform(
|
||||
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); });
|
||||
|
||||
/// request new values
|
||||
update(
|
||||
key_columns,
|
||||
keys_array,
|
||||
required_rows,
|
||||
[&](const StringRef key, const size_t cell_idx)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
out[row] = static_cast<OutputType>(attribute_array[cell_idx]);
|
||||
},
|
||||
[&](const StringRef key, const size_t)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
out[row] = get_default(row);
|
||||
});
|
||||
}
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void getItemsString(Attribute & attribute, const Columns & key_columns, ColumnString * out, DefaultGetter && get_default) const
|
||||
{
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
/// save on some allocations
|
||||
out->getOffsets().reserve(rows_num);
|
||||
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
Arena temporary_keys_pool;
|
||||
|
||||
auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
|
||||
|
||||
auto found_outdated_values = false;
|
||||
|
||||
/// perform optimistic version, fallback to pessimistic if failed
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
/// fetch up-to-date values, discard on fail
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
SCOPE_EXIT(temporary_keys_pool.rollback(key.size));
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
if (!find_result.valid)
|
||||
{
|
||||
found_outdated_values = true;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
const auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// optimistic code completed successfully
|
||||
if (!found_outdated_values)
|
||||
{
|
||||
query_count.fetch_add(rows_num, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows_num, std::memory_order_release);
|
||||
return;
|
||||
}
|
||||
|
||||
/// now onto the pessimistic one, discard possible partial results from the optimistic path
|
||||
out->getChars().resize_assume_reserved(0);
|
||||
out->getOffsets().resize_assume_reserved(0);
|
||||
|
||||
/// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
|
||||
MapType<std::vector<size_t>> outdated_keys;
|
||||
/// we are going to store every string separately
|
||||
MapType<StringRef> map;
|
||||
PODArray<StringRef> keys_array(rows_num);
|
||||
|
||||
size_t total_length = 0;
|
||||
size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
keys_array[row] = key;
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
if (!find_result.valid)
|
||||
{
|
||||
outdated_keys[key].push_back(row);
|
||||
if (find_result.outdated)
|
||||
++cache_expired;
|
||||
else
|
||||
++cache_not_found;
|
||||
}
|
||||
else
|
||||
{
|
||||
++cache_hit;
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
const auto & cell = cells[cell_idx];
|
||||
const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
|
||||
|
||||
if (!cell.isDefault())
|
||||
map[key] = copyIntoArena(string_ref, temporary_keys_pool);
|
||||
|
||||
total_length += string_ref.size + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
|
||||
|
||||
query_count.fetch_add(rows_num, std::memory_order_relaxed);
|
||||
hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
|
||||
|
||||
/// request new values
|
||||
if (!outdated_keys.empty())
|
||||
{
|
||||
std::vector<size_t> required_rows(outdated_keys.size());
|
||||
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
|
||||
{
|
||||
return pair.getMapped().front();
|
||||
});
|
||||
|
||||
update(
|
||||
key_columns,
|
||||
keys_array,
|
||||
required_rows,
|
||||
[&](const StringRef key, const size_t cell_idx)
|
||||
{
|
||||
const StringRef attribute_value = attribute_array[cell_idx];
|
||||
|
||||
/// We must copy key and value to own memory, because it may be replaced with another
|
||||
/// in next iterations of inner loop of update.
|
||||
const StringRef copied_key = copyIntoArena(key, temporary_keys_pool);
|
||||
const StringRef copied_value = copyIntoArena(attribute_value, temporary_keys_pool);
|
||||
|
||||
map[copied_key] = copied_value;
|
||||
total_length += (attribute_value.size + 1) * outdated_keys[key].size();
|
||||
},
|
||||
[&](const StringRef key, const size_t)
|
||||
{
|
||||
for (const auto row : outdated_keys[key])
|
||||
total_length += get_default(row).size + 1;
|
||||
});
|
||||
}
|
||||
|
||||
out->getChars().reserve(total_length);
|
||||
|
||||
for (const auto row : ext::range(0, ext::size(keys_array)))
|
||||
{
|
||||
const StringRef key = keys_array[row];
|
||||
const auto it = map.find(key);
|
||||
const auto string_ref = it ? it->getMapped() : get_default(row);
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
}
|
||||
}
|
||||
void getItemsString(
|
||||
Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ColumnString * out,
|
||||
DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
|
||||
|
||||
template <typename PresentKeyHandler, typename AbsentKeyHandler>
|
||||
void update(
|
||||
@ -480,152 +194,7 @@ private:
|
||||
const PODArray<StringRef> & in_keys,
|
||||
const std::vector<size_t> & in_requested_rows,
|
||||
PresentKeyHandler && on_cell_updated,
|
||||
AbsentKeyHandler && on_key_not_found) const
|
||||
{
|
||||
MapType<bool> remaining_keys{in_requested_rows.size()};
|
||||
for (const auto row : in_requested_rows)
|
||||
remaining_keys.insert({in_keys[row], false});
|
||||
|
||||
std::uniform_int_distribution<UInt64> distribution(dict_lifetime.min_sec, dict_lifetime.max_sec);
|
||||
|
||||
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
|
||||
{
|
||||
Stopwatch watch;
|
||||
auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows);
|
||||
stream->readPrefix();
|
||||
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys(keys_size);
|
||||
|
||||
const auto attributes_size = attributes.size();
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
/// cache column pointers
|
||||
const auto key_columns = ext::map<Columns>(
|
||||
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
|
||||
|
||||
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
|
||||
{
|
||||
return block.safeGetByPosition(keys_size + attribute_idx).column;
|
||||
});
|
||||
|
||||
const auto rows_num = block.rows();
|
||||
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
auto key = allocKey(row, key_columns, keys);
|
||||
const auto hash = StringRefHash{}(key);
|
||||
const auto find_result = findCellIdx(key, now, hash);
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
auto & cell = cells[cell_idx];
|
||||
|
||||
for (const auto attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
const auto & attribute_column = *attribute_columns[attribute_idx];
|
||||
auto & attribute = attributes[attribute_idx];
|
||||
|
||||
setAttributeValue(attribute, cell_idx, attribute_column[row]);
|
||||
}
|
||||
|
||||
/// if cell id is zero and zero does not map to this cell, then the cell is unused
|
||||
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
|
||||
element_count.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
/// handle memory allocated for old key
|
||||
if (key == cell.key)
|
||||
{
|
||||
freeKey(key);
|
||||
key = cell.key;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// new key is different from the old one
|
||||
if (cell.key.data)
|
||||
freeKey(cell.key);
|
||||
|
||||
cell.key = key;
|
||||
}
|
||||
|
||||
cell.hash = hash;
|
||||
|
||||
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
|
||||
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
|
||||
else
|
||||
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
|
||||
|
||||
/// inform caller
|
||||
on_cell_updated(key, cell_idx);
|
||||
/// mark corresponding id as found
|
||||
remaining_keys[key] = true;
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, in_requested_rows.size());
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed());
|
||||
}
|
||||
|
||||
size_t found_num = 0;
|
||||
size_t not_found_num = 0;
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
/// Check which ids have not been found and require setting null_value
|
||||
for (const auto & key_found_pair : remaining_keys)
|
||||
{
|
||||
if (key_found_pair.getMapped())
|
||||
{
|
||||
++found_num;
|
||||
continue;
|
||||
}
|
||||
|
||||
++not_found_num;
|
||||
|
||||
auto key = key_found_pair.getKey();
|
||||
const auto hash = StringRefHash{}(key);
|
||||
const auto find_result = findCellIdx(key, now, hash);
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
auto & cell = cells[cell_idx];
|
||||
|
||||
/// Set null_value for each attribute
|
||||
for (auto & attribute : attributes)
|
||||
setDefaultAttributeValue(attribute, cell_idx);
|
||||
|
||||
/// Check if cell had not been occupied before and increment element counter if it hadn't
|
||||
if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
|
||||
element_count.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
if (key == cell.key)
|
||||
key = cell.key;
|
||||
else
|
||||
{
|
||||
if (cell.key.data)
|
||||
freeKey(cell.key);
|
||||
|
||||
/// copy key from temporary pool
|
||||
key = copyKey(key);
|
||||
cell.key = key;
|
||||
}
|
||||
|
||||
cell.hash = hash;
|
||||
|
||||
if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
|
||||
cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
|
||||
else
|
||||
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
|
||||
|
||||
cell.setDefault();
|
||||
|
||||
/// inform caller that the cell has not been found
|
||||
on_key_not_found(key, cell_idx);
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num);
|
||||
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num);
|
||||
}
|
||||
AbsentKeyHandler && on_key_not_found) const;
|
||||
|
||||
UInt64 getCellIdx(const StringRef key) const;
|
||||
|
||||
|
@ -1,45 +0,0 @@
|
||||
#include "ComplexKeyCacheDictionary.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
ComplexKeyCacheDictionary::Attribute
|
||||
ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}};
|
||||
|
||||
switch (type)
|
||||
{
|
||||
#define DISPATCH(TYPE) \
|
||||
case AttributeUnderlyingType::ut##TYPE: \
|
||||
attr.null_values = TYPE(null_value.get<NearestFieldType<TYPE>>()); /* NOLINT */ \
|
||||
attr.arrays = std::make_unique<ContainerType<TYPE>>(size); /* NOLINT */ \
|
||||
bytes_allocated += size * sizeof(TYPE); \
|
||||
break;
|
||||
DISPATCH(UInt8)
|
||||
DISPATCH(UInt16)
|
||||
DISPATCH(UInt32)
|
||||
DISPATCH(UInt64)
|
||||
DISPATCH(UInt128)
|
||||
DISPATCH(Int8)
|
||||
DISPATCH(Int16)
|
||||
DISPATCH(Int32)
|
||||
DISPATCH(Int64)
|
||||
DISPATCH(Decimal32)
|
||||
DISPATCH(Decimal64)
|
||||
DISPATCH(Decimal128)
|
||||
DISPATCH(Float32)
|
||||
DISPATCH(Float64)
|
||||
#undef DISPATCH
|
||||
case AttributeUnderlyingType::utString:
|
||||
attr.null_values = null_value.get<String>();
|
||||
attr.arrays = std::make_unique<ContainerType<StringRef>>(size);
|
||||
bytes_allocated += size * sizeof(StringRef);
|
||||
if (!string_arena)
|
||||
string_arena = std::make_unique<ArenaWithFreeLists>();
|
||||
break;
|
||||
}
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
#include <Dictionaries/ComplexKeyCacheDictionary.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void ComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
#include <Dictionaries/ComplexKeyCacheDictionary.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void ComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
#include <Dictionaries/ComplexKeyCacheDictionary.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
#define DEFINE(TYPE) \
|
||||
void ComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t) { return def; }); \
|
||||
}
|
||||
|
||||
DEFINE(UInt8)
|
||||
DEFINE(UInt16)
|
||||
DEFINE(UInt32)
|
||||
DEFINE(UInt64)
|
||||
DEFINE(UInt128)
|
||||
DEFINE(Int8)
|
||||
DEFINE(Int16)
|
||||
DEFINE(Int32)
|
||||
DEFINE(Int64)
|
||||
DEFINE(Float32)
|
||||
DEFINE(Float64)
|
||||
DEFINE(Decimal32)
|
||||
DEFINE(Decimal64)
|
||||
DEFINE(Decimal128)
|
||||
|
||||
#undef DEFINE
|
||||
}
|
@ -1,78 +0,0 @@
|
||||
#include "ComplexKeyCacheDictionary.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// Writes `value` into element `idx` of the array held by `attribute`, choosing the array
/// element type from `attribute.type`.
///
/// For every non-string type the Field is read as the type listed next to it below and assigned
/// into the array element. For strings the bytes are copied into `string_arena`, and whatever the
/// element previously pointed at is returned to the arena first, unless it was null or pointed at
/// the attribute's own null-value string.
void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const
{
/// Emits one `case`: STORED is the array element type, FIELD is the type read out of `value`.
#define ASSIGN_FROM_FIELD(STORED, FIELD) \
    case AttributeUnderlyingType::ut##STORED: \
        std::get<ContainerPtrType<STORED>>(attribute.arrays)[idx] = value.get<FIELD>(); \
        break;

    switch (attribute.type)
    {
        ASSIGN_FROM_FIELD(UInt8, UInt64)
        ASSIGN_FROM_FIELD(UInt16, UInt64)
        ASSIGN_FROM_FIELD(UInt32, UInt64)
        ASSIGN_FROM_FIELD(UInt64, UInt64)
        ASSIGN_FROM_FIELD(UInt128, UInt128)
        ASSIGN_FROM_FIELD(Int8, Int64)
        ASSIGN_FROM_FIELD(Int16, Int64)
        ASSIGN_FROM_FIELD(Int32, Int64)
        ASSIGN_FROM_FIELD(Int64, Int64)
        ASSIGN_FROM_FIELD(Float32, Float64)
        ASSIGN_FROM_FIELD(Float64, Float64)

        ASSIGN_FROM_FIELD(Decimal32, Decimal32)
        ASSIGN_FROM_FIELD(Decimal64, Decimal64)
        ASSIGN_FROM_FIELD(Decimal128, Decimal128)

        case AttributeUnderlyingType::utString:
        {
            const auto & new_string = value.get<String>();
            auto & slot = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
            const auto & default_string = std::get<String>(attribute.null_values);

            /// Give the old bytes back to the arena, except when the slot is empty
            /// or points at the null-value string (which the arena did not allocate here).
            const bool points_at_default = slot.data == default_string.data();
            if (slot.data && !points_at_default)
                string_arena->free(const_cast<char *>(slot.data), slot.size);

            const auto length = new_string.size();
            if (length == 0)
            {
                /// Nothing to allocate for an empty string: store an empty reference.
                slot = {};
                break;
            }

            auto * buffer = string_arena->alloc(length);
            std::copy(new_string.data(), new_string.data() + length, buffer);
            slot = StringRef{buffer, length};
            break;
        }
    }

#undef ASSIGN_FROM_FIELD
}
|
||||
|
||||
}
|
@ -1,71 +0,0 @@
|
||||
#include "ComplexKeyCacheDictionary.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// Resets element `idx` of the array held by `attribute` to the attribute's null value,
/// choosing the array element type from `attribute.type`.
///
/// For strings the element is made to reference the null-value string itself; memory the element
/// previously referenced is returned to `string_arena` first. If the element already references
/// the null-value string, nothing is done.
void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const
{
/// Emits one `case` that copies the stored null value of type TYPE into the array element.
#define RESET_TO_NULL_VALUE(TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        std::get<ContainerPtrType<TYPE>>(attribute.arrays)[idx] = std::get<TYPE>(attribute.null_values); \
        break;

    switch (attribute.type)
    {
        RESET_TO_NULL_VALUE(UInt8)
        RESET_TO_NULL_VALUE(UInt16)
        RESET_TO_NULL_VALUE(UInt32)
        RESET_TO_NULL_VALUE(UInt64)
        RESET_TO_NULL_VALUE(UInt128)
        RESET_TO_NULL_VALUE(Int8)
        RESET_TO_NULL_VALUE(Int16)
        RESET_TO_NULL_VALUE(Int32)
        RESET_TO_NULL_VALUE(Int64)
        RESET_TO_NULL_VALUE(Float32)
        RESET_TO_NULL_VALUE(Float64)

        RESET_TO_NULL_VALUE(Decimal32)
        RESET_TO_NULL_VALUE(Decimal64)
        RESET_TO_NULL_VALUE(Decimal128)

        case AttributeUnderlyingType::utString:
        {
            const auto & default_string = std::get<String>(attribute.null_values);
            auto & slot = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];

            /// Already referencing the null-value string: nothing to release or rewrite.
            if (slot.data == default_string.data())
                break;

            if (slot.data)
                string_arena->free(const_cast<char *>(slot.data), slot.size);

            slot = StringRef{default_string};
            break;
        }
    }

#undef RESET_TO_NULL_VALUE
}
|
||||
|
||||
}
|
@ -3,6 +3,9 @@
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
#include <Core/Defines.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -31,194 +34,151 @@ ComplexKeyDirectDictionary::ComplexKeyDirectDictionary(
|
||||
createAttributes();
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyDirectDictionary::get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyDirectDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
|
||||
ColumnPtr ComplexKeyDirectDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto & null_value = std::get<StringRef>(attribute.null_values);
|
||||
getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t) { return String(null_value.data, null_value.size); });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyDirectDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyDirectDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
ColumnPtr result;
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t row) { const auto ref = def->getDataAt(row); return String(ref.data, ref.size); });
|
||||
}
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyDirectDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const TYPE def, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyDirectDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const String & def, ColumnString * const out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
ComplexKeyDirectDictionary::getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t) { return def; });
|
||||
}
|
||||
|
||||
|
||||
void ComplexKeyDirectDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
ColumnUInt8::MutablePtr col_null_map_to;
|
||||
ColumnUInt8::Container * vec_null_map_to = nullptr;
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
has<UInt8>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
has<UInt16>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
has<UInt32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
has<UInt64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
has<UInt128>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
has<Int8>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
has<Int16>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
has<Int32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
has<Int64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
has<Float32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
has<Float64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
has<String>(attribute, key_columns, out);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
has<Decimal32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
has<Decimal64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
has<Decimal128>(attribute, key_columns, out);
|
||||
break;
|
||||
col_null_map_to = ColumnUInt8::create(keys_size, false);
|
||||
vec_null_map_to = &col_null_map_to->getData();
|
||||
}
|
||||
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
|
||||
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<String, String>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const String value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
const auto ref = StringRef{value};
|
||||
out->insertData(ref.data, ref.size);
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out[row] = value;
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Returns a UInt8 column with one element per input row: 1 if a row with the same key was
/// produced by `source_ptr->loadKeys(...)`, 0 otherwise.
///
/// Steps:
///  1. Build a StringRef key for every requested row (via placeKeysInPool) and register it in a
///     map with the value 0.
///  2. Read every block from the stream returned by loadKeys, rebuild the key of each returned row
///     from the block's first `dict_struct.key->size()` columns, and flip the map entry to 1 when
///     that key was requested.
///  3. Copy the per-key flags back into the result in request order.
///
/// `query_count` is increased by the number of requested rows.
ColumnUInt8::Ptr ComplexKeyDirectDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
    dict_struct.validateKeyTypes(key_types);

    const auto rows = key_columns.front()->size();

    auto result = ColumnUInt8::create(rows);
    auto & out = result->getData();

    const auto keys_size = dict_struct.key->size();
    StringRefs key_parts(keys_size);
    MapType<UInt8> has_key;
    Arena requested_keys_pool;
    std::vector<size_t> to_load(rows);
    PODArray<StringRef> keys(rows);

    for (size_t row = 0; row < rows; ++row)
    {
        const StringRef key = placeKeysInPool(row, key_columns, key_parts, *dict_struct.key, requested_keys_pool);
        keys[row] = key;
        has_key[key] = 0;
        to_load[row] = row;
    }

    auto stream = source_ptr->loadKeys(key_columns, to_load);
    stream->readPrefix();

    while (const auto block = stream->read())
    {
        /// The key columns are the leading `keys_size` columns of the block.
        const auto block_key_columns = ext::map<Columns>(
            ext::range(0, keys_size), [&](const size_t position) { return block.safeGetByPosition(position).column; });

        Arena block_keys_pool;
        StringRefs block_key_parts(keys_size);

        const auto block_rows = block_key_columns.front()->size();
        for (size_t block_row = 0; block_row < block_rows; ++block_row)
        {
            const StringRef key = placeKeysInPool(block_row, block_key_columns, block_key_parts, *dict_struct.key, block_keys_pool);

            /// Keys that were not requested are ignored rather than inserted.
            if (has_key.has(key))
                has_key[key] = 1;
        }
    }

    stream->readSuffix();

    for (size_t row = 0; row < rows; ++row)
        out[row] = has_key[keys[row]];

    query_count.fetch_add(rows, std::memory_order_relaxed);

    return result;
}
|
||||
|
||||
void ComplexKeyDirectDictionary::createAttributes()
|
||||
{
|
||||
@ -229,7 +189,7 @@ void ComplexKeyDirectDictionary::createAttributes()
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attribute_name_by_index.emplace(attributes.size(), attribute.name);
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value, attribute.name));
|
||||
attributes.push_back(createAttribute(attribute, attribute.null_value, attribute.name));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
|
||||
@ -237,7 +197,6 @@ void ComplexKeyDirectDictionary::createAttributes()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void ComplexKeyDirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
@ -254,59 +213,19 @@ void ComplexKeyDirectDictionary::createAttributeImpl<String>(Attribute & attribu
|
||||
}
|
||||
|
||||
|
||||
ComplexKeyDirectDictionary::Attribute ComplexKeyDirectDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & attr_name)
|
||||
ComplexKeyDirectDictionary::Attribute ComplexKeyDirectDictionary::createAttribute(
|
||||
const DictionaryAttribute & attribute, const Field & null_value, const std::string & attr_name)
|
||||
{
|
||||
Attribute attr{type, {}, {}, attr_name};
|
||||
Attribute attr{attribute.underlying_type, attribute.is_nullable, {}, {}, attr_name};
|
||||
|
||||
switch (type)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
createAttributeImpl<String>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
@ -356,14 +275,18 @@ StringRef ComplexKeyDirectDictionary::placeKeysInPool(
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto rows = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys_array(keys_size);
|
||||
MapType<OutputType> value_by_key;
|
||||
HashMapWithSavedHash<StringRef, bool, StringRefHash> value_is_null;
|
||||
Arena temporary_keys_pool;
|
||||
std::vector<size_t> to_load(rows);
|
||||
PODArray<StringRef> keys(rows);
|
||||
@ -372,8 +295,9 @@ void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
|
||||
keys[row] = key;
|
||||
value_by_key[key] = get_default(row);
|
||||
value_by_key[key] = static_cast<AttributeType>(default_value_extractor[row]);
|
||||
to_load[row] = row;
|
||||
value_is_null[key] = false;
|
||||
}
|
||||
|
||||
auto stream = source_ptr->loadKeys(key_columns, to_load);
|
||||
@ -392,6 +316,11 @@ void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
});
|
||||
for (const size_t attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
if (attribute.name != attribute_name_by_index.at(attribute_idx))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
const IColumn & attribute_column = *attribute_columns[attribute_idx];
|
||||
Arena pool;
|
||||
|
||||
@ -402,17 +331,15 @@ void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
for (const auto row_idx : ext::range(0, columns_size))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
|
||||
if (value_by_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
|
||||
|
||||
if (value_by_key.has(key))
|
||||
{
|
||||
if (attribute.type == AttributeUnderlyingType::utFloat32)
|
||||
{
|
||||
value_by_key[key] = static_cast<Float32>(attribute_column[row_idx].template get<Float64>());
|
||||
}
|
||||
auto value = attribute_column[row_idx];
|
||||
|
||||
if (value.isNull())
|
||||
value_is_null[key] = true;
|
||||
else
|
||||
{
|
||||
value_by_key[key] = static_cast<OutputType>(attribute_column[row_idx].template get<AttributeType>());
|
||||
}
|
||||
|
||||
value_by_key[key] = static_cast<OutputType>(value.template get<NearestFieldType<AttributeType>>());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -422,78 +349,13 @@ void ComplexKeyDirectDictionary::getItemsImpl(
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
set_value(row, value_by_key[keys[row]]);
|
||||
auto key = keys[row];
|
||||
set_value(row, value_by_key[key], value_is_null[key]);
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void ComplexKeyDirectDictionary::getItemsStringImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
{
|
||||
const auto rows = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys_array(keys_size);
|
||||
MapType<String> value_by_key;
|
||||
Arena temporary_keys_pool;
|
||||
std::vector<size_t> to_load(rows);
|
||||
PODArray<StringRef> keys(rows);
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
|
||||
keys[row] = key;
|
||||
value_by_key[key] = get_default(row);
|
||||
to_load[row] = row;
|
||||
}
|
||||
|
||||
auto stream = source_ptr->loadKeys(key_columns, to_load);
|
||||
const auto attributes_size = attributes.size();
|
||||
|
||||
stream->readPrefix();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
const auto columns = ext::map<Columns>(
|
||||
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
|
||||
|
||||
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
|
||||
{
|
||||
return block.safeGetByPosition(keys_size + attribute_idx).column;
|
||||
});
|
||||
for (const size_t attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
const IColumn & attribute_column = *attribute_columns[attribute_idx];
|
||||
Arena pool;
|
||||
|
||||
StringRefs keys_temp(keys_size);
|
||||
|
||||
const auto columns_size = columns.front()->size();
|
||||
|
||||
for (const auto row_idx : ext::range(0, columns_size))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
|
||||
if (value_by_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
|
||||
{
|
||||
const String from_source = attribute_column[row_idx].template get<String>();
|
||||
value_by_key[key] = from_source;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
set_value(row, value_by_key[keys[row]]);
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
const ComplexKeyDirectDictionary::Attribute & ComplexKeyDirectDictionary::getAttribute(const std::string & attribute_name) const
|
||||
{
|
||||
const auto it = attribute_index_by_name.find(attribute_name);
|
||||
@ -503,65 +365,6 @@ const ComplexKeyDirectDictionary::Attribute & ComplexKeyDirectDictionary::getAtt
|
||||
return attributes[it->second];
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void ComplexKeyDirectDictionary::has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
const auto rows = key_columns.front()->size();
|
||||
const auto keys_size = dict_struct.key->size();
|
||||
StringRefs keys_array(keys_size);
|
||||
MapType<UInt8> has_key;
|
||||
Arena temporary_keys_pool;
|
||||
std::vector<size_t> to_load(rows);
|
||||
PODArray<StringRef> keys(rows);
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
|
||||
keys[row] = key;
|
||||
has_key[key] = 0;
|
||||
to_load[row] = row;
|
||||
}
|
||||
|
||||
auto stream = source_ptr->loadKeys(key_columns, to_load);
|
||||
|
||||
stream->readPrefix();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
const auto columns = ext::map<Columns>(
|
||||
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
|
||||
|
||||
for (const size_t attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
Arena pool;
|
||||
|
||||
StringRefs keys_temp(keys_size);
|
||||
|
||||
const auto columns_size = columns.front()->size();
|
||||
|
||||
for (const auto row_idx : ext::range(0, columns_size))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
|
||||
if (has_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
|
||||
{
|
||||
has_key[key] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
out[row] = has_key[keys[row]];
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
BlockInputStreamPtr ComplexKeyDirectDictionary::getBlockInputStream(const Names & /* column_names */, size_t /* max_block_size */) const
|
||||
{
|
||||
return source_ptr->loadAll();
|
||||
|
@ -12,14 +12,13 @@
|
||||
#include <ext/range.h>
|
||||
#include <ext/size.h>
|
||||
#include <ext/map.h>
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
|
||||
#include "DictionaryStructure.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
|
||||
class ComplexKeyDirectDictionary final : public IDictionaryBase
|
||||
{
|
||||
@ -60,78 +59,16 @@ public:
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const TYPE def, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const String & def, ColumnString * const out) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -142,6 +79,8 @@ private:
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
bool is_nullable;
|
||||
|
||||
std::variant<
|
||||
UInt8,
|
||||
UInt16,
|
||||
@ -168,27 +107,21 @@ private:
|
||||
template <typename T>
|
||||
void addAttributeSize(const Attribute & attribute);
|
||||
|
||||
void calculateBytesAllocated();
|
||||
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & name);
|
||||
static Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value, const std::string & name);
|
||||
|
||||
template <typename Pool>
|
||||
StringRef placeKeysInPool(
|
||||
const size_t row, const Columns & key_columns, StringRefs & keys, const std::vector<DictionaryAttribute> & key_attributes, Pool & pool) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void getItemsStringImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
|
||||
template <typename T>
|
||||
void resize(Attribute & attribute, const Key id);
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);
|
||||
@ -197,9 +130,6 @@ private:
|
||||
|
||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
||||
|
||||
template <typename T>
|
||||
void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
const DictionaryStructure dict_struct;
|
||||
const DictionarySourcePtr source_ptr;
|
||||
const DictionaryLifetime dict_lifetime;
|
||||
|
@ -1,6 +1,10 @@
|
||||
#include "ComplexKeyHashedDictionary.h"
|
||||
#include <ext/map.h>
|
||||
#include <ext/range.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
|
||||
@ -32,216 +36,111 @@ ComplexKeyHashedDictionary::ComplexKeyHashedDictionary(
|
||||
calculateBytesAllocated();
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyHashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyHashedDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyHashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyHashedDictionary::getString(
|
||||
ColumnPtr ComplexKeyHashedDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
ColumnPtr result;
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void ComplexKeyHashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
dict_struct.validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
ColumnUInt8::MutablePtr col_null_map_to;
|
||||
ColumnUInt8::Container * vec_null_map_to = nullptr;
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
col_null_map_to = ColumnUInt8::create(keys_size, false);
|
||||
vec_null_map_to = &col_null_map_to->getData();
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void ComplexKeyHashedDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
|
||||
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return StringRef{def}; });
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const StringRef value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out[row] = value;
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void ComplexKeyHashedDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr ComplexKeyHashedDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
auto size = key_columns.front()->size();
|
||||
auto result = ColumnUInt8::create(size);
|
||||
auto& out = result->getData();
|
||||
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
has<UInt8>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
has<UInt16>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
has<UInt32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
has<UInt64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
has<UInt128>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
has<Int8>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
has<Int16>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
has<Int32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
has<Int64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
has<Float32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
has<Float64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
has<StringRef>(attribute, key_columns, out);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
has<Decimal32>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
has<Decimal64>(attribute, key_columns, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
has<Decimal128>(attribute, key_columns, out);
|
||||
break;
|
||||
}
|
||||
has<ValueType>(attribute, key_columns, out);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void ComplexKeyHashedDictionary::createAttributes()
|
||||
@ -252,7 +151,7 @@ void ComplexKeyHashedDictionary::createAttributes()
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
|
||||
attributes.push_back(createAttribute(attribute, attribute.null_value));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
|
||||
@ -407,66 +306,30 @@ void ComplexKeyHashedDictionary::addAttributeSize(const Attribute & attribute)
|
||||
bucket_count = map_ref.getBufferSizeInCells();
|
||||
}
|
||||
|
||||
template <>
|
||||
void ComplexKeyHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
|
||||
{
|
||||
const auto & map_ref = std::get<ContainerType<StringRef>>(attribute.maps);
|
||||
bytes_allocated += sizeof(ContainerType<StringRef>) + map_ref.getBufferSizeInBytes();
|
||||
bucket_count = map_ref.getBufferSizeInCells();
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
}
|
||||
|
||||
void ComplexKeyHashedDictionary::calculateBytesAllocated()
|
||||
{
|
||||
bytes_allocated += attributes.size() * sizeof(attributes.front());
|
||||
|
||||
for (const auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
addAttributeSize<UInt8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
addAttributeSize<UInt16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
addAttributeSize<UInt32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
addAttributeSize<UInt64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
addAttributeSize<UInt128>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
addAttributeSize<Int8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
addAttributeSize<Int16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
addAttributeSize<Int32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
addAttributeSize<Int64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
addAttributeSize<Float32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
addAttributeSize<Float64>(attribute);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
addAttributeSize<Decimal32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
addAttributeSize<Decimal64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
addAttributeSize<Decimal128>(attribute);
|
||||
break;
|
||||
addAttributeSize<AttributeType>(attribute);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
bytes_allocated += keys_pool.size();
|
||||
@ -479,73 +342,41 @@ void ComplexKeyHashedDictionary::createAttributeImpl(Attribute & attribute, cons
|
||||
attribute.maps.emplace<ContainerType<T>>();
|
||||
}
|
||||
|
||||
ComplexKeyHashedDictionary::Attribute
|
||||
ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
template <>
|
||||
void ComplexKeyHashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}, {}};
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
const String & string = null_value.get<String>();
|
||||
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
|
||||
attribute.maps.emplace<ContainerType<StringRef>>();
|
||||
}
|
||||
|
||||
switch (type)
|
||||
ComplexKeyHashedDictionary::Attribute
|
||||
ComplexKeyHashedDictionary::createAttribute(const DictionaryAttribute & attribute, const Field & null_value)
|
||||
{
|
||||
auto nullable_set = attribute.is_nullable ? std::make_unique<NullableSet>() : nullptr;
|
||||
Attribute attr{attribute.underlying_type, attribute.is_nullable, std::move(nullable_set), {}, {}, {}};
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
attr.null_values = null_value.get<String>();
|
||||
attr.maps.emplace<ContainerType<StringRef>>();
|
||||
attr.string_arena = std::make_unique<Arena>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void ComplexKeyHashedDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto & attr = std::get<ContainerType<AttributeType>>(attribute.maps);
|
||||
|
||||
@ -560,7 +391,18 @@ void ComplexKeyHashedDictionary::getItemsImpl(
|
||||
const auto key = placeKeysInPool(i, key_columns, keys, temporary_keys_pool);
|
||||
|
||||
const auto it = attr.find(key);
|
||||
set_value(i, it ? static_cast<OutputType>(it->getMapped()) : get_default(i));
|
||||
|
||||
if (it)
|
||||
{
|
||||
set_value(i, static_cast<OutputType>(it->getMapped()), false);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (attribute.is_nullable && attribute.nullable_set->find(key) != nullptr)
|
||||
set_value(i, default_value_extractor[i], true);
|
||||
else
|
||||
set_value(i, default_value_extractor[i], false);
|
||||
}
|
||||
|
||||
/// free memory allocated for the key
|
||||
temporary_keys_pool.rollback(key.size);
|
||||
@ -578,51 +420,42 @@ bool ComplexKeyHashedDictionary::setAttributeValueImpl(Attribute & attribute, co
|
||||
return pair.second;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool ComplexKeyHashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const StringRef key, const String value)
|
||||
{
|
||||
const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
|
||||
return setAttributeValueImpl<StringRef>(attribute, key, StringRef{string_in_arena, value.size()});
|
||||
}
|
||||
|
||||
bool ComplexKeyHashedDictionary::setAttributeValue(Attribute & attribute, const StringRef key, const Field & value)
|
||||
{
|
||||
switch (attribute.type)
|
||||
bool result = false;
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
return setAttributeValueImpl<UInt8>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
return setAttributeValueImpl<UInt16>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
return setAttributeValueImpl<UInt32>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
return setAttributeValueImpl<UInt64>(attribute, key, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
return setAttributeValueImpl<UInt128>(attribute, key, value.get<UInt128>());
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
return setAttributeValueImpl<Int8>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
return setAttributeValueImpl<Int16>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
return setAttributeValueImpl<Int32>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
return setAttributeValueImpl<Int64>(attribute, key, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
return setAttributeValueImpl<Float32>(attribute, key, value.get<Float64>());
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
return setAttributeValueImpl<Float64>(attribute, key, value.get<Float64>());
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
return setAttributeValueImpl<Decimal32>(attribute, key, value.get<Decimal32>());
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
return setAttributeValueImpl<Decimal64>(attribute, key, value.get<Decimal64>());
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
return setAttributeValueImpl<Decimal128>(attribute, key, value.get<Decimal128>());
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
auto & map = std::get<ContainerType<StringRef>>(attribute.maps);
|
||||
const auto & string = value.get<String>();
|
||||
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
const auto pair = map.insert({key, StringRef{string_in_arena, string.size()}});
|
||||
return pair.second;
|
||||
if (value.isNull())
|
||||
{
|
||||
attribute.nullable_set->insert(key);
|
||||
result = true;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
attribute.nullable_set->erase(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
result = setAttributeValueImpl<AttributeType>(attribute, key, value.get<NearestFieldType<AttributeType>>());
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
const ComplexKeyHashedDictionary::Attribute & ComplexKeyHashedDictionary::getAttribute(const std::string & attribute_name) const
|
||||
@ -673,6 +506,9 @@ void ComplexKeyHashedDictionary::has(const Attribute & attribute, const Columns
|
||||
const auto it = attr.find(key);
|
||||
out[i] = static_cast<bool>(it);
|
||||
|
||||
if (attribute.is_nullable && !out[i])
|
||||
out[i] = attribute.nullable_set->find(key) != nullptr;
|
||||
|
||||
/// free memory allocated for the key
|
||||
temporary_keys_pool.rollback(key.size);
|
||||
}
|
||||
@ -684,41 +520,26 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys() const
|
||||
{
|
||||
const Attribute & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
return getKeys<UInt8>(attribute);
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
return getKeys<UInt16>(attribute);
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
return getKeys<UInt32>(attribute);
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
return getKeys<UInt64>(attribute);
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
return getKeys<UInt128>(attribute);
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
return getKeys<Int8>(attribute);
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
return getKeys<Int16>(attribute);
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
return getKeys<Int32>(attribute);
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
return getKeys<Int64>(attribute);
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
return getKeys<Float32>(attribute);
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
return getKeys<Float64>(attribute);
|
||||
case AttributeUnderlyingType::utString:
|
||||
return getKeys<StringRef>(attribute);
|
||||
std::vector<StringRef> result;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
return getKeys<Decimal32>(attribute);
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
return getKeys<Decimal64>(attribute);
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
return getKeys<Decimal128>(attribute);
|
||||
}
|
||||
return {};
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
result = getKeys<StringRef>(attribute);
|
||||
}
|
||||
else
|
||||
{
|
||||
result = getKeys<AttributeType>(attribute);
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -730,12 +551,18 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & att
|
||||
for (const auto & key : attr)
|
||||
keys.push_back(key.getKey());
|
||||
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
for (const auto & key: *attribute.nullable_set)
|
||||
keys.push_back(key.getKey());
|
||||
}
|
||||
|
||||
return keys;
|
||||
}
|
||||
|
||||
BlockInputStreamPtr ComplexKeyHashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyHashedDictionary, UInt64>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getKeys(), column_names);
|
||||
}
|
||||
|
||||
|
@ -7,17 +7,17 @@
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Core/Block.h>
|
||||
#include <common/StringRef.h>
|
||||
#include <ext/range.h>
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
|
||||
#include "DictionaryStructure.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
|
||||
class ComplexKeyHashedDictionary final : public IDictionaryBase
|
||||
{
|
||||
@ -60,91 +60,16 @@ public:
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const;
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -152,9 +77,14 @@ private:
|
||||
template <typename Value>
|
||||
using ContainerType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
|
||||
|
||||
using NullableSet = HashSetWithSavedHash<StringRef, StringRefHash>;
|
||||
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
bool is_nullable;
|
||||
std::unique_ptr<NullableSet> nullable_set;
|
||||
|
||||
std::variant<
|
||||
UInt8,
|
||||
UInt16,
|
||||
@ -170,7 +100,7 @@ private:
|
||||
Decimal128,
|
||||
Float32,
|
||||
Float64,
|
||||
String>
|
||||
StringRef>
|
||||
null_values;
|
||||
std::variant<
|
||||
ContainerType<UInt8>,
|
||||
@ -206,18 +136,21 @@ private:
|
||||
void calculateBytesAllocated();
|
||||
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
static Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void
|
||||
getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
|
||||
static bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
|
||||
|
||||
bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
|
||||
static bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
|
||||
|
||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
||||
|
||||
|
@ -25,12 +25,10 @@ namespace ErrorCodes
|
||||
/* BlockInputStream implementation for external dictionaries
|
||||
* read() returns blocks consisting of the in-memory contents of the dictionaries
|
||||
*/
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Key>
|
||||
class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase
|
||||
{
|
||||
public:
|
||||
using DictionaryPtr = std::shared_ptr<DictionaryType const>;
|
||||
|
||||
DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary, UInt64 max_block_size, PaddedPODArray<Key> && ids, const Names & column_names);
|
||||
|
||||
@ -60,111 +58,9 @@ protected:
|
||||
Block getBlock(size_t start, size_t size) const override;
|
||||
|
||||
private:
|
||||
// pointer types to getXXX functions
|
||||
// for single key dictionaries
|
||||
template <typename Type>
|
||||
using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, PaddedPODArray<Type> &) const;
|
||||
|
||||
template <typename Type>
|
||||
using DictionaryDecimalGetter
|
||||
= void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, DecimalPaddedPODArray<Type> &) const;
|
||||
|
||||
using DictionaryStringGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, ColumnString *) const;
|
||||
|
||||
// for complex complex key dictionaries
|
||||
template <typename Type>
|
||||
using GetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, PaddedPODArray<Type> & out) const;
|
||||
|
||||
template <typename Type>
|
||||
using DecimalGetterByKey
|
||||
= void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, DecimalPaddedPODArray<Type> & out) const;
|
||||
|
||||
using StringGetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, ColumnString * out) const;
|
||||
|
||||
// call getXXX
|
||||
// for single key dictionaries
|
||||
template <typename Type, typename Container>
|
||||
void callGetter(
|
||||
DictionaryGetter<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <typename Type, typename Container>
|
||||
void callGetter(
|
||||
DictionaryDecimalGetter<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <typename Container>
|
||||
void callGetter(
|
||||
DictionaryStringGetter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
// for complex complex key dictionaries
|
||||
template <typename Type, typename Container>
|
||||
void callGetter(
|
||||
GetterByKey<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <typename Type, typename Container>
|
||||
void callGetter(
|
||||
DecimalGetterByKey<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <typename Container>
|
||||
void callGetter(
|
||||
StringGetterByKey getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
|
||||
template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
|
||||
Block
|
||||
fillBlock(const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
|
||||
|
||||
|
||||
template <typename AttributeType, typename Getter>
|
||||
ColumnPtr getColumnFromAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
template <typename Getter>
|
||||
ColumnPtr getColumnFromStringAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dictionary) const;
|
||||
ColumnPtr getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const;
|
||||
|
||||
void fillKeyColumns(
|
||||
@ -174,65 +70,54 @@ private:
|
||||
const DictionaryStructure & dictionary_structure,
|
||||
ColumnsWithTypeAndName & columns) const;
|
||||
|
||||
DictionaryPtr dictionary;
|
||||
std::shared_ptr<const IDictionaryBase> dictionary;
|
||||
Names column_names;
|
||||
PaddedPODArray<Key> ids;
|
||||
ColumnsWithTypeAndName key_columns;
|
||||
Poco::Logger * logger;
|
||||
|
||||
using FillBlockFunction = Block (DictionaryBlockInputStream<DictionaryType, Key>::*)(
|
||||
const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
|
||||
|
||||
FillBlockFunction fill_block_function;
|
||||
|
||||
Columns data_columns;
|
||||
GetColumnsFunction get_key_columns_function;
|
||||
GetColumnsFunction get_view_columns_function;
|
||||
|
||||
enum class DictionaryKeyType
|
||||
enum class DictionaryInputStreamKeyType
|
||||
{
|
||||
Id,
|
||||
ComplexKey,
|
||||
Callback
|
||||
};
|
||||
|
||||
DictionaryKeyType key_type;
|
||||
DictionaryInputStreamKeyType key_type;
|
||||
};
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
template <typename Key>
|
||||
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary_, UInt64 max_block_size_, PaddedPODArray<Key> && ids_, const Names & column_names_)
|
||||
: DictionaryBlockInputStreamBase(ids_.size(), max_block_size_)
|
||||
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
|
||||
, dictionary(dictionary_)
|
||||
, column_names(column_names_)
|
||||
, ids(std::move(ids_))
|
||||
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
|
||||
, fill_block_function(
|
||||
&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<DictionaryGetter, DictionaryDecimalGetter, DictionaryStringGetter>)
|
||||
, key_type(DictionaryKeyType::Id)
|
||||
, key_type(DictionaryInputStreamKeyType::Id)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
template <typename Key>
|
||||
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary_,
|
||||
UInt64 max_block_size_,
|
||||
const std::vector<StringRef> & keys,
|
||||
const Names & column_names_)
|
||||
: DictionaryBlockInputStreamBase(keys.size(), max_block_size_)
|
||||
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
|
||||
, dictionary(dictionary_)
|
||||
, column_names(column_names_)
|
||||
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
|
||||
, fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
|
||||
, key_type(DictionaryKeyType::ComplexKey)
|
||||
, key_type(DictionaryInputStreamKeyType::ComplexKey)
|
||||
{
|
||||
const DictionaryStructure & dictionaty_structure = dictionary->getStructure();
|
||||
fillKeyColumns(keys, 0, keys.size(), dictionaty_structure, key_columns);
|
||||
const DictionaryStructure & dictionary_structure = dictionary->getStructure();
|
||||
fillKeyColumns(keys, 0, keys.size(), dictionary_structure, key_columns);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
template <typename Key>
|
||||
DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary_,
|
||||
UInt64 max_block_size_,
|
||||
const Columns & data_columns_,
|
||||
@ -240,24 +125,23 @@ DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
GetColumnsFunction && get_key_columns_function_,
|
||||
GetColumnsFunction && get_view_columns_function_)
|
||||
: DictionaryBlockInputStreamBase(data_columns_.front()->size(), max_block_size_)
|
||||
, dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
|
||||
, dictionary(dictionary_)
|
||||
, column_names(column_names_)
|
||||
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
|
||||
, fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
|
||||
, data_columns(data_columns_)
|
||||
, get_key_columns_function(get_key_columns_function_)
|
||||
, get_view_columns_function(get_view_columns_function_)
|
||||
, key_type(DictionaryKeyType::Callback)
|
||||
, get_key_columns_function(std::move(get_key_columns_function_))
|
||||
, get_view_columns_function(std::move(get_view_columns_function_))
|
||||
, key_type(DictionaryInputStreamKeyType::Callback)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
|
||||
template <typename Key>
|
||||
Block DictionaryBlockInputStream<Key>::getBlock(size_t start, size_t length) const
|
||||
{
|
||||
/// TODO: Rewrite
|
||||
switch (key_type)
|
||||
{
|
||||
case DictionaryKeyType::ComplexKey:
|
||||
case DictionaryInputStreamKeyType::ComplexKey:
|
||||
{
|
||||
Columns columns;
|
||||
ColumnsWithTypeAndName view_columns;
|
||||
@ -268,16 +152,16 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, si
|
||||
columns.emplace_back(column);
|
||||
view_columns.emplace_back(column, key_column.type, key_column.name);
|
||||
}
|
||||
return (this->*fill_block_function)({}, columns, {}, std::move(view_columns));
|
||||
return fillBlock({}, columns, {}, std::move(view_columns));
|
||||
}
|
||||
|
||||
case DictionaryKeyType::Id:
|
||||
case DictionaryInputStreamKeyType::Id:
|
||||
{
|
||||
PaddedPODArray<Key> ids_to_fill(ids.begin() + start, ids.begin() + start + length);
|
||||
return (this->*fill_block_function)(ids_to_fill, {}, {}, {});
|
||||
return fillBlock(ids_to_fill, {}, {}, {});
|
||||
}
|
||||
|
||||
case DictionaryKeyType::Callback:
|
||||
case DictionaryInputStreamKeyType::Callback:
|
||||
{
|
||||
Columns columns;
|
||||
columns.reserve(data_columns.size());
|
||||
@ -294,102 +178,15 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, si
|
||||
columns.push_back(key_column.column);
|
||||
types.push_back(key_column.type);
|
||||
}
|
||||
return (this->*fill_block_function)({}, columns, types, std::move(view_with_type_and_name));
|
||||
return fillBlock({}, columns, types, std::move(view_with_type_and_name));
|
||||
}
|
||||
}
|
||||
|
||||
throw Exception("Unexpected DictionaryKeyType.", ErrorCodes::LOGICAL_ERROR);
|
||||
throw Exception("Unexpected DictionaryInputStreamKeyType.", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Type, typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
DictionaryGetter<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & /*keys*/,
|
||||
const DataTypes & /*data_types*/,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, ids_to_fill, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Type, typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
DictionaryDecimalGetter<Type> getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & /*keys*/,
|
||||
const DataTypes & /*data_types*/,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, ids_to_fill, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
DictionaryStringGetter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & /*keys*/,
|
||||
const DataTypes & /*data_types*/,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, ids_to_fill, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Type, typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
GetterByKey<Type> getter,
|
||||
const PaddedPODArray<Key> & /*ids_to_fill*/,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, keys, data_types, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Type, typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
DecimalGetterByKey<Type> getter,
|
||||
const PaddedPODArray<Key> & /*ids_to_fill*/,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, keys, data_types, container);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
StringGetterByKey getter,
|
||||
const PaddedPODArray<Key> & /*ids_to_fill*/,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
Container & container,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
(dict.*getter)(attribute.name, keys, data_types, container);
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
|
||||
Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
template <typename Key>
|
||||
Block DictionaryBlockInputStream<Key>::fillBlock(
|
||||
const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const
|
||||
{
|
||||
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
|
||||
@ -408,9 +205,14 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
block_columns.push_back(column);
|
||||
|
||||
const DictionaryStructure & structure = dictionary->getStructure();
|
||||
ColumnPtr ids_column = getColumnFromIds(ids_to_fill);
|
||||
|
||||
if (structure.id && names.find(structure.id->name) != names.end())
|
||||
block_columns.emplace_back(getColumnFromIds(ids_to_fill), std::make_shared<DataTypeUInt64>(), structure.id->name);
|
||||
{
|
||||
block_columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), structure.id->name);
|
||||
}
|
||||
|
||||
auto dictionary_key_type = dictionary->getKeyType();
|
||||
|
||||
for (const auto idx : ext::range(0, structure.attributes.size()))
|
||||
{
|
||||
@ -418,126 +220,35 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
if (names.find(attribute.name) != names.end())
|
||||
{
|
||||
ColumnPtr column;
|
||||
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
|
||||
column = getColumnFromAttribute<TYPE, Getter<TYPE>>(&DictionaryType::get##TYPE, ids_to_fill, keys, data_types, attribute, *dictionary)
|
||||
switch (attribute.underlying_type)
|
||||
|
||||
if (dictionary_key_type == DictionaryKeyType::simple)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt128);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int8);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int16);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
{
|
||||
column = getColumnFromAttribute<Decimal32, DecimalGetter<Decimal32>>(
|
||||
&DictionaryType::getDecimal32, ids_to_fill, keys, data_types, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
{
|
||||
column = getColumnFromAttribute<Decimal64, DecimalGetter<Decimal64>>(
|
||||
&DictionaryType::getDecimal64, ids_to_fill, keys, data_types, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
{
|
||||
column = getColumnFromAttribute<Decimal128, DecimalGetter<Decimal128>>(
|
||||
&DictionaryType::getDecimal128, ids_to_fill, keys, data_types, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
column = getColumnFromStringAttribute<StringGetter>(
|
||||
&DictionaryType::getString, ids_to_fill, keys, data_types, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
column = dictionary->getColumn(
|
||||
attribute.name,
|
||||
attribute.type,
|
||||
{ids_column},
|
||||
{std::make_shared<DataTypeUInt64>()},
|
||||
nullptr /* default_values_column */);
|
||||
}
|
||||
#undef GET_COLUMN_FORM_ATTRIBUTE
|
||||
else
|
||||
{
|
||||
column = dictionary->getColumn(
|
||||
attribute.name,
|
||||
attribute.type,
|
||||
keys,
|
||||
data_types,
|
||||
nullptr /* default_values_column*/);
|
||||
}
|
||||
|
||||
block_columns.emplace_back(column, attribute.type, attribute.name);
|
||||
}
|
||||
}
|
||||
|
||||
return Block(block_columns);
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename AttributeType, typename Getter>
|
||||
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
if constexpr (IsDecimalNumber<AttributeType>)
|
||||
{
|
||||
auto size = ids_to_fill.size();
|
||||
if (!keys.empty())
|
||||
size = keys.front()->size();
|
||||
auto column = ColumnDecimal<AttributeType>::create(size, 0); /// NOTE: There's wrong scale here, but it's unused.
|
||||
callGetter(getter, ids_to_fill, keys, data_types, column->getData(), attribute, dict);
|
||||
return column;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto size = ids_to_fill.size();
|
||||
if (!keys.empty())
|
||||
size = keys.front()->size();
|
||||
auto column_vector = ColumnVector<AttributeType>::create(size);
|
||||
callGetter(getter, ids_to_fill, keys, data_types, column_vector->getData(), attribute, dict);
|
||||
return column_vector;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
template <typename Getter>
|
||||
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromStringAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const Columns & keys,
|
||||
const DataTypes & data_types,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & dict) const
|
||||
{
|
||||
auto column_string = ColumnString::create();
|
||||
auto ptr = column_string.get();
|
||||
callGetter(getter, ids_to_fill, keys, data_types, ptr, attribute, dict);
|
||||
return column_string;
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
|
||||
template <typename Key>
|
||||
ColumnPtr DictionaryBlockInputStream<Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
|
||||
{
|
||||
auto column_vector = ColumnVector<UInt64>::create();
|
||||
column_vector->getData().reserve(ids_to_fill.size());
|
||||
@ -547,8 +258,8 @@ ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(cons
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::fillKeyColumns(
|
||||
template <typename Key>
|
||||
void DictionaryBlockInputStream<Key>::fillKeyColumns(
|
||||
const std::vector<StringRef> & keys,
|
||||
size_t start,
|
||||
size_t size,
|
||||
|
149
src/Dictionaries/DictionaryHelpers.h
Normal file
149
src/Dictionaries/DictionaryHelpers.h
Normal file
@ -0,0 +1,149 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include "DictionaryStructure.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TYPE_MISMATCH;
|
||||
}
|
||||
|
||||
/**
|
||||
* In Dictionaries implementation String attribute is stored in arena and StringRefs are pointing to it.
|
||||
*/
|
||||
template <typename DictionaryAttributeType>
|
||||
using DictionaryValueType =
|
||||
std::conditional_t<std::is_same_v<DictionaryAttributeType, String>, StringRef, DictionaryAttributeType>;
|
||||
|
||||
/**
|
||||
* Used to create column with right type for DictionaryAttributeType.
|
||||
*/
|
||||
template <typename DictionaryAttributeType>
|
||||
class DictionaryAttributeColumnProvider
|
||||
{
|
||||
public:
|
||||
using ColumnType =
|
||||
std::conditional_t<std::is_same_v<DictionaryAttributeType, String>, ColumnString,
|
||||
std::conditional_t<IsDecimalNumber<DictionaryAttributeType>, ColumnDecimal<DictionaryAttributeType>,
|
||||
ColumnVector<DictionaryAttributeType>>>;
|
||||
|
||||
using ColumnPtr = typename ColumnType::MutablePtr;
|
||||
|
||||
static ColumnPtr getColumn(const DictionaryAttribute & dictionary_attribute, size_t size)
|
||||
{
|
||||
if constexpr (std::is_same_v<DictionaryAttributeType, String>)
|
||||
{
|
||||
return ColumnType::create();
|
||||
}
|
||||
if constexpr (IsDecimalNumber<DictionaryAttributeType>)
|
||||
{
|
||||
auto scale = getDecimalScale(*dictionary_attribute.nested_type);
|
||||
return ColumnType::create(size, scale);
|
||||
}
|
||||
else if constexpr (IsNumber<DictionaryAttributeType>)
|
||||
return ColumnType::create(size);
|
||||
else
|
||||
throw Exception{"Unsupported attribute type.", ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* DictionaryDefaultValueExtractor used to simplify getting default value for IDictionary function `getColumn`.
|
||||
* Provides interface for getting default value with operator[];
|
||||
*
|
||||
* If default_values_column is null then attribute_default_value will be used.
|
||||
* If default_values_column is not null in constructor than this column values will be used as default values.
|
||||
*/
|
||||
template <typename DictionaryAttributeType>
|
||||
class DictionaryDefaultValueExtractor
|
||||
{
|
||||
using DefaultColumnType = typename DictionaryAttributeColumnProvider<DictionaryAttributeType>::ColumnType;
|
||||
|
||||
public:
|
||||
using DefaultValueType = DictionaryValueType<DictionaryAttributeType>;
|
||||
|
||||
DictionaryDefaultValueExtractor(DictionaryAttributeType attribute_default_value, ColumnPtr default_values_column_ = nullptr)
|
||||
: default_value(std::move(attribute_default_value))
|
||||
{
|
||||
if (default_values_column_ == nullptr)
|
||||
use_default_value_from_column = false;
|
||||
else
|
||||
{
|
||||
if (const auto * const default_col = checkAndGetColumn<DefaultColumnType>(*default_values_column_))
|
||||
{
|
||||
default_values_column = default_col;
|
||||
use_default_value_from_column = true;
|
||||
}
|
||||
else if (const auto * const default_col_const = checkAndGetColumnConst<DefaultColumnType>(default_values_column_.get()))
|
||||
{
|
||||
default_value = default_col_const->template getValue<DictionaryAttributeType>();
|
||||
use_default_value_from_column = false;
|
||||
}
|
||||
else
|
||||
throw Exception{"Type of default column is not the same as dictionary attribute type.", ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
}
|
||||
|
||||
DefaultValueType operator[](size_t row)
|
||||
{
|
||||
if (!use_default_value_from_column)
|
||||
return static_cast<DefaultValueType>(default_value);
|
||||
|
||||
assert(default_values_column != nullptr);
|
||||
|
||||
if constexpr (std::is_same_v<DefaultColumnType, ColumnString>)
|
||||
return default_values_column->getDataAt(row);
|
||||
else
|
||||
return default_values_column->getData()[row];
|
||||
}
|
||||
private:
|
||||
DictionaryAttributeType default_value;
|
||||
const DefaultColumnType * default_values_column = nullptr;
|
||||
bool use_default_value_from_column = false;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns ColumnVector data as PaddedPodArray.
|
||||
|
||||
* If column is constant parameter backup_storage is used to store values.
|
||||
*/
|
||||
template <typename T>
|
||||
static const PaddedPODArray<T> & getColumnVectorData(
|
||||
const IDictionaryBase * dictionary,
|
||||
const ColumnPtr column,
|
||||
PaddedPODArray<T> & backup_storage)
|
||||
{
|
||||
bool is_const_column = isColumnConst(*column);
|
||||
auto full_column = column->convertToFullColumnIfConst();
|
||||
auto vector_col = checkAndGetColumn<ColumnVector<T>>(full_column.get());
|
||||
|
||||
if (!vector_col)
|
||||
{
|
||||
throw Exception{ErrorCodes::TYPE_MISMATCH,
|
||||
"{}: type mismatch: column has wrong type expected {}",
|
||||
dictionary->getDictionaryID().getNameForLogs(),
|
||||
TypeName<T>::get()};
|
||||
}
|
||||
|
||||
if (is_const_column)
|
||||
{
|
||||
// With type conversion and const columns we need to use backup storage here
|
||||
auto & data = vector_col->getData();
|
||||
backup_storage.assign(data);
|
||||
|
||||
return backup_storage;
|
||||
}
|
||||
else
|
||||
{
|
||||
return vector_col->getData();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -2,6 +2,8 @@
|
||||
#include <Columns/IColumn.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/Operators.h>
|
||||
@ -12,7 +14,6 @@
|
||||
#include <unordered_set>
|
||||
#include <ext/range.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
@ -41,54 +42,46 @@ namespace
|
||||
}
|
||||
|
||||
|
||||
AttributeUnderlyingType getAttributeUnderlyingType(const std::string & type)
|
||||
AttributeUnderlyingType getAttributeUnderlyingType(const DataTypePtr & type)
|
||||
{
|
||||
static const std::unordered_map<std::string, AttributeUnderlyingType> dictionary
|
||||
auto type_index = type->getTypeId();
|
||||
|
||||
switch (type_index)
|
||||
{
|
||||
{"UInt8", AttributeUnderlyingType::utUInt8},
|
||||
{"UInt16", AttributeUnderlyingType::utUInt16},
|
||||
{"UInt32", AttributeUnderlyingType::utUInt32},
|
||||
{"UInt64", AttributeUnderlyingType::utUInt64},
|
||||
{"UUID", AttributeUnderlyingType::utUInt128},
|
||||
{"Int8", AttributeUnderlyingType::utInt8},
|
||||
{"Int16", AttributeUnderlyingType::utInt16},
|
||||
{"Int32", AttributeUnderlyingType::utInt32},
|
||||
{"Int64", AttributeUnderlyingType::utInt64},
|
||||
{"Float32", AttributeUnderlyingType::utFloat32},
|
||||
{"Float64", AttributeUnderlyingType::utFloat64},
|
||||
{"String", AttributeUnderlyingType::utString},
|
||||
{"Date", AttributeUnderlyingType::utUInt16},
|
||||
};
|
||||
case TypeIndex::UInt8: return AttributeUnderlyingType::utUInt8;
|
||||
case TypeIndex::UInt16: return AttributeUnderlyingType::utUInt16;
|
||||
case TypeIndex::UInt32: return AttributeUnderlyingType::utUInt32;
|
||||
case TypeIndex::UInt64: return AttributeUnderlyingType::utUInt64;
|
||||
case TypeIndex::UInt128: return AttributeUnderlyingType::utUInt128;
|
||||
|
||||
const auto it = dictionary.find(type);
|
||||
if (it != std::end(dictionary))
|
||||
return it->second;
|
||||
case TypeIndex::Int8: return AttributeUnderlyingType::utInt8;
|
||||
case TypeIndex::Int16: return AttributeUnderlyingType::utInt16;
|
||||
case TypeIndex::Int32: return AttributeUnderlyingType::utInt32;
|
||||
case TypeIndex::Int64: return AttributeUnderlyingType::utInt64;
|
||||
|
||||
/// Can contain arbitrary scale and timezone parameters.
|
||||
if (type.find("DateTime64") == 0)
|
||||
return AttributeUnderlyingType::utUInt64;
|
||||
case TypeIndex::Float32: return AttributeUnderlyingType::utFloat32;
|
||||
case TypeIndex::Float64: return AttributeUnderlyingType::utFloat64;
|
||||
|
||||
/// Can contain arbitrary timezone as parameter.
|
||||
if (type.find("DateTime") == 0)
|
||||
return AttributeUnderlyingType::utUInt32;
|
||||
case TypeIndex::Decimal32: return AttributeUnderlyingType::utDecimal32;
|
||||
case TypeIndex::Decimal64: return AttributeUnderlyingType::utDecimal64;
|
||||
case TypeIndex::Decimal128: return AttributeUnderlyingType::utDecimal128;
|
||||
|
||||
if (type.find("Decimal") == 0)
|
||||
{
|
||||
size_t start = strlen("Decimal");
|
||||
if (type.find("32", start) == start)
|
||||
return AttributeUnderlyingType::utDecimal32;
|
||||
if (type.find("64", start) == start)
|
||||
return AttributeUnderlyingType::utDecimal64;
|
||||
if (type.find("128", start) == start)
|
||||
return AttributeUnderlyingType::utDecimal128;
|
||||
case TypeIndex::Date: return AttributeUnderlyingType::utUInt16;
|
||||
case TypeIndex::DateTime: return AttributeUnderlyingType::utUInt32;
|
||||
case TypeIndex::DateTime64: return AttributeUnderlyingType::utUInt64;
|
||||
|
||||
case TypeIndex::UUID: return AttributeUnderlyingType::utUInt128;
|
||||
|
||||
case TypeIndex::String: return AttributeUnderlyingType::utString;
|
||||
|
||||
// Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.
|
||||
// TODO: This should be fixed by fully supporting arrays in dictionaries.
|
||||
case TypeIndex::Array: return AttributeUnderlyingType::utString;
|
||||
|
||||
default: break;
|
||||
}
|
||||
|
||||
// Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.
|
||||
// TODO: This should be fixed by fully supporting arrays in dictionaries.
|
||||
if (type.find("Array") == 0)
|
||||
return AttributeUnderlyingType::utString;
|
||||
|
||||
throw Exception{"Unknown type " + type, ErrorCodes::UNKNOWN_TYPE};
|
||||
throw Exception{"Unknown type for dictionary" + type->getName(), ErrorCodes::UNKNOWN_TYPE};
|
||||
}
|
||||
|
||||
|
||||
@ -215,16 +208,32 @@ void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const
|
||||
|
||||
for (const auto i : ext::range(0, key_types.size()))
|
||||
{
|
||||
const auto & expected_type = (*key)[i].type->getName();
|
||||
const auto & actual_type = key_types[i]->getName();
|
||||
const auto & expected_type = (*key)[i].type;
|
||||
const auto & actual_type = key_types[i];
|
||||
|
||||
if (expected_type != actual_type)
|
||||
throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type + ", found "
|
||||
+ actual_type,
|
||||
ErrorCodes::TYPE_MISMATCH};
|
||||
if (!areTypesEqual(expected_type, actual_type))
|
||||
throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type->getName() + ", found "
|
||||
+ actual_type->getName(),
|
||||
ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
}
|
||||
|
||||
/// Looks up an attribute by name and checks that its type equals `type` (via areTypesEqual).
/// Throws BAD_ARGUMENTS if there is no attribute with that name,
/// TYPE_MISMATCH if the attribute exists but the types differ.
const DictionaryAttribute & DictionaryStructure::getAttribute(const String& attribute_name, const DataTypePtr & type) const
{
    const auto has_requested_name = [&](const auto & candidate) { return candidate.name == attribute_name; };
    const auto found = std::find_if(attributes.begin(), attributes.end(), has_requested_name);

    if (found == attributes.end())
        throw Exception{"No such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};

    const auto & matched_attribute = *found;

    if (areTypesEqual(matched_attribute.type, type))
        return matched_attribute;

    throw Exception{"Attribute type does not match, expected " + matched_attribute.type->getName() + ", found " + type->getName(),
        ErrorCodes::TYPE_MISMATCH};
}
|
||||
|
||||
std::string DictionaryStructure::getKeyDescription() const
|
||||
{
|
||||
@ -318,9 +327,20 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
if ((range_min && name == range_min->name) || (range_max && name == range_max->name))
|
||||
continue;
|
||||
|
||||
|
||||
const auto type_string = config.getString(prefix + "type");
|
||||
const auto type = DataTypeFactory::instance().get(type_string);
|
||||
const auto underlying_type = getAttributeUnderlyingType(type_string);
|
||||
const auto initial_type = DataTypeFactory::instance().get(type_string);
|
||||
auto type = initial_type;
|
||||
bool is_array = false;
|
||||
bool is_nullable = false;
|
||||
|
||||
if (type->isNullable())
|
||||
{
|
||||
is_nullable = true;
|
||||
type = removeNullable(type);
|
||||
}
|
||||
|
||||
const auto underlying_type = getAttributeUnderlyingType(type);
|
||||
|
||||
const auto expression = config.getString(prefix + "expression", "");
|
||||
if (!expression.empty())
|
||||
@ -333,7 +353,9 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
try
|
||||
{
|
||||
if (null_value_string.empty())
|
||||
{
|
||||
null_value = type->getDefault();
|
||||
}
|
||||
else
|
||||
{
|
||||
ReadBufferFromString null_value_buffer{null_value_string};
|
||||
@ -365,8 +387,18 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
|
||||
has_hierarchy = has_hierarchy || hierarchical;
|
||||
|
||||
res_attributes.emplace_back(
|
||||
DictionaryAttribute{name, underlying_type, type, expression, null_value, hierarchical, injective, is_object_id});
|
||||
res_attributes.emplace_back(DictionaryAttribute{
|
||||
name,
|
||||
underlying_type,
|
||||
initial_type,
|
||||
type,
|
||||
expression,
|
||||
null_value,
|
||||
hierarchical,
|
||||
injective,
|
||||
is_object_id,
|
||||
is_nullable,
|
||||
is_array});
|
||||
}
|
||||
|
||||
return res_attributes;
|
||||
|
@ -42,7 +42,6 @@ std::string toString(const AttributeUnderlyingType type);
|
||||
/// Min and max lifetimes for a dictionary or its entry
|
||||
using DictionaryLifetime = ExternalLoadableLifetime;
|
||||
|
||||
|
||||
/** Holds the description of a single dictionary attribute:
|
||||
* - name, used for lookup into dictionary and source;
|
||||
* - type, used in conjunction with DataTypeFactory and getAttributeUnderlyingTypeByname;
|
||||
@ -57,13 +56,74 @@ struct DictionaryAttribute final
|
||||
const std::string name;
|
||||
const AttributeUnderlyingType underlying_type;
|
||||
const DataTypePtr type;
|
||||
const DataTypePtr nested_type;
|
||||
const std::string expression;
|
||||
const Field null_value;
|
||||
const bool hierarchical;
|
||||
const bool injective;
|
||||
const bool is_object_id;
|
||||
const bool is_nullable;
|
||||
const bool is_array;
|
||||
};
|
||||
|
||||
/// Empty tag type that carries a C++ attribute type as the nested alias `AttributeType`.
template <typename T>
struct DictionaryAttributeType
{
    using AttributeType = T;
};
|
||||
|
||||
template <typename F>
|
||||
void callOnDictionaryAttributeType(AttributeUnderlyingType type, F&& func)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
func(DictionaryAttributeType<UInt8>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
func(DictionaryAttributeType<UInt16>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
func(DictionaryAttributeType<UInt32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
func(DictionaryAttributeType<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
func(DictionaryAttributeType<UInt128>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
func(DictionaryAttributeType<Int8>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
func(DictionaryAttributeType<Int16>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
func(DictionaryAttributeType<Int32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
func(DictionaryAttributeType<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
func(DictionaryAttributeType<Float32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
func(DictionaryAttributeType<Float64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
func(DictionaryAttributeType<String>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
func(DictionaryAttributeType<Decimal32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
func(DictionaryAttributeType<Decimal64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
func(DictionaryAttributeType<Decimal128>());
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
struct DictionarySpecialAttribute final
|
||||
{
|
||||
@ -94,10 +154,10 @@ struct DictionaryStructure final
|
||||
DictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
|
||||
|
||||
void validateKeyTypes(const DataTypes & key_types) const;
|
||||
const DictionaryAttribute &getAttribute(const String& attribute_name, const DataTypePtr & type) const;
|
||||
std::string getKeyDescription() const;
|
||||
bool isKeySizeFixed() const;
|
||||
size_t getKeySize() const;
|
||||
|
||||
private:
|
||||
/// range_min and range_max have to be parsed before this function call
|
||||
std::vector<DictionaryAttribute> getAttributes(
|
||||
|
@ -3,7 +3,10 @@
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
#include <Core/Defines.h>
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -35,11 +38,13 @@ DirectDictionary::DirectDictionary(
|
||||
void DirectDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
|
||||
{
|
||||
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
|
||||
DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
|
||||
|
||||
getItemsImpl<UInt64, UInt64>(
|
||||
*hierarchical_attribute,
|
||||
ids,
|
||||
[&](const size_t row, const UInt64 value) { out[row] = value; },
|
||||
[&](const size_t) { return null_value; });
|
||||
[&](const size_t row, const UInt64 value, bool) { out[row] = value; },
|
||||
extractor);
|
||||
}
|
||||
|
||||
|
||||
@ -128,395 +133,101 @@ void DirectDictionary::isInConstantVector(const Key child_id, const PaddedPODArr
|
||||
isInImpl(child_id, ancestor_ids, out);
|
||||
}
|
||||
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void DirectDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void DirectDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
|
||||
ColumnPtr DirectDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto & null_value = std::get<StringRef>(attribute.null_values);
|
||||
getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t) { return String(null_value.data, null_value.size); });
|
||||
}
|
||||
auto keys_size = ids.size();
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void DirectDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void DirectDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
|
||||
{
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t row) { const auto ref = def->getDataAt(row); return String(ref.data, ref.size); });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void DirectDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void DirectDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
|
||||
{
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
DirectDictionary::getItemsStringImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
|
||||
[&](const size_t) { return def; });
|
||||
}
|
||||
|
||||
|
||||
void DirectDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
ColumnUInt8::MutablePtr col_null_map_to;
|
||||
ColumnUInt8::Container * vec_null_map_to = nullptr;
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
has<UInt8>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
has<UInt16>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
has<UInt32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
has<UInt64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
has<UInt128>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
has<Int8>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
has<Int16>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
has<Int32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
has<Int64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
has<Float32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
has<Float64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
has<String>(attribute, ids, out);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
has<Decimal32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
has<Decimal64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
has<Decimal128>(attribute, ids, out);
|
||||
break;
|
||||
col_null_map_to = ColumnUInt8::create(keys_size, false);
|
||||
vec_null_map_to = &col_null_map_to->getData();
|
||||
}
|
||||
}
|
||||
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
void DirectDictionary::createAttributes()
|
||||
{
|
||||
const auto size = dict_struct.attributes.size();
|
||||
attributes.reserve(size);
|
||||
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attribute_name_by_index.emplace(attributes.size(), attribute.name);
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value, attribute.name));
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
if (attribute.hierarchical)
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
|
||||
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
hierarchical_attribute = &attributes.back();
|
||||
auto * out = column.get();
|
||||
|
||||
if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64)
|
||||
throw Exception{full_name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void DirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
|
||||
}
|
||||
|
||||
template <>
|
||||
void DirectDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
const String & string = null_value.get<String>();
|
||||
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
|
||||
}
|
||||
|
||||
|
||||
DirectDictionary::Attribute DirectDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & attr_name)
|
||||
{
|
||||
Attribute attr{type, {}, {}, attr_name};
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
createAttributeImpl<String>(attr, null_value);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
}
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void DirectDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
{
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
HashMap<Key, OutputType> value_by_key;
|
||||
for (const auto row : ext::range(0, rows))
|
||||
value_by_key[ids[row]] = get_default(row);
|
||||
|
||||
std::vector<Key> to_load;
|
||||
to_load.reserve(value_by_key.size());
|
||||
for (auto it = value_by_key.begin(); it != value_by_key.end(); ++it)
|
||||
to_load.emplace_back(static_cast<Key>(it->getKey()));
|
||||
|
||||
auto stream = source_ptr->loadIds(to_load);
|
||||
stream->readPrefix();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
const IColumn & id_column = *block.safeGetByPosition(0).column;
|
||||
|
||||
for (const size_t attribute_idx : ext::range(0, attributes.size()))
|
||||
{
|
||||
const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column;
|
||||
|
||||
for (const auto row_idx : ext::range(0, id_column.size()))
|
||||
{
|
||||
const auto key = id_column[row_idx].get<UInt64>();
|
||||
|
||||
if (value_by_key.find(key) != value_by_key.end() && attribute.name == attribute_name_by_index.at(attribute_idx))
|
||||
getItemsImpl<String, String>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t row, const String value, bool is_null)
|
||||
{
|
||||
if (attribute.type == AttributeUnderlyingType::utFloat32)
|
||||
{
|
||||
value_by_key[key] = static_cast<Float32>(attribute_column[row_idx].get<Float64>());
|
||||
}
|
||||
else
|
||||
{
|
||||
value_by_key[key] = static_cast<OutputType>(attribute_column[row_idx].get<AttributeType>());
|
||||
}
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
}
|
||||
}
|
||||
const auto ref = StringRef{value};
|
||||
out->insertData(ref.data, ref.size);
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
set_value(row, value_by_key[ids[row]]);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void DirectDictionary::getItemsStringImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
{
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
HashMap<Key, String> value_by_key;
|
||||
for (const auto row : ext::range(0, rows))
|
||||
value_by_key[ids[row]] = get_default(row);
|
||||
|
||||
std::vector<Key> to_load;
|
||||
to_load.reserve(value_by_key.size());
|
||||
for (auto it = value_by_key.begin(); it != value_by_key.end(); ++it)
|
||||
to_load.emplace_back(static_cast<Key>(it->getKey()));
|
||||
|
||||
auto stream = source_ptr->loadIds(to_load);
|
||||
stream->readPrefix();
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
const IColumn & id_column = *block.safeGetByPosition(0).column;
|
||||
|
||||
for (const size_t attribute_idx : ext::range(0, attributes.size()))
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column;
|
||||
|
||||
for (const auto row_idx : ext::range(0, id_column.size()))
|
||||
{
|
||||
const auto key = id_column[row_idx].get<UInt64>();
|
||||
if (value_by_key.find(key) != value_by_key.end() && attribute.name == attribute_name_by_index.at(attribute_idx))
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t row, const auto value, bool is_null)
|
||||
{
|
||||
const String from_source = attribute_column[row_idx].get<String>();
|
||||
value_by_key[key] = from_source;
|
||||
}
|
||||
}
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out[row] = value;
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
}
|
||||
stream->readSuffix();
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
set_value(row, value_by_key[ids[row]]);
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
const DirectDictionary::Attribute & DirectDictionary::getAttribute(const std::string & attribute_name) const
|
||||
ColumnUInt8::Ptr DirectDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
|
||||
{
|
||||
const auto it = attribute_index_by_name.find(attribute_name);
|
||||
if (it == std::end(attribute_index_by_name))
|
||||
throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
return attributes[it->second];
|
||||
}
|
||||
auto result = ColumnUInt8::create(ext::size(ids));
|
||||
auto& out = result->getData();
|
||||
|
||||
|
||||
template <typename T>
|
||||
void DirectDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
HashMap<Key, UInt8> has_key;
|
||||
@ -548,6 +259,137 @@ void DirectDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, P
|
||||
out[row] = has_key[ids[row]];
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void DirectDictionary::createAttributes()
|
||||
{
|
||||
const auto size = dict_struct.attributes.size();
|
||||
attributes.reserve(size);
|
||||
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attribute_name_by_index.emplace(attributes.size(), attribute.name);
|
||||
attributes.push_back(createAttribute(attribute, attribute.null_value, attribute.name));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
{
|
||||
hierarchical_attribute = &attributes.back();
|
||||
|
||||
if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64)
|
||||
throw Exception{full_name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void DirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
|
||||
}
|
||||
|
||||
template <>
|
||||
void DirectDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
const String & string = null_value.get<String>();
|
||||
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
|
||||
}
|
||||
|
||||
|
||||
/// Creates an Attribute from its structure description.
/// The underlying type, nullability flag and name are copied in; the null value is then filled by
/// dispatching on the underlying type to the matching createAttributeImpl instantiation.
DirectDictionary::Attribute DirectDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value, const std::string & attr_name)
{
    Attribute created{attribute.underlying_type, attribute.is_nullable, {}, {}, attr_name};

    /// Invoked by callOnDictionaryAttributeType with a tag object whose type exposes AttributeType.
    auto fill_null_value = [&](const auto & type_tag)
    {
        using AttributeType = typename std::decay_t<decltype(type_tag)>::AttributeType;
        createAttributeImpl<AttributeType>(created, null_value);
    };

    callOnDictionaryAttributeType(attribute.underlying_type, fill_null_value);

    return created;
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void DirectDictionary::getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
HashMap<Key, OutputType> value_by_key;
|
||||
HashSet<Key> value_is_null;
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
auto key = ids[row];
|
||||
value_by_key[key] = static_cast<AttributeType>(default_value_extractor[row]);
|
||||
}
|
||||
|
||||
std::vector<Key> to_load;
|
||||
to_load.reserve(value_by_key.size());
|
||||
for (auto it = value_by_key.begin(); it != value_by_key.end(); ++it)
|
||||
to_load.emplace_back(static_cast<Key>(it->getKey()));
|
||||
|
||||
auto stream = source_ptr->loadIds(to_load);
|
||||
stream->readPrefix();
|
||||
|
||||
const auto it = attribute_index_by_name.find(attribute.name);
|
||||
if (it == std::end(attribute_index_by_name))
|
||||
throw Exception{full_name + ": no such attribute '" + attribute.name + "'", ErrorCodes::BAD_ARGUMENTS};
|
||||
|
||||
auto attribute_index = it->second;
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
const IColumn & id_column = *block.safeGetByPosition(0).column;
|
||||
|
||||
const IColumn & attribute_column = *block.safeGetByPosition(attribute_index + 1).column;
|
||||
|
||||
for (const auto row_idx : ext::range(0, id_column.size()))
|
||||
{
|
||||
const auto key = id_column[row_idx].get<UInt64>();
|
||||
|
||||
if (value_by_key.find(key) != value_by_key.end())
|
||||
{
|
||||
auto value = attribute_column[row_idx];
|
||||
|
||||
if (value.isNull())
|
||||
value_is_null.insert(key);
|
||||
else
|
||||
value_by_key[key] = static_cast<OutputType>(value.get<NearestFieldType<AttributeType>>());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
auto key = ids[row];
|
||||
set_value(row, value_by_key[key], value_is_null.find(key) != nullptr);
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
/// Returns the attribute registered under `attribute_name`.
/// Throws BAD_ARGUMENTS when the name is absent from attribute_index_by_name.
const DirectDictionary::Attribute & DirectDictionary::getAttribute(const std::string & attribute_name) const
{
    if (const auto found = attribute_index_by_name.find(attribute_name); found != std::end(attribute_index_by_name))
        return attributes[found->second];

    throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
}
|
||||
|
||||
|
||||
|
@ -13,11 +13,10 @@
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
|
||||
class DirectDictionary final : public IDictionary
|
||||
{
|
||||
@ -65,76 +64,16 @@ public:
|
||||
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
|
||||
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void
|
||||
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
|
||||
const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
|
||||
|
||||
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -142,6 +81,7 @@ private:
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
bool is_nullable;
|
||||
std::variant<
|
||||
UInt8,
|
||||
UInt16,
|
||||
@ -168,23 +108,17 @@ private:
|
||||
template <typename T>
|
||||
void addAttributeSize(const Attribute & attribute);
|
||||
|
||||
void calculateBytesAllocated();
|
||||
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & name);
|
||||
static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value, const std::string & name);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void getItemsStringImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
|
||||
template <typename T>
|
||||
void resize(Attribute & attribute, const Key id);
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);
|
||||
@ -193,9 +127,6 @@ private:
|
||||
|
||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
||||
|
||||
template <typename T>
|
||||
void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
Key getValueOrNullByKey(const Key & to_find) const;
|
||||
|
||||
template <typename ChildType, typename AncestorType>
|
||||
|
@ -1,9 +1,14 @@
|
||||
#include "FlatDictionary.h"
|
||||
|
||||
#include <Core/Defines.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
#include <Core/Defines.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -44,12 +49,13 @@ FlatDictionary::FlatDictionary(
|
||||
void FlatDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
|
||||
{
|
||||
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
|
||||
DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
|
||||
|
||||
getItemsImpl<UInt64, UInt64>(
|
||||
*hierarchical_attribute,
|
||||
ids,
|
||||
[&](const size_t row, const UInt64 value) { out[row] = value; },
|
||||
[&](const size_t) { return null_value; });
|
||||
extractor);
|
||||
}
|
||||
|
||||
|
||||
@ -102,186 +108,103 @@ void FlatDictionary::isInConstantVector(const Key child_id, const PaddedPODArray
|
||||
isInImpl(child_id, ancestor_ids, out);
|
||||
}
|
||||
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void FlatDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void FlatDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
|
||||
ColumnPtr FlatDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
auto size = ids.size();
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
const auto & null_value = std::get<StringRef>(attribute.null_values);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void FlatDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void FlatDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
|
||||
{
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void FlatDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void FlatDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
|
||||
{
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
FlatDictionary::getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return StringRef{def}; });
|
||||
}
|
||||
|
||||
|
||||
void FlatDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
has<UInt8>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
has<UInt16>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
has<UInt32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
has<UInt64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
has<UInt128>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
has<Int8>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
has<Int16>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
has<Int32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
has<Int64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
has<Float32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
has<Float64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
has<String>(attribute, ids, out);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
has<Decimal32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
has<Decimal64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
has<Decimal128>(attribute, ids, out);
|
||||
break;
|
||||
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
|
||||
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
|
||||
|
||||
if constexpr (std::is_same_v<ValueType, StringRef>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<ValueType, ValueType>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<ValueType, ValueType>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t row, const auto value) { out[row] = value; },
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.nullable_set)
|
||||
{
|
||||
ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, false);
|
||||
ColumnUInt8::Container& vec_null_map_to = col_null_map_to->getData();
|
||||
|
||||
for (size_t row = 0; row < ids.size(); ++row)
|
||||
{
|
||||
auto id = ids[row];
|
||||
|
||||
if (attribute.nullable_set->find(id) != nullptr)
|
||||
vec_null_map_to[row] = true;
|
||||
}
|
||||
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/// Reports, for every id taken from the first key column, whether that id is marked in loaded_ids.
/// Returns a UInt8 column with one entry per id; ids beyond the end of loaded_ids yield 0.
/// The second parameter (key types) is unused. Adds the number of ids to query_count.
ColumnUInt8::Ptr FlatDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
{
    PaddedPODArray<Key> backup_storage;
    const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
    const auto ids_count = ext::size(ids);

    auto result = ColumnUInt8::create(ids_count);
    auto & out = result->getData();

    for (size_t row = 0; row < ids_count; ++row)
    {
        const auto id = ids[row];

        /// The bounds check must come first: loaded_ids is only indexed when id is in range.
        const bool in_range = id < loaded_ids.size();
        out[row] = in_range && loaded_ids[id];
    }

    query_count.fetch_add(ids_count, std::memory_order_relaxed);

    return result;
}
|
||||
|
||||
void FlatDictionary::createAttributes()
|
||||
{
|
||||
const auto size = dict_struct.attributes.size();
|
||||
@ -290,7 +213,7 @@ void FlatDictionary::createAttributes()
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
|
||||
attributes.push_back(createAttribute(attribute, attribute.null_value));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
{
|
||||
@ -416,6 +339,14 @@ void FlatDictionary::addAttributeSize(const Attribute & attribute)
|
||||
bucket_count = array_ref.capacity();
|
||||
}
|
||||
|
||||
template <>
|
||||
void FlatDictionary::addAttributeSize<String>(const Attribute & attribute)
|
||||
{
|
||||
const auto & array_ref = std::get<ContainerType<StringRef>>(attribute.arrays);
|
||||
bytes_allocated += sizeof(PaddedPODArray<StringRef>) + array_ref.allocated_bytes();
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
bucket_count = array_ref.capacity();
|
||||
}
|
||||
|
||||
void FlatDictionary::calculateBytesAllocated()
|
||||
{
|
||||
@ -423,60 +354,15 @@ void FlatDictionary::calculateBytesAllocated()
|
||||
|
||||
for (const auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
addAttributeSize<UInt8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
addAttributeSize<UInt16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
addAttributeSize<UInt32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
addAttributeSize<UInt64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
addAttributeSize<UInt128>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
addAttributeSize<Int8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
addAttributeSize<Int16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
addAttributeSize<Int32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
addAttributeSize<Int64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
addAttributeSize<Float32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
addAttributeSize<Float64>(attribute);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
addAttributeSize<Decimal32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
addAttributeSize<Decimal64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
addAttributeSize<Decimal128>(attribute);
|
||||
break;
|
||||
addAttributeSize<AttributeType>(attribute);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
@ -500,67 +386,31 @@ void FlatDictionary::createAttributeImpl<String>(Attribute & attribute, const Fi
|
||||
}
|
||||
|
||||
|
||||
FlatDictionary::Attribute FlatDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}, {}};
|
||||
auto nullable_set = attribute.is_nullable ? std::make_optional<NullableSet>() : std::optional<NullableSet>{};
|
||||
Attribute attr{attribute.underlying_type, std::move(nullable_set), {}, {}, {}};
|
||||
|
||||
switch (type)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
createAttributeImpl<String>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
}
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void FlatDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto & attr = std::get<ContainerType<AttributeType>>(attribute.arrays);
|
||||
const auto rows = ext::size(ids);
|
||||
@ -568,7 +418,7 @@ void FlatDictionary::getItemsImpl(
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
const auto id = ids[row];
|
||||
set_value(row, id < ext::size(attr) && loaded_ids[id] ? static_cast<OutputType>(attr[id]) : get_default(row));
|
||||
set_value(row, id < ext::size(attr) && loaded_ids[id] ? static_cast<OutputType>(attr[id]) : default_value_extractor[row]);
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
@ -592,7 +442,6 @@ void FlatDictionary::resize(Attribute & attribute, const Key id)
|
||||
template <typename T>
|
||||
void FlatDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const T & value)
|
||||
{
|
||||
resize<T>(attribute, id);
|
||||
auto & array = std::get<ContainerType<T>>(attribute.arrays);
|
||||
array[id] = value;
|
||||
loaded_ids[id] = true;
|
||||
@ -601,64 +450,38 @@ void FlatDictionary::setAttributeValueImpl(Attribute & attribute, const Key id,
|
||||
template <>
|
||||
void FlatDictionary::setAttributeValueImpl<String>(Attribute & attribute, const Key id, const String & value)
|
||||
{
|
||||
resize<StringRef>(attribute, id);
|
||||
const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
|
||||
auto & array = std::get<ContainerType<StringRef>>(attribute.arrays);
|
||||
array[id] = StringRef{string_in_arena, value.size()};
|
||||
loaded_ids[id] = true;
|
||||
setAttributeValueImpl(attribute, id, StringRef{string_in_arena, value.size()});
|
||||
}
|
||||
|
||||
void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
setAttributeValueImpl<String>(attribute, id, value.get<String>());
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ResizeType = std::conditional_t<std::is_same_v<AttributeType, String>, StringRef, AttributeType>;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
|
||||
break;
|
||||
}
|
||||
resize<ResizeType>(attribute, id);
|
||||
|
||||
if (attribute.nullable_set)
|
||||
{
|
||||
if (value.isNull())
|
||||
{
|
||||
attribute.nullable_set->insert(id);
|
||||
loaded_ids[id] = true;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
attribute.nullable_set->erase(id);
|
||||
}
|
||||
}
|
||||
|
||||
setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
|
||||
@ -671,27 +494,13 @@ const FlatDictionary::Attribute & FlatDictionary::getAttribute(const std::string
|
||||
return attributes[it->second];
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void FlatDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
const auto ids_count = ext::size(ids);
|
||||
|
||||
for (const auto i : ext::range(0, ids_count))
|
||||
{
|
||||
const auto id = ids[i];
|
||||
out[i] = id < loaded_ids.size() && loaded_ids[id];
|
||||
}
|
||||
|
||||
query_count.fetch_add(ids_count, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
|
||||
{
|
||||
const auto ids_count = ext::size(loaded_ids);
|
||||
|
||||
PaddedPODArray<Key> ids;
|
||||
ids.reserve(ids_count);
|
||||
|
||||
for (auto idx : ext::range(0, ids_count))
|
||||
if (loaded_ids[idx])
|
||||
ids.push_back(idx);
|
||||
@ -700,7 +509,7 @@ PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
|
||||
|
||||
BlockInputStreamPtr FlatDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<FlatDictionary, Key>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getIds(), column_names);
|
||||
}
|
||||
|
||||
|
@ -3,20 +3,25 @@
|
||||
#include <atomic>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
#include <optional>
|
||||
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <Core/Block.h>
|
||||
#include <ext/range.h>
|
||||
#include <ext/size.h>
|
||||
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
|
||||
class FlatDictionary final : public IDictionary
|
||||
{
|
||||
@ -66,76 +71,16 @@ public:
|
||||
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
|
||||
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void
|
||||
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
|
||||
const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
|
||||
|
||||
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -143,9 +88,13 @@ private:
|
||||
template <typename Value>
|
||||
using ContainerType = PaddedPODArray<Value>;
|
||||
|
||||
using NullableSet = HashSet<Key, DefaultHash<Key>>;
|
||||
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
std::optional<NullableSet> nullable_set;
|
||||
|
||||
std::variant<
|
||||
UInt8,
|
||||
UInt16,
|
||||
@ -180,6 +129,7 @@ private:
|
||||
ContainerType<Float64>,
|
||||
ContainerType<StringRef>>
|
||||
arrays;
|
||||
|
||||
std::unique_ptr<Arena> string_arena;
|
||||
};
|
||||
|
||||
@ -194,13 +144,16 @@ private:
|
||||
void calculateBytesAllocated();
|
||||
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
void resize(Attribute & attribute, const Key id);
|
||||
@ -212,9 +165,6 @@ private:
|
||||
|
||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
||||
|
||||
template <typename T>
|
||||
void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
template <typename ChildType, typename AncestorType>
|
||||
void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
|
@ -4,7 +4,10 @@
|
||||
#include "DictionaryFactory.h"
|
||||
#include "ClickHouseDictionarySource.h"
|
||||
#include <Core/Defines.h>
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
@ -57,12 +60,13 @@ HashedDictionary::HashedDictionary(
|
||||
void HashedDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
|
||||
{
|
||||
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
|
||||
DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
|
||||
|
||||
getItemsImpl<UInt64, UInt64>(
|
||||
*hierarchical_attribute,
|
||||
ids,
|
||||
[&](const size_t row, const UInt64 value) { out[row] = value; },
|
||||
[&](const size_t) { return null_value; });
|
||||
extractor);
|
||||
}
|
||||
|
||||
|
||||
@ -125,183 +129,105 @@ void HashedDictionary::isInConstantVector(const Key child_id, const PaddedPODArr
|
||||
isInImpl(child_id, ancestor_ids, out);
|
||||
}
|
||||
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void HashedDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) \
|
||||
const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void HashedDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
|
||||
ColumnPtr HashedDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
auto size = ids.size();
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return null_value; });
|
||||
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
|
||||
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t row, const auto value) { return out[row] = value; },
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.nullable_set)
|
||||
{
|
||||
ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, false);
|
||||
ColumnUInt8::Container& vec_null_map_to = col_null_map_to->getData();
|
||||
|
||||
for (size_t row = 0; row < ids.size(); ++row)
|
||||
{
|
||||
auto id = ids[row];
|
||||
|
||||
if (attribute.nullable_set->find(id) != nullptr)
|
||||
vec_null_map_to[row] = true;
|
||||
}
|
||||
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void HashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void HashedDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
|
||||
ColumnUInt8::Ptr HashedDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
|
||||
{
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
size_t ids_count = ext::size(ids);
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void HashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE & def, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
auto result = ColumnUInt8::create(ext::size(ids));
|
||||
auto& out = result->getData();
|
||||
|
||||
void HashedDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
|
||||
{
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
ids,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return StringRef{def}; });
|
||||
}
|
||||
|
||||
void HashedDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
has<UInt8>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
has<UInt16>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
has<UInt32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
has<UInt64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
has<UInt128>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
has<Int8>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
has<Int16>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
has<Int32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
has<Int64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
has<Float32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
has<Float64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
has<StringRef>(attribute, ids, out);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
has<AttributeType>(attribute, ids, out);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
has<Decimal32>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
has<Decimal64>(attribute, ids, out);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
has<Decimal128>(attribute, ids, out);
|
||||
break;
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
query_count.fetch_add(ids_count, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void HashedDictionary::createAttributes()
|
||||
@ -312,7 +238,7 @@ void HashedDictionary::createAttributes()
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
|
||||
attributes.push_back(createAttribute(attribute, attribute.null_value));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
{
|
||||
@ -429,6 +355,13 @@ void HashedDictionary::resize(Attribute & attribute, size_t added_rows)
|
||||
map_ref->resize(added_rows);
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void HashedDictionary::resize<String>(Attribute & attribute, size_t added_rows)
|
||||
{
|
||||
resize<StringRef>(attribute, added_rows);
|
||||
}
|
||||
|
||||
void HashedDictionary::resize(size_t added_rows)
|
||||
{
|
||||
if (!added_rows)
|
||||
@ -436,56 +369,14 @@ void HashedDictionary::resize(size_t added_rows)
|
||||
|
||||
for (auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
resize<UInt8>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
resize<UInt16>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
resize<UInt32>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
resize<UInt64>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
resize<UInt128>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
resize<Int8>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
resize<Int16>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
resize<Int32>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
resize<Int64>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
resize<Float32>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
resize<Float64>(attribute, added_rows);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
resize<AttributeType>(attribute, added_rows);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
resize<Decimal32>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
resize<Decimal64>(attribute, added_rows);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
resize<Decimal128>(attribute, added_rows);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
resize<StringRef>(attribute, added_rows);
|
||||
break;
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
@ -562,66 +453,27 @@ void HashedDictionary::addAttributeSize(const Attribute & attribute)
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void HashedDictionary::addAttributeSize<String>(const Attribute & attribute)
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
}
|
||||
|
||||
void HashedDictionary::calculateBytesAllocated()
|
||||
{
|
||||
bytes_allocated += attributes.size() * sizeof(attributes.front());
|
||||
|
||||
for (const auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
addAttributeSize<UInt8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
addAttributeSize<UInt16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
addAttributeSize<UInt32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
addAttributeSize<UInt64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
addAttributeSize<UInt128>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
addAttributeSize<Int8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
addAttributeSize<Int16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
addAttributeSize<Int32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
addAttributeSize<Int64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
addAttributeSize<Float32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
addAttributeSize<Float64>(attribute);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
addAttributeSize<AttributeType>(attribute);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
addAttributeSize<Decimal32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
addAttributeSize<Decimal64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
addAttributeSize<Decimal128>(attribute);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
@ -635,93 +487,66 @@ void HashedDictionary::createAttributeImpl(Attribute & attribute, const Field &
|
||||
attribute.sparse_maps = std::make_unique<SparseCollectionType<T>>();
|
||||
}
|
||||
|
||||
HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
template <>
|
||||
void HashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}, {}, {}};
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
const String & string = null_value.get<String>();
|
||||
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
|
||||
|
||||
switch (type)
|
||||
if (!sparse)
|
||||
attribute.maps = std::make_unique<CollectionType<StringRef>>();
|
||||
else
|
||||
attribute.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>();
|
||||
}
|
||||
|
||||
HashedDictionary::Attribute HashedDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
|
||||
{
|
||||
auto nullable_set = attribute.is_nullable ? std::make_optional<NullableSet>() : std::optional<NullableSet>{};
|
||||
Attribute attr{attribute.underlying_type, std::move(nullable_set), {}, {}, {}, {}};
|
||||
|
||||
auto type_call = [&, this](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
attr.null_values = null_value.get<String>();
|
||||
if (!sparse)
|
||||
attr.maps = std::make_unique<CollectionType<StringRef>>();
|
||||
else
|
||||
attr.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>();
|
||||
attr.string_arena = std::make_unique<Arena>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename MapType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void HashedDictionary::getItemsAttrImpl(
|
||||
const AttrType & attr, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const MapType & attr,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
for (const auto i : ext::range(0, rows))
|
||||
{
|
||||
const auto it = attr.find(ids[i]);
|
||||
set_value(i, it != attr.end() ? static_cast<OutputType>(second(*it)) : get_default(i));
|
||||
set_value(i, it != attr.end() ? static_cast<OutputType>(second(*it)) : default_value_extractor[i]);
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void HashedDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
if (!sparse)
|
||||
return getItemsAttrImpl<OutputType>(*std::get<CollectionPtrType<AttributeType>>(attribute.maps), ids, set_value, get_default);
|
||||
return getItemsAttrImpl<OutputType>(*std::get<SparseCollectionPtrType<AttributeType>>(attribute.sparse_maps), ids, set_value, get_default);
|
||||
return getItemsAttrImpl<AttributeType, OutputType>(*std::get<CollectionPtrType<AttributeType>>(attribute.maps), ids, set_value, default_value_extractor);
|
||||
return getItemsAttrImpl<AttributeType, OutputType>(*std::get<SparseCollectionPtrType<AttributeType>>(attribute.sparse_maps), ids, set_value, default_value_extractor);
|
||||
}
|
||||
|
||||
|
||||
@ -740,58 +565,41 @@ bool HashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id
|
||||
}
|
||||
}
|
||||
|
||||
/// String specialization of setAttributeValueImpl.
/// Copies the bytes of `value` into the attribute's string_arena and then delegates to the
/// StringRef instantiation, passing a StringRef that points at the arena copy.
/// Returns the result of the StringRef call unchanged.
template <>
|
||||
bool HashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const Key id, const String value)
|
||||
{
|
||||
/// `value` is a by-value parameter, so the StringRef is built over the arena copy rather than over `value` itself.
const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
|
||||
return setAttributeValueImpl<StringRef>(attribute, id, StringRef{string_in_arena, value.size()});
|
||||
}
|
||||
|
||||
bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
|
||||
{
|
||||
switch (attribute.type)
|
||||
bool result = false;
|
||||
|
||||
auto type_call = [&, this](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
return setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
return setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
return setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
return setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
return setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
return setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
return setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
return setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
return setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
return setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
return setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
return setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
return setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
return setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
if (attribute.nullable_set)
|
||||
{
|
||||
const auto & string = value.get<String>();
|
||||
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
if (!sparse)
|
||||
if (value.isNull())
|
||||
{
|
||||
auto & map = *std::get<CollectionPtrType<StringRef>>(attribute.maps);
|
||||
return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
|
||||
result = attribute.nullable_set->insert(id).second;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & map = *std::get<SparseCollectionPtrType<StringRef>>(attribute.sparse_maps);
|
||||
return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
|
||||
attribute.nullable_set->erase(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw Exception{"Invalid attribute type", ErrorCodes::BAD_ARGUMENTS};
|
||||
result = setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
const HashedDictionary::Attribute & HashedDictionary::getAttribute(const std::string & attribute_name) const
|
||||
@ -810,9 +618,18 @@ void HashedDictionary::has(const Attribute & attribute, const PaddedPODArray<Key
|
||||
const auto rows = ext::size(ids);
|
||||
|
||||
for (const auto i : ext::range(0, rows))
|
||||
{
|
||||
out[i] = attr.find(ids[i]) != nullptr;
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
if (attribute.nullable_set && !out[i])
|
||||
out[i] = attribute.nullable_set->find(ids[i]) != nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
/// String specialization of has: forwards the same attribute, ids and output array
/// to the StringRef instantiation and does nothing else.
template <>
|
||||
void HashedDictionary::has<String>(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
has<StringRef>(attribute, ids, out);
|
||||
}
|
||||
|
||||
template <typename T, typename AttrType>
|
||||
@ -833,50 +650,39 @@ PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds(const Attribute &
|
||||
return getIdsAttrImpl<T>(*std::get<SparseCollectionPtrType<T>>(attribute.sparse_maps));
|
||||
}
|
||||
|
||||
/// String specialization of getIds: returns whatever the StringRef instantiation
/// returns for the same attribute.
template <>
|
||||
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds<String>(const Attribute & attribute) const
|
||||
{
|
||||
return getIds<StringRef>(attribute);
|
||||
}
|
||||
|
||||
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
|
||||
{
|
||||
const auto & attribute = attributes.front();
|
||||
PaddedPODArray<HashedDictionary::Key> result;
|
||||
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
return getIds<UInt8>(attribute);
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
return getIds<UInt16>(attribute);
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
return getIds<UInt32>(attribute);
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
return getIds<UInt64>(attribute);
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
return getIds<UInt128>(attribute);
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
return getIds<Int8>(attribute);
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
return getIds<Int16>(attribute);
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
return getIds<Int32>(attribute);
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
return getIds<Int64>(attribute);
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
return getIds<Float32>(attribute);
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
return getIds<Float64>(attribute);
|
||||
case AttributeUnderlyingType::utString:
|
||||
return getIds<StringRef>(attribute);
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
/// TODO: Check if order is satisfied
|
||||
result = getIds<AttributeType>(attribute);
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
return getIds<Decimal32>(attribute);
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
return getIds<Decimal64>(attribute);
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
return getIds<Decimal128>(attribute);
|
||||
}
|
||||
return PaddedPODArray<Key>();
|
||||
if (attribute.nullable_set)
|
||||
{
|
||||
for (const auto& value: *attribute.nullable_set)
|
||||
result.push_back(value.getKey());
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BlockInputStreamPtr HashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<HashedDictionary, Key>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getIds(), column_names);
|
||||
}
|
||||
|
||||
|
@ -3,15 +3,18 @@
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <variant>
|
||||
#include <optional>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <sparsehash/sparse_hash_map>
|
||||
#include <ext/range.h>
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
/** This dictionary stores all content in a hash table in memory
|
||||
* (a separate Key -> Value map for each attribute)
|
||||
@ -20,7 +23,6 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using BlockPtr = std::shared_ptr<Block>;
|
||||
|
||||
class HashedDictionary final : public IDictionary
|
||||
{
|
||||
@ -66,77 +68,16 @@ public:
|
||||
|
||||
void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void
|
||||
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
|
||||
const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE & def, ResultArrayType<TYPE> & out) \
|
||||
const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
|
||||
|
||||
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
void isInVectorVector(
|
||||
const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
|
||||
@ -162,9 +103,13 @@ private:
|
||||
template <typename Value>
|
||||
using SparseCollectionPtrType = std::unique_ptr<SparseCollectionType<Value>>;
|
||||
|
||||
using NullableSet = HashSet<Key, DefaultHash<Key>>;
|
||||
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
std::optional<NullableSet> nullable_set;
|
||||
|
||||
std::variant<
|
||||
UInt8,
|
||||
UInt16,
|
||||
@ -180,7 +125,7 @@ private:
|
||||
Decimal128,
|
||||
Float32,
|
||||
Float64,
|
||||
String>
|
||||
StringRef>
|
||||
null_values;
|
||||
std::variant<
|
||||
CollectionPtrType<UInt8>,
|
||||
@ -235,14 +180,21 @@ private:
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
|
||||
|
||||
template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename MapType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsAttrImpl(
|
||||
const AttrType & attr, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
const MapType & attr,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
bool setAttributeValueImpl(Attribute & attribute, const Key id, const T value);
|
||||
|
@ -10,6 +10,8 @@
|
||||
#include <common/StringRef.h>
|
||||
#include "IDictionarySource.h"
|
||||
#include <Dictionaries/DictionaryStructure.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <memory>
|
||||
@ -20,15 +22,31 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int TYPE_MISMATCH;
|
||||
}
|
||||
|
||||
struct IDictionaryBase;
|
||||
using DictionaryPtr = std::unique_ptr<IDictionaryBase>;
|
||||
|
||||
struct DictionaryStructure;
|
||||
class ColumnString;
|
||||
/** DictionaryKeyType tells a client of IDictionary
|
||||
* which key type is supported by the dictionary.
|
||||
*
|
||||
* Simple is for dictionaries that support a UInt64 key column.
|
||||
*
|
||||
* Complex is for dictionaries that support any combination of key columns.
|
||||
*
|
||||
* Range is for dictionaries that support a combination of a UInt64 key column
|
||||
* and a range key column that is representable as a number.
|
||||
*/
|
||||
enum class DictionaryKeyType
|
||||
{
|
||||
simple, /// UInt64 key column.
|
||||
complex, /// Any combination of key columns.
|
||||
range /// UInt64 key column plus a numeric-representable range key column.
|
||||
};
|
||||
|
||||
/**
|
||||
* Base class for Dictionaries implementation.
|
||||
*/
|
||||
struct IDictionaryBase : public IExternalLoadable
|
||||
{
|
||||
using Key = UInt64;
|
||||
@ -85,6 +103,33 @@ struct IDictionaryBase : public IExternalLoadable
|
||||
|
||||
virtual bool isInjective(const std::string & attribute_name) const = 0;
|
||||
|
||||
/** Subclass must provide key type that is supported by dictionary.
|
||||
* Client will use that key type to provide valid key columns for `getColumn` and `has` functions.
|
||||
*/
|
||||
virtual DictionaryKeyType getKeyType() const = 0;
|
||||
|
||||
/** Subclass must validate key columns and key types
|
||||
* and return a column representation of the dictionary attribute.
|
||||
*
|
||||
* Parameter default_values_column must be used to provide default values
|
||||
* for keys that are not in dictionary. If null pointer is passed,
|
||||
* then default attribute value must be used.
|
||||
*/
|
||||
virtual ColumnPtr getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const = 0;
|
||||
|
||||
/** Subclass must validate key columns and key types and return a ColumnUInt8 that
|
||||
* indicates, for each row, whether the key is in the dictionary.
|
||||
* If the key is in the dictionary, the value of the associated row will be 1, otherwise 0.
|
||||
*/
|
||||
virtual ColumnUInt8::Ptr hasKeys(
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types) const = 0;
|
||||
|
||||
virtual BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const = 0;
|
||||
|
||||
bool supportUpdates() const override { return true; }
|
||||
@ -115,7 +160,6 @@ protected:
|
||||
const String full_name;
|
||||
};
|
||||
|
||||
|
||||
struct IDictionary : IDictionaryBase
|
||||
{
|
||||
IDictionary(const StorageID & dict_id_) : IDictionaryBase(dict_id_) {}
|
||||
@ -124,8 +168,7 @@ struct IDictionary : IDictionaryBase
|
||||
|
||||
virtual void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const = 0;
|
||||
|
||||
virtual void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const = 0;
|
||||
|
||||
/// TODO: Rewrite
|
||||
/// Methods for hierarchy.
|
||||
|
||||
virtual void isInVectorVector(
|
||||
@ -157,14 +200,4 @@ struct IDictionary : IDictionaryBase
|
||||
}
|
||||
};
|
||||
|
||||
/// Implicit conversions in dictGet functions are disabled.
/// Compares the attribute's underlying type with the requested type `to` and throws
/// an Exception with ErrorCodes::TYPE_MISMATCH when they differ; does nothing when they match.
/// The message contains the dictionary's name for logs, the attribute name and both type names.
/// `dictionary` is dereferenced only on the mismatch path, where it must be non-null.
|
||||
inline void checkAttributeType(const IDictionaryBase * dictionary, const std::string & attribute_name,
|
||||
AttributeUnderlyingType attribute_type, AttributeUnderlyingType to)
|
||||
{
|
||||
if (attribute_type != to)
|
||||
throw Exception{ErrorCodes::TYPE_MISMATCH, "{}: type mismatch: attribute {} has type {}, expected {}",
|
||||
dictionary->getDictionaryID().getNameForLogs(),
|
||||
attribute_name, toString(attribute_type), toString(to)};
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <IO/WriteIntText.h>
|
||||
#include <Poco/ByteOrder.h>
|
||||
#include <Common/formatIPv6.h>
|
||||
@ -16,6 +17,7 @@
|
||||
#include <ext/range.h>
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -266,167 +268,75 @@ IPAddressDictionary::IPAddressDictionary(
|
||||
calculateBytesAllocated();
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void IPAddressDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(attribute.null_values); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void IPAddressDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
|
||||
{
|
||||
validateKeyTypes(key_types);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void IPAddressDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void IPAddressDictionary::getString(
|
||||
ColumnPtr IPAddressDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
validateKeyTypes(key_types);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void IPAddressDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
validateKeyTypes(key_types); \
|
||||
\
|
||||
const auto & attribute = getAttribute(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void IPAddressDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
validateKeyTypes(key_types);
|
||||
ColumnPtr result;
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return StringRef{def}; });
|
||||
auto size = key_columns.front()->size();
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto & null_value = std::get<AttributeType>(attribute.null_values);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
|
||||
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<ValueType, ValueType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<ValueType, ValueType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value) { return out[row] = value; },
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void IPAddressDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
|
||||
|
||||
ColumnUInt8::Ptr IPAddressDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
{
|
||||
validateKeyTypes(key_types);
|
||||
|
||||
const auto first_column = key_columns.front();
|
||||
const auto rows = first_column->size();
|
||||
|
||||
auto result = ColumnUInt8::create(rows);
|
||||
auto& out = result->getData();
|
||||
|
||||
if (first_column->isNumeric())
|
||||
{
|
||||
uint8_t addrv6_buf[IPV6_BINARY_LENGTH];
|
||||
@ -451,6 +361,8 @@ void IPAddressDictionary::has(const Columns & key_columns, const DataTypes & key
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void IPAddressDictionary::createAttributes()
|
||||
@ -652,6 +564,13 @@ void IPAddressDictionary::addAttributeSize(const Attribute & attribute)
|
||||
bucket_count = vec.size();
|
||||
}
|
||||
|
||||
/// String specialization of addAttributeSize.
/// Runs the StringRef accounting for the attribute, then adds sizeof(Arena) plus the size
/// reported by the attribute's string_arena to bytes_allocated.
template <>
|
||||
void IPAddressDictionary::addAttributeSize<String>(const Attribute & attribute)
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
/// string_arena is dereferenced unconditionally here, so it must already be set for this attribute.
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
}
|
||||
|
||||
void IPAddressDictionary::calculateBytesAllocated()
|
||||
{
|
||||
if (auto * ipv4_col = std::get_if<IPv4Container>(&ip_column))
|
||||
@ -669,64 +588,18 @@ void IPAddressDictionary::calculateBytesAllocated()
|
||||
|
||||
for (const auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
addAttributeSize<UInt8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
addAttributeSize<UInt16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
addAttributeSize<UInt32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
addAttributeSize<UInt64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
addAttributeSize<UInt128>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
addAttributeSize<Int8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
addAttributeSize<Int16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
addAttributeSize<Int32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
addAttributeSize<Int64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
addAttributeSize<Float32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
addAttributeSize<Float64>(attribute);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
addAttributeSize<Decimal32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
addAttributeSize<Decimal64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
addAttributeSize<Decimal128>(attribute);
|
||||
break;
|
||||
addAttributeSize<AttributeType>(attribute);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
@ -734,65 +607,27 @@ void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field
|
||||
attribute.maps.emplace<ContainerType<T>>();
|
||||
}
|
||||
|
||||
template <>
|
||||
void IPAddressDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
attribute.null_values = null_value.isNull() ? String() : null_value.get<String>();
|
||||
attribute.maps.emplace<ContainerType<StringRef>>();
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
}
|
||||
|
||||
IPAddressDictionary::Attribute IPAddressDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}, {}};
|
||||
|
||||
switch (type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
|
||||
attr.null_values = null_value.isNull() ? String() : null_value.get<String>();
|
||||
attr.maps.emplace<ContainerType<StringRef>>();
|
||||
attr.string_arena = std::make_unique<Arena>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
@ -802,9 +637,12 @@ const uint8_t * IPAddressDictionary::getIPv6FromOffset(const IPAddressDictionary
|
||||
return reinterpret_cast<const uint8_t *>(&ipv6_col[i * IPV6_BINARY_LENGTH]);
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto first_column = key_columns.front();
|
||||
const auto rows = first_column->size();
|
||||
@ -841,7 +679,7 @@ void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
|
||||
set_value(i, static_cast<OutputType>(vec[row_idx[*found_it]]));
|
||||
}
|
||||
else
|
||||
set_value(i, get_default(i));
|
||||
set_value(i, default_value_extractor[i]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -876,13 +714,16 @@ void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
|
||||
mask_column[*found_it] == mask))
|
||||
set_value(i, static_cast<OutputType>(vec[row_idx[*found_it]]));
|
||||
else
|
||||
set_value(i, get_default(i));
|
||||
set_value(i, default_value_extractor[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void IPAddressDictionary::getItemsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto first_column = key_columns.front();
|
||||
const auto rows = first_column->size();
|
||||
@ -891,7 +732,7 @@ void IPAddressDictionary::getItemsImpl(
|
||||
if (unlikely(key_columns.size() == 2))
|
||||
{
|
||||
getItemsByTwoKeyColumnsImpl<AttributeType, OutputType>(
|
||||
attribute, key_columns, std::forward<ValueSetter>(set_value), std::forward<DefaultGetter>(get_default));
|
||||
attribute, key_columns, std::forward<ValueSetter>(set_value), default_value_extractor);
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
return;
|
||||
}
|
||||
@ -909,7 +750,7 @@ void IPAddressDictionary::getItemsImpl(
|
||||
if (found != ipNotFound())
|
||||
set_value(i, static_cast<OutputType>(vec[*found]));
|
||||
else
|
||||
set_value(i, get_default(i));
|
||||
set_value(i, default_value_extractor[i]);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -924,7 +765,7 @@ void IPAddressDictionary::getItemsImpl(
|
||||
if (found != ipNotFound())
|
||||
set_value(i, static_cast<OutputType>(vec[*found]));
|
||||
else
|
||||
set_value(i, get_default(i));
|
||||
set_value(i, default_value_extractor[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -940,45 +781,24 @@ void IPAddressDictionary::setAttributeValueImpl(Attribute & attribute, const T v
|
||||
|
||||
void IPAddressDictionary::setAttributeValue(Attribute & attribute, const Field & value)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
return setAttributeValueImpl<UInt8>(attribute, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
return setAttributeValueImpl<UInt16>(attribute, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
return setAttributeValueImpl<UInt32>(attribute, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
return setAttributeValueImpl<UInt64>(attribute, value.get<UInt64>());
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
return setAttributeValueImpl<UInt128>(attribute, value.get<UInt128>());
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
return setAttributeValueImpl<Int8>(attribute, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
return setAttributeValueImpl<Int16>(attribute, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
return setAttributeValueImpl<Int32>(attribute, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
return setAttributeValueImpl<Int64>(attribute, value.get<Int64>());
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
return setAttributeValueImpl<Float32>(attribute, value.get<Float64>());
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
return setAttributeValueImpl<Float64>(attribute, value.get<Float64>());
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
return setAttributeValueImpl<Decimal32>(attribute, value.get<Decimal32>());
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
return setAttributeValueImpl<Decimal64>(attribute, value.get<Decimal64>());
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
return setAttributeValueImpl<Decimal128>(attribute, value.get<Decimal128>());
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
const auto & string = value.get<String>();
|
||||
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
return setAttributeValueImpl<StringRef>(attribute, StringRef{string_in_arena, string.size()});
|
||||
setAttributeValueImpl<StringRef>(attribute, StringRef{string_in_arena, string.size()});
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
setAttributeValueImpl<AttributeType>(attribute, value.get<NearestFieldType<AttributeType>>());
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
const IPAddressDictionary::Attribute & IPAddressDictionary::getAttribute(const std::string & attribute_name) const
|
||||
@ -1045,7 +865,7 @@ static auto keyViewGetter()
|
||||
|
||||
BlockInputStreamPtr IPAddressDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<IPAddressDictionary, UInt64>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
|
||||
|
||||
|
||||
const bool is_ipv4 = std::get_if<IPv4Container>(&ip_column) != nullptr;
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -61,91 +62,16 @@ public:
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const;
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -211,17 +137,23 @@ private:
|
||||
void calculateBytesAllocated();
|
||||
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
static Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsByTwoKeyColumnsImpl(
|
||||
const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void
|
||||
getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
void setAttributeValueImpl(Attribute & attribute, const T value);
|
||||
|
@ -5,6 +5,8 @@
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
#include <numeric>
|
||||
|
||||
@ -92,6 +94,61 @@ bool IPolygonDictionary::isInjective(const std::string &) const
|
||||
return false;
|
||||
}
|
||||
|
||||
ColumnPtr IPolygonDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto & null_value = std::get<AttributeType>(null_values[index]);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto column_string = ColumnString::create();
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<String, StringRef>(
|
||||
index,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
index,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value) { return out[row] = value; },
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_t) const
|
||||
{
|
||||
// TODO: In order for this to work one would first have to support retrieving arrays from dictionaries.
|
||||
@ -255,8 +312,12 @@ std::vector<IPolygonDictionary::Point> IPolygonDictionary::extractPoints(const C
|
||||
return result;
|
||||
}
|
||||
|
||||
void IPolygonDictionary::has(const Columns & key_columns, const DataTypes &, PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr IPolygonDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
|
||||
{
|
||||
auto size = key_columns.front()->size();
|
||||
auto result = ColumnUInt8::create(size);
|
||||
auto& out = result->getData();
|
||||
|
||||
size_t row = 0;
|
||||
for (const auto & pt : extractPoints(key_columns))
|
||||
{
|
||||
@ -266,6 +327,8 @@ void IPolygonDictionary::has(const Columns & key_columns, const DataTypes &, Pad
|
||||
}
|
||||
|
||||
query_count.fetch_add(row, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) const
|
||||
@ -276,152 +339,12 @@ size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name)
|
||||
return it->second;
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void IPolygonDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto ind = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
const auto null_value = std::get<TYPE>(null_values[ind]); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
ind, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t) { return null_value; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void IPolygonDictionary::getString(
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const
|
||||
{
|
||||
const auto ind = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto & null_value = StringRef{std::get<String>(null_values[ind])};
|
||||
|
||||
getItemsImpl<String, StringRef>(
|
||||
ind,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void IPolygonDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes &, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto ind = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
ind, \
|
||||
key_columns, \
|
||||
[&](const size_t row, const auto value) { out[row] = value; }, \
|
||||
[&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void IPolygonDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
const auto ind = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsImpl<String, StringRef>(
|
||||
ind,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void IPolygonDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes &, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto ind = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
\
|
||||
getItemsImpl<TYPE, TYPE>( \
|
||||
ind, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void IPolygonDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const String & def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
const auto ind = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsImpl<String, StringRef>(
|
||||
ind,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return StringRef{def}; });
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void IPolygonDictionary::getItemsImpl(
|
||||
size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||
size_t attribute_ind,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto points = extractPoints(key_columns);
|
||||
|
||||
@ -437,7 +360,7 @@ void IPolygonDictionary::getItemsImpl(
|
||||
id = ids[id];
|
||||
if (!found)
|
||||
{
|
||||
set_value(i, static_cast<OutputType>(get_default(i)));
|
||||
set_value(i, static_cast<OutputType>(default_value_extractor[i]));
|
||||
continue;
|
||||
}
|
||||
if constexpr (std::is_same<AttributeType, String>::value)
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -78,101 +79,19 @@ public:
|
||||
|
||||
bool isInjective(const std::string & attribute_name) const override;
|
||||
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
|
||||
/** Functions used to retrieve attributes of specific type by key. */
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes &, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnString * const def,
|
||||
ColumnString * const out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes &, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const;
|
||||
|
||||
/** Checks whether or not a point can be found in one of the polygons in the dictionary.
|
||||
* The check is performed for multiple points represented by columns of their x and y coordinates.
|
||||
* The boolean result is written to out.
|
||||
*/
|
||||
// TODO: Refactor the whole dictionary design to perform stronger checks, i.e. make this an override.
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
/** Single coordinate type. */
|
||||
using Coord = Float32;
|
||||
/** A two-dimensional point in Euclidean coordinates. */
|
||||
@ -224,8 +143,12 @@ private:
|
||||
void appendNullValue(AttributeUnderlyingType type, const Field & value);
|
||||
|
||||
/** Helper function for retrieving the value of an attribute by key. */
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void getItemsImpl(size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
size_t attribute_ind,
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
/** A mapping from the names of the attributes to their index in the two vectors defined below. */
|
||||
std::map<std::string, size_t> attribute_index_by_name;
|
||||
|
@ -37,26 +37,6 @@ protected:
|
||||
Block getBlock(size_t start, size_t length) const override;
|
||||
|
||||
private:
|
||||
template <typename Type>
|
||||
using DictionaryGetter = void (DictionaryType::*)(
|
||||
const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, PaddedPODArray<Type> &) const;
|
||||
|
||||
template <typename Type>
|
||||
using DictionaryDecimalGetter = void (DictionaryType::*)(
|
||||
const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, DecimalPaddedPODArray<Type> &) const;
|
||||
|
||||
template <typename AttributeType, typename Getter>
|
||||
ColumnPtr getColumnFromAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const PaddedPODArray<Int64> & dates,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & concrete_dictionary) const;
|
||||
ColumnPtr getColumnFromAttributeString(
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const PaddedPODArray<Int64> & dates,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & concrete_dictionary) const;
|
||||
template <typename T>
|
||||
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
|
||||
|
||||
@ -122,41 +102,6 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getBlock(
|
||||
return fillBlock(block_ids, block_start_dates, block_end_dates);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename RangeType, typename Key>
|
||||
template <typename AttributeType, typename Getter>
|
||||
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const PaddedPODArray<Int64> & dates,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & concrete_dictionary) const
|
||||
{
|
||||
if constexpr (IsDecimalNumber<AttributeType>)
|
||||
{
|
||||
auto column = ColumnDecimal<AttributeType>::create(ids_to_fill.size(), 0); /// NOTE: There's wrong scale here, but it's unused.
|
||||
(concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column->getData());
|
||||
return column;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto column_vector = ColumnVector<AttributeType>::create(ids_to_fill.size());
|
||||
(concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column_vector->getData());
|
||||
return column_vector;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename RangeType, typename Key>
|
||||
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttributeString(
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const PaddedPODArray<Int64> & dates,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & concrete_dictionary) const
|
||||
{
|
||||
auto column_string = ColumnString::create();
|
||||
concrete_dictionary.getString(attribute.name, ids_to_fill, dates, column_string.get());
|
||||
return column_string;
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename RangeType, typename Key>
|
||||
template <typename T>
|
||||
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
|
||||
@ -168,7 +113,6 @@ ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getCo
|
||||
return column_vector;
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename RangeType, typename Key>
|
||||
template <typename DictionarySpecialAttributeType, typename T>
|
||||
void RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::addSpecialColumn(
|
||||
@ -216,68 +160,24 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::fillBlock
|
||||
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
|
||||
|
||||
addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids_to_fill, columns);
|
||||
auto ids_column = columns.back().column;
|
||||
addSpecialColumn(structure.range_min, structure.range_max->type, "Range Start", names, block_start_dates, columns);
|
||||
addSpecialColumn(structure.range_max, structure.range_max->type, "Range End", names, block_end_dates, columns);
|
||||
|
||||
auto date_key = makeDateKey(block_start_dates, block_end_dates);
|
||||
auto date_column = getColumnFromPODArray(date_key);
|
||||
|
||||
for (const auto idx : ext::range(0, structure.attributes.size()))
|
||||
{
|
||||
const DictionaryAttribute & attribute = structure.attributes[idx];
|
||||
if (names.find(attribute.name) != names.end())
|
||||
{
|
||||
ColumnPtr column;
|
||||
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
|
||||
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids_to_fill, date_key, attribute, *dictionary)
|
||||
switch (attribute.underlying_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt128);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int8);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int16);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Decimal32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Decimal64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Decimal128);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
column = getColumnFromAttributeString(ids_to_fill, date_key, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
#undef GET_COLUMN_FORM_ATTRIBUTE
|
||||
ColumnPtr column = dictionary->getColumn(
|
||||
attribute.name,
|
||||
attribute.type,
|
||||
{ids_column, date_column},
|
||||
{std::make_shared<DataTypeUInt64>(), std::make_shared<DataTypeInt64>()},
|
||||
nullptr);
|
||||
columns.emplace_back(column, attribute.type, attribute.name);
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,8 @@
|
||||
#include <ext/range.h>
|
||||
#include "DictionaryFactory.h"
|
||||
#include "RangeDictionaryBlockInputStream.h"
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
@ -50,6 +52,7 @@ namespace ErrorCodes
|
||||
extern const int DICTIONARY_IS_EMPTY;
|
||||
extern const int TYPE_MISMATCH;
|
||||
extern const int UNSUPPORTED_METHOD;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
bool RangeHashedDictionary::Range::isCorrectDate(const RangeStorageType & date)
|
||||
@ -85,66 +88,101 @@ RangeHashedDictionary::RangeHashedDictionary(
|
||||
calculateBytesAllocated();
|
||||
}
|
||||
|
||||
|
||||
#define DECLARE_MULTIPLE_GETTER(TYPE) \
|
||||
void RangeHashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<RangeStorageType> & dates, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItems<TYPE>(attribute, ids, dates, out); \
|
||||
}
|
||||
DECLARE_MULTIPLE_GETTER(UInt8)
|
||||
DECLARE_MULTIPLE_GETTER(UInt16)
|
||||
DECLARE_MULTIPLE_GETTER(UInt32)
|
||||
DECLARE_MULTIPLE_GETTER(UInt64)
|
||||
DECLARE_MULTIPLE_GETTER(UInt128)
|
||||
DECLARE_MULTIPLE_GETTER(Int8)
|
||||
DECLARE_MULTIPLE_GETTER(Int16)
|
||||
DECLARE_MULTIPLE_GETTER(Int32)
|
||||
DECLARE_MULTIPLE_GETTER(Int64)
|
||||
DECLARE_MULTIPLE_GETTER(Float32)
|
||||
DECLARE_MULTIPLE_GETTER(Float64)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal32)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal64)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal128)
|
||||
#undef DECLARE_MULTIPLE_GETTER
|
||||
|
||||
void RangeHashedDictionary::getString(
|
||||
ColumnPtr RangeHashedDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
ColumnString * out) const
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::utString);
|
||||
const auto & attr = *std::get<Ptr<StringRef>>(attribute.maps);
|
||||
const auto & null_value = std::get<String>(attribute.null_values);
|
||||
ColumnPtr result;
|
||||
|
||||
for (const auto i : ext::range(0, ids.size()))
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
/// Cast second column to storage type
|
||||
Columns modified_key_columns = key_columns;
|
||||
|
||||
auto range_storage_column = key_columns[1];
|
||||
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};
|
||||
|
||||
auto range_column_storage_type = std::make_shared<DataTypeInt64>();
|
||||
modified_key_columns[1] = castColumnAccurate(column_to_cast, range_column_storage_type);
|
||||
|
||||
ColumnUInt8::MutablePtr col_null_map_to;
|
||||
ColumnUInt8::Container * vec_null_map_to = nullptr;
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
const auto * it = attr.find(ids[i]);
|
||||
if (it)
|
||||
{
|
||||
const auto date = dates[i];
|
||||
const auto & ranges_and_values = it->getMapped();
|
||||
const auto val_it
|
||||
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<StringRef> & v)
|
||||
{
|
||||
return v.range.contains(date);
|
||||
});
|
||||
col_null_map_to = ColumnUInt8::create(keys_size, false);
|
||||
vec_null_map_to = &col_null_map_to->getData();
|
||||
}
|
||||
|
||||
const auto string_ref = val_it != std::end(ranges_and_values) ? val_it->value : StringRef{null_value};
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
|
||||
AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<ValueType, ValueType>(
|
||||
attribute,
|
||||
modified_key_columns,
|
||||
[&](const size_t row, const StringRef value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out->insertData(value.data, value.size);
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
out->insertData(null_value.data(), null_value.size());
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
getItemsImpl<ValueType, ValueType>(
|
||||
attribute,
|
||||
modified_key_columns,
|
||||
[&](const size_t row, const auto value, bool is_null)
|
||||
{
|
||||
if (attribute.is_nullable)
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
|
||||
out[row] = value;
|
||||
},
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.is_nullable)
|
||||
{
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
}
|
||||
|
||||
query_count.fetch_add(ids.size(), std::memory_order_relaxed);
|
||||
return result;
|
||||
}
|
||||
|
||||
ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns &, const DataTypes &) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
|
||||
"Has not supported", getDictionaryID().getNameForLogs());
|
||||
}
|
||||
|
||||
void RangeHashedDictionary::createAttributes()
|
||||
{
|
||||
@ -154,7 +192,7 @@ void RangeHashedDictionary::createAttributes()
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
attribute_index_by_name.emplace(attribute.name, attributes.size());
|
||||
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
|
||||
attributes.push_back(createAttribute(attribute, attribute.null_value));
|
||||
|
||||
if (attribute.hierarchical)
|
||||
throw Exception{ErrorCodes::BAD_ARGUMENTS, "Hierarchical attributes not supported by {} dictionary.",
|
||||
@ -220,66 +258,27 @@ void RangeHashedDictionary::addAttributeSize(const Attribute & attribute)
|
||||
bucket_count = map_ref->getBufferSizeInCells();
|
||||
}
|
||||
|
||||
template <>
|
||||
void RangeHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
}
|
||||
|
||||
void RangeHashedDictionary::calculateBytesAllocated()
|
||||
{
|
||||
bytes_allocated += attributes.size() * sizeof(attributes.front());
|
||||
|
||||
for (const auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
addAttributeSize<UInt8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
addAttributeSize<UInt16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
addAttributeSize<UInt32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
addAttributeSize<UInt64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
addAttributeSize<UInt128>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
addAttributeSize<Int8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
addAttributeSize<Int16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
addAttributeSize<Int32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
addAttributeSize<Int64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
addAttributeSize<Float32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
addAttributeSize<Float64>(attribute);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
addAttributeSize<AttributeType>(attribute);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
addAttributeSize<Decimal32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
addAttributeSize<Decimal64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
addAttributeSize<Decimal128>(attribute);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
@ -290,125 +289,80 @@ void RangeHashedDictionary::createAttributeImpl(Attribute & attribute, const Fie
|
||||
attribute.maps = std::make_unique<Collection<T>>();
|
||||
}
|
||||
|
||||
RangeHashedDictionary::Attribute
|
||||
RangeHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
template <>
|
||||
void RangeHashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}, {}};
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
const String & string = null_value.get<String>();
|
||||
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
|
||||
attribute.maps = std::make_unique<Collection<StringRef>>();
|
||||
}
|
||||
|
||||
switch (type)
|
||||
RangeHashedDictionary::Attribute
|
||||
RangeHashedDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
|
||||
{
|
||||
Attribute attr{attribute.underlying_type, attribute.is_nullable, {}, {}, {}};
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
attr.null_values = null_value.get<String>();
|
||||
attr.maps = std::make_unique<Collection<StringRef>>();
|
||||
attr.string_arena = std::make_unique<Arena>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.underlying_type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
template <typename OutputType>
|
||||
void RangeHashedDictionary::getItems(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
PaddedPODArray<OutputType> & out) const
|
||||
{
|
||||
if (false) {} // NOLINT
|
||||
#define DISPATCH(TYPE) else if (attribute.type == AttributeUnderlyingType::ut##TYPE) getItemsImpl<TYPE, OutputType>(attribute, ids, dates, out);
|
||||
DISPATCH(UInt8)
|
||||
DISPATCH(UInt16)
|
||||
DISPATCH(UInt32)
|
||||
DISPATCH(UInt64)
|
||||
DISPATCH(UInt128)
|
||||
DISPATCH(Int8)
|
||||
DISPATCH(Int16)
|
||||
DISPATCH(Int32)
|
||||
DISPATCH(Int64)
|
||||
DISPATCH(Float32)
|
||||
DISPATCH(Float64)
|
||||
DISPATCH(Decimal32)
|
||||
DISPATCH(Decimal64)
|
||||
DISPATCH(Decimal128)
|
||||
#undef DISPATCH
|
||||
else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void RangeHashedDictionary::getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
PaddedPODArray<OutputType> & out) const
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
|
||||
const auto null_value = std::get<AttributeType>(attribute.null_values);
|
||||
PaddedPODArray<Key> key_backup_storage;
|
||||
PaddedPODArray<RangeStorageType> range_backup_storage;
|
||||
|
||||
for (const auto i : ext::range(0, ids.size()))
|
||||
const PaddedPODArray<Key> & ids = getColumnVectorData(this, key_columns[0], key_backup_storage);
|
||||
const PaddedPODArray<RangeStorageType> & dates = getColumnVectorData(this, key_columns[1], range_backup_storage);
|
||||
|
||||
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
|
||||
|
||||
for (const auto row : ext::range(0, ids.size()))
|
||||
{
|
||||
const auto it = attr.find(ids[i]);
|
||||
const auto it = attr.find(ids[row]);
|
||||
if (it)
|
||||
{
|
||||
const auto date = dates[i];
|
||||
const auto date = dates[row];
|
||||
const auto & ranges_and_values = it->getMapped();
|
||||
const auto val_it
|
||||
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v)
|
||||
{
|
||||
return v.range.contains(date);
|
||||
});
|
||||
const auto val_it = std::find_if(
|
||||
std::begin(ranges_and_values),
|
||||
std::end(ranges_and_values),
|
||||
[date](const Value<AttributeType> & v)
|
||||
{
|
||||
return v.range.contains(date);
|
||||
});
|
||||
|
||||
out[i] = static_cast<OutputType>(val_it != std::end(ranges_and_values) ? val_it->value : null_value); // NOLINT
|
||||
if (val_it != std::end(ranges_and_values))
|
||||
{
|
||||
auto& value = val_it->value;
|
||||
|
||||
if (value)
|
||||
set_value(row, static_cast<OutputType>(*value), false); // NOLINT
|
||||
else
|
||||
set_value(row, default_value_extractor[row], true);
|
||||
}
|
||||
else
|
||||
{
|
||||
set_value(row, default_value_extractor[row], false);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
out[i] = static_cast<OutputType>(null_value); // NOLINT
|
||||
set_value(row, default_value_extractor[row], false);
|
||||
}
|
||||
}
|
||||
|
||||
@ -417,9 +371,32 @@ void RangeHashedDictionary::getItemsImpl(
|
||||
|
||||
|
||||
template <typename T>
|
||||
void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value)
|
||||
void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value)
|
||||
{
|
||||
auto & map = *std::get<Ptr<T>>(attribute.maps);
|
||||
using ValueType = std::conditional_t<std::is_same_v<T, String>, StringRef, T>;
|
||||
auto & map = *std::get<Ptr<ValueType>>(attribute.maps);
|
||||
|
||||
Value<ValueType> value_to_insert;
|
||||
|
||||
if (attribute.is_nullable && value.isNull())
|
||||
{
|
||||
value_to_insert = { range, {} };
|
||||
}
|
||||
else
|
||||
{
|
||||
if constexpr (std::is_same_v<T, String>)
|
||||
{
|
||||
const auto & string = value.get<String>();
|
||||
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
const StringRef string_ref{string_in_arena, string.size()};
|
||||
value_to_insert = Value<ValueType>{ range, { string_ref }};
|
||||
}
|
||||
else
|
||||
{
|
||||
value_to_insert = Value<ValueType>{ range, { value.get<NearestFieldType<ValueType>>() }};
|
||||
}
|
||||
}
|
||||
|
||||
const auto it = map.find(id);
|
||||
|
||||
if (it)
|
||||
@ -427,92 +404,28 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K
|
||||
auto & values = it->getMapped();
|
||||
|
||||
const auto insert_it
|
||||
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<T> & lhs, const Range & rhs_range)
|
||||
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<ValueType> & lhs, const Range & rhs_range)
|
||||
{
|
||||
return lhs.range < rhs_range;
|
||||
});
|
||||
|
||||
values.insert(insert_it, Value<T>{range, value});
|
||||
values.insert(insert_it, std::move(value_to_insert));
|
||||
}
|
||||
else
|
||||
map.insert({id, Values<T>{Value<T>{range, value}}});
|
||||
map.insert({id, Values<ValueType>{std::move(value_to_insert)}});
|
||||
}
|
||||
|
||||
void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
setAttributeValueImpl<UInt8>(attribute, id, range, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
setAttributeValueImpl<UInt16>(attribute, id, range, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
setAttributeValueImpl<UInt32>(attribute, id, range, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
setAttributeValueImpl<UInt64>(attribute, id, range, value.get<UInt64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
setAttributeValueImpl<UInt128>(attribute, id, range, value.get<UInt128>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
setAttributeValueImpl<Int8>(attribute, id, range, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
setAttributeValueImpl<Int16>(attribute, id, range, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
setAttributeValueImpl<Int32>(attribute, id, range, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
setAttributeValueImpl<Int64>(attribute, id, range, value.get<Int64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
setAttributeValueImpl<Float32>(attribute, id, range, value.get<Float64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
setAttributeValueImpl<Float64>(attribute, id, range, value.get<Float64>());
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
setAttributeValueImpl<Decimal32>(attribute, id, range, value.get<Decimal32>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
setAttributeValueImpl<Decimal64>(attribute, id, range, value.get<Decimal64>());
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
setAttributeValueImpl<Decimal128>(attribute, id, range, value.get<Decimal128>());
|
||||
break;
|
||||
setAttributeValueImpl<AttributeType>(attribute, id, range, value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
auto & map = *std::get<Ptr<StringRef>>(attribute.maps);
|
||||
const auto & string = value.get<String>();
|
||||
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
const StringRef string_ref{string_in_arena, string.size()};
|
||||
|
||||
auto * it = map.find(id);
|
||||
|
||||
if (it)
|
||||
{
|
||||
auto & values = it->getMapped();
|
||||
|
||||
const auto insert_it = std::lower_bound(
|
||||
std::begin(values), std::end(values), range, [](const Value<StringRef> & lhs, const Range & rhs_range)
|
||||
{
|
||||
return lhs.range < rhs_range;
|
||||
});
|
||||
|
||||
values.insert(insert_it, Value<StringRef>{range, string_ref});
|
||||
}
|
||||
else
|
||||
map.insert({id, Values<StringRef>{Value<StringRef>{range, string_ref}}});
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttribute(const std::string & attribute_name) const
|
||||
@ -541,55 +454,18 @@ void RangeHashedDictionary::getIdsAndDates(
|
||||
{
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
getIdsAndDates<UInt8>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
getIdsAndDates<UInt16>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
getIdsAndDates<UInt32>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
getIdsAndDates<UInt64>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
getIdsAndDates<UInt128>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
getIdsAndDates<Int8>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
getIdsAndDates<Int16>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
getIdsAndDates<Int32>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
getIdsAndDates<Int64>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
getIdsAndDates<Float32>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
getIdsAndDates<Float64>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
getIdsAndDates<Decimal32>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
getIdsAndDates<Decimal64>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
getIdsAndDates<Decimal128>(attribute, ids, start_dates, end_dates);
|
||||
break;
|
||||
}
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates);
|
||||
else
|
||||
getIdsAndDates<AttributeType>(attribute, ids, start_dates, end_dates);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
template <typename T, typename RangeType>
|
||||
|
@ -1,16 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <variant>
|
||||
#include <optional>
|
||||
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -52,38 +54,18 @@ public:
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
}
|
||||
|
||||
typedef Int64 RangeStorageType;
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::range; }
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
#define DECLARE_MULTIPLE_GETTER(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<RangeStorageType> & dates, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE_MULTIPLE_GETTER(UInt8)
|
||||
DECLARE_MULTIPLE_GETTER(UInt16)
|
||||
DECLARE_MULTIPLE_GETTER(UInt32)
|
||||
DECLARE_MULTIPLE_GETTER(UInt64)
|
||||
DECLARE_MULTIPLE_GETTER(UInt128)
|
||||
DECLARE_MULTIPLE_GETTER(Int8)
|
||||
DECLARE_MULTIPLE_GETTER(Int16)
|
||||
DECLARE_MULTIPLE_GETTER(Int32)
|
||||
DECLARE_MULTIPLE_GETTER(Int64)
|
||||
DECLARE_MULTIPLE_GETTER(Float32)
|
||||
DECLARE_MULTIPLE_GETTER(Float64)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal32)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal64)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal128)
|
||||
#undef DECLARE_MULTIPLE_GETTER
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
ColumnString * out) const;
|
||||
using RangeStorageType = Int64;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -101,7 +83,7 @@ private:
|
||||
struct Value final
|
||||
{
|
||||
Range range;
|
||||
T value;
|
||||
std::optional<T> value;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
@ -111,10 +93,14 @@ private:
|
||||
template <typename T>
|
||||
using Ptr = std::unique_ptr<Collection<T>>;
|
||||
|
||||
using NullableSet = HashSet<Key, DefaultHash<Key>>;
|
||||
|
||||
struct Attribute final
|
||||
{
|
||||
public:
|
||||
AttributeUnderlyingType type;
|
||||
bool is_nullable;
|
||||
|
||||
std::variant<
|
||||
UInt8,
|
||||
UInt16,
|
||||
@ -130,7 +116,7 @@ private:
|
||||
Decimal128,
|
||||
Float32,
|
||||
Float64,
|
||||
String>
|
||||
StringRef>
|
||||
null_values;
|
||||
std::variant<
|
||||
Ptr<UInt8>,
|
||||
@ -162,30 +148,21 @@ private:
|
||||
void calculateBytesAllocated();
|
||||
|
||||
template <typename T>
|
||||
void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
static void createAttributeImpl(Attribute & attribute, const Field & null_value);
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
|
||||
|
||||
|
||||
template <typename OutputType>
|
||||
void getItems(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
PaddedPODArray<OutputType> & out) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
PaddedPODArray<OutputType> & out) const;
|
||||
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename T>
|
||||
void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value);
|
||||
static void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value);
|
||||
|
||||
void setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value);
|
||||
static void setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value);
|
||||
|
||||
const Attribute & getAttribute(const std::string & attribute_name) const;
|
||||
|
||||
|
@ -22,7 +22,8 @@
|
||||
#include <filesystem>
|
||||
#include <city.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -445,7 +446,7 @@ void SSDCachePartition::flush()
|
||||
|
||||
template <typename Out, typename GetDefault>
|
||||
void SSDCachePartition::getValue(const size_t attribute_index, const PaddedPODArray<UInt64> & ids,
|
||||
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
|
||||
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
|
||||
std::chrono::system_clock::time_point now) const
|
||||
{
|
||||
auto set_value = [&](const size_t index, ReadBuffer & buf)
|
||||
@ -456,7 +457,7 @@ void SSDCachePartition::getValue(const size_t attribute_index, const PaddedPODAr
|
||||
if (metadata.expiresAt() > now)
|
||||
{
|
||||
if (metadata.isDefault())
|
||||
out[index] = get_default(index);
|
||||
out[index] = default_value_extractor[index];
|
||||
else
|
||||
{
|
||||
ignoreFromBufferToAttributeIndex(attribute_index, buf);
|
||||
@ -939,14 +940,14 @@ SSDCacheStorage::~SSDCacheStorage()
|
||||
template <typename Out, typename GetDefault>
|
||||
void SSDCacheStorage::getValue(const size_t attribute_index, const PaddedPODArray<UInt64> & ids,
|
||||
ResultArrayType<Out> & out, std::unordered_map<Key, std::vector<size_t>> & not_found,
|
||||
GetDefault & get_default, std::chrono::system_clock::time_point now) const
|
||||
GetDefault & default_value_extractor, std::chrono::system_clock::time_point now) const
|
||||
{
|
||||
std::vector<bool> found(ids.size(), false);
|
||||
|
||||
{
|
||||
std::shared_lock lock(rw_lock);
|
||||
for (const auto & partition : partitions)
|
||||
partition->getValue<Out>(attribute_index, ids, out, found, get_default, now);
|
||||
partition->getValue<Out>(attribute_index, ids, out, found, default_value_extractor, now);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < ids.size(); ++i)
|
||||
@ -1327,102 +1328,62 @@ SSDCacheDictionary::SSDCacheDictionary(
|
||||
createAttributes();
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void SSDCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto index = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
const auto null_value = std::get<TYPE>(null_values[index]); /* NOLINT */ \
|
||||
getItemsNumberImpl<TYPE, TYPE>(index, ids, out, [&](const size_t) { return null_value; }); /* NOLINT */ \
|
||||
}
|
||||
ColumnPtr SSDCacheDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
auto keys_size = ids.size();
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void SSDCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto index = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>( \
|
||||
index, \
|
||||
ids, \
|
||||
out, \
|
||||
[&](const size_t row) { return def[row]; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void SSDCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto index = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>( \
|
||||
index, \
|
||||
ids, \
|
||||
out, \
|
||||
[&](const size_t) { return def; }); \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
/// Generic callback that materialises the result column for one concrete attribute type.
/// The attribute type is taken from the `AttributeType` member alias of the argument's (decayed) type;
/// the argument value itself is not otherwise used.
/// Captures by reference: `index`, `ids`, `keys_size`, `dictionary_attribute`,
/// `default_values_column`, `null_values` and the outer `result`.
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
/// The attribute's configured null value is handed to the extractor together with the
/// caller-provided defaults column; the extractor is later indexed by row to get a default.
const auto & null_value = std::get<AttributeType>(null_values[index]);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
|
||||
|
||||
/// Result column obtained from the provider for this attribute and `keys_size`.
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
/// String attributes are filled through the column pointer; every other type is filled
/// through the column's underlying data array.
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
getItemsStringImpl(index, ids, column.get(), default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
getItemsNumberImpl<AttributeType, AttributeType>(index, ids, out, default_value_extractor);
|
||||
}
|
||||
|
||||
/// Publish the filled column to the enclosing function's `result`.
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
void SSDCacheDictionary::getItemsNumberImpl(
|
||||
const size_t attribute_index, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
|
||||
const size_t attribute_index,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ResultArrayType<OutputType> & out,
|
||||
DefaultGetter & default_value_extractor) const
|
||||
{
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
std::unordered_map<Key, std::vector<size_t>> not_found_ids;
|
||||
storage.getValue<OutputType>(attribute_index, ids, out, not_found_ids, get_default, now);
|
||||
storage.getValue<OutputType>(attribute_index, ids, out, not_found_ids, default_value_extractor, now);
|
||||
if (not_found_ids.empty())
|
||||
return;
|
||||
|
||||
@ -1440,42 +1401,17 @@ void SSDCacheDictionary::getItemsNumberImpl(
|
||||
[&](const size_t id)
|
||||
{
|
||||
for (const size_t row : not_found_ids[id])
|
||||
out[row] = get_default(row);
|
||||
out[row] = default_value_extractor[row];
|
||||
},
|
||||
getLifetime());
|
||||
}
|
||||
|
||||
/// Fetches the string attribute `attribute_name` for `ids` into `out`, using the attribute's
/// configured null value as the default supplied to getItemsStringImpl.
/// The attribute's underlying type is checked against utString via checkAttributeType
/// (presumably an error is raised on mismatch - confirm against its definition).
void SSDCacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
|
||||
{
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
/// Non-owning view over the String stored in null_values[index]; the same view is returned for every row.
const auto null_value = StringRef{std::get<String>(null_values[index])};
|
||||
|
||||
getItemsStringImpl(index, ids, out, [&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
/// Fetches the string attribute `attribute_name` for `ids` into `out`, taking the default
/// for row `row` from `def->getDataAt(row)`.
/// The attribute's underlying type is checked against utString via checkAttributeType
/// (presumably an error is raised on mismatch - confirm against its definition).
void SSDCacheDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
|
||||
{
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
/// NOTE(review): `def` is dereferenced without a null check; callers are assumed to pass a valid column.
getItemsStringImpl(index, ids, out, [&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
|
||||
/// Fetches the string attribute `attribute_name` for `ids` into `out`, using the single
/// constant `def` as the default for every row.
/// The attribute's underlying type is checked against utString via checkAttributeType
/// (presumably an error is raised on mismatch - confirm against its definition).
void SSDCacheDictionary::getString(
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
|
||||
{
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
/// The default getter returns a non-owning StringRef over `def`.
getItemsStringImpl(index, ids, out, [&](const size_t) { return StringRef{def}; });
|
||||
}
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const PaddedPODArray<Key> & ids,
|
||||
ColumnString * out, DefaultGetter && get_default) const
|
||||
void SSDCacheDictionary::getItemsStringImpl(
|
||||
const size_t attribute_index,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ColumnString * out,
|
||||
DefaultGetter & default_value_extractor) const
|
||||
{
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
@ -1494,7 +1430,7 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
|
||||
{
|
||||
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
|
||||
{
|
||||
auto to_insert = get_default(row);
|
||||
auto to_insert = default_value_extractor[row];
|
||||
out->insertData(to_insert.data, to_insert.size);
|
||||
++default_index;
|
||||
}
|
||||
@ -1525,7 +1461,7 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
|
||||
const auto & id = ids[row];
|
||||
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
|
||||
{
|
||||
auto to_insert = get_default(row);
|
||||
auto to_insert = default_value_extractor[row];
|
||||
out->insertData(to_insert.data, to_insert.size);
|
||||
++default_index;
|
||||
}
|
||||
@ -1539,20 +1475,30 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
|
||||
}
|
||||
else
|
||||
{
|
||||
auto to_insert = get_default(row);
|
||||
auto to_insert = default_value_extractor[row];
|
||||
out->insertData(to_insert.data, to_insert.size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SSDCacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr SSDCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
|
||||
{
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
|
||||
|
||||
auto result = ColumnUInt8::create(ext::size(ids));
|
||||
auto& out = result->getData();
|
||||
|
||||
const auto rows = ext::size(ids);
|
||||
for (const auto row : ext::range(0, rows))
|
||||
out[row] = false;
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
std::unordered_map<Key, std::vector<size_t>> not_found_ids;
|
||||
storage.has(ids, out, not_found_ids, now);
|
||||
if (not_found_ids.empty())
|
||||
return;
|
||||
return result;
|
||||
|
||||
std::vector<Key> required_ids(not_found_ids.size());
|
||||
std::transform(std::begin(not_found_ids), std::end(not_found_ids), std::begin(required_ids), [](const auto & pair) { return pair.first; });
|
||||
@ -1571,11 +1517,13 @@ void SSDCacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UIn
|
||||
out[row] = false;
|
||||
},
|
||||
getLifetime());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BlockInputStreamPtr SSDCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<SSDCacheDictionary, Key>;
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<Key>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, storage.getCachedIds(), column_names);
|
||||
}
|
||||
|
||||
|
@ -2,11 +2,15 @@
|
||||
|
||||
#if defined(__linux__) || defined(__FreeBSD__)
|
||||
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <list>
|
||||
#include <shared_mutex>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include <Poco/Logger.h>
|
||||
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/ArenaWithFreeLists.h>
|
||||
@ -16,12 +20,11 @@
|
||||
#include <Core/Block.h>
|
||||
#include <Dictionaries/BucketCache.h>
|
||||
#include <IO/HashingWriteBuffer.h>
|
||||
#include <list>
|
||||
#include <pcg_random.hpp>
|
||||
#include <Poco/Logger.h>
|
||||
#include <shared_mutex>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -109,7 +112,7 @@ public:
|
||||
|
||||
template <typename Out, typename GetDefault>
|
||||
void getValue(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
|
||||
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
|
||||
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
|
||||
std::chrono::system_clock::time_point now) const;
|
||||
|
||||
void getString(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
|
||||
@ -232,7 +235,7 @@ public:
|
||||
template <typename Out, typename GetDefault>
|
||||
void getValue(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
|
||||
ResultArrayType<Out> & out, std::unordered_map<Key, std::vector<size_t>> & not_found,
|
||||
GetDefault & get_default, std::chrono::system_clock::time_point now) const;
|
||||
GetDefault & default_value_extractor, std::chrono::system_clock::time_point now) const;
|
||||
|
||||
void getString(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
|
||||
StringRefs & refs, ArenaWithFreeLists & arena, std::unordered_map<Key, std::vector<size_t>> & not_found,
|
||||
@ -351,77 +354,20 @@ public:
|
||||
|
||||
std::exception_ptr getLastException() const override { return storage.getLastException(); }
|
||||
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
|
||||
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = SSDCacheStorage::ResultArrayType<T>;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void
|
||||
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * def, ColumnString * out)
|
||||
const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * out) const;
|
||||
|
||||
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
@ -434,11 +380,17 @@ private:
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
void getItemsNumberImpl(
|
||||
size_t attribute_index, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
|
||||
size_t attribute_index,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ResultArrayType<OutputType> & out,
|
||||
DefaultGetter & default_value_extractor) const;
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void getItemsStringImpl(size_t attribute_index, const PaddedPODArray<Key> & ids,
|
||||
ColumnString * out, DefaultGetter && get_default) const;
|
||||
void getItemsStringImpl(
|
||||
size_t attribute_index,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
ColumnString * out,
|
||||
DefaultGetter & default_value_extractor) const;
|
||||
|
||||
const std::string name;
|
||||
const DictionaryStructure dict_struct;
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <Common/ProfilingScopedRWLock.h>
|
||||
#include <Common/MemorySanitizer.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include "DictionaryBlockInputStream.h"
|
||||
#include "DictionaryFactory.h"
|
||||
#include <IO/AIO.h>
|
||||
@ -23,7 +24,7 @@
|
||||
#include <filesystem>
|
||||
#include <city.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -461,8 +462,12 @@ void SSDComplexKeyCachePartition::flush()
|
||||
|
||||
template <typename Out, typename GetDefault>
|
||||
void SSDComplexKeyCachePartition::getValue(
|
||||
const size_t attribute_index, const Columns & key_columns, const DataTypes & key_types,
|
||||
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
|
||||
const size_t attribute_index,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
ResultArrayType<Out> & out,
|
||||
std::vector<bool> & found,
|
||||
GetDefault & default_value_extractor,
|
||||
std::chrono::system_clock::time_point now) const
|
||||
{
|
||||
auto set_value = [&](const size_t index, ReadBuffer & buf)
|
||||
@ -474,7 +479,7 @@ void SSDComplexKeyCachePartition::getValue(
|
||||
if (metadata.expiresAt() > now)
|
||||
{
|
||||
if (metadata.isDefault())
|
||||
out[index] = get_default(index);
|
||||
out[index] = default_value_extractor[index];
|
||||
else
|
||||
{
|
||||
ignoreFromBufferToAttributeIndex(attribute_index, buf);
|
||||
@ -520,7 +525,7 @@ void SSDComplexKeyCachePartition::getString(const size_t attribute_index,
|
||||
getImpl(key_columns, key_types, set_value, found);
|
||||
}
|
||||
|
||||
void SSDComplexKeyCachePartition::has(
|
||||
void SSDComplexKeyCachePartition::hasKeys(
|
||||
const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
|
||||
std::vector<bool> & found, std::chrono::system_clock::time_point now) const
|
||||
{
|
||||
@ -1018,7 +1023,7 @@ void SSDComplexKeyCacheStorage::getString(
|
||||
hit_count.fetch_add(n - count_not_found, std::memory_order_release);
|
||||
}
|
||||
|
||||
void SSDComplexKeyCacheStorage::has(
|
||||
void SSDComplexKeyCacheStorage::hasKeys(
|
||||
const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
|
||||
std::unordered_map<KeyRef, std::vector<size_t>> & not_found,
|
||||
TemporalComplexKeysPool & not_found_pool, std::chrono::system_clock::time_point now) const
|
||||
@ -1031,7 +1036,7 @@ void SSDComplexKeyCacheStorage::has(
|
||||
{
|
||||
std::shared_lock lock(rw_lock);
|
||||
for (const auto & partition : partitions)
|
||||
partition->has(key_columns, key_types, out, found, now);
|
||||
partition->hasKeys(key_columns, key_types, out, found, now);
|
||||
}
|
||||
|
||||
size_t count_not_found = 0;
|
||||
@ -1376,96 +1381,64 @@ SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary(
|
||||
createAttributes();
|
||||
}
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void SSDComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto index = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
const auto null_value = std::get<TYPE>(null_values[index]); /* NOLINT */ \
|
||||
getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t) { return null_value; }); /* NOLINT */ \
|
||||
}
|
||||
ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void SSDComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto index = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t row) { return def[row]; }); /* NOLINT */ \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void SSDComplexKeyCacheDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto index = getAttributeIndex(attribute_name); \
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t) { return def; }); /* NOLINT */ \
|
||||
}
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
auto keys_size = key_columns.front()->size();
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
/// Generic callback that materialises the result column for one concrete attribute type.
/// The attribute type is taken from the `AttributeType` member alias of the argument's (decayed) type;
/// the argument value itself is not otherwise used.
/// Captures by reference: `index`, `key_columns`, `key_types`, `keys_size`, `dictionary_attribute`,
/// `default_values_column`, `null_values` and the outer `result`.
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
/// The attribute's configured null value is handed to the extractor together with the
/// caller-provided defaults column; the extractor is later indexed by row to get a default.
const auto & null_value = std::get<AttributeType>(null_values[index]);
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
|
||||
|
||||
/// Result column obtained from the provider for this attribute and `keys_size`.
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
||||
|
||||
/// String attributes are filled through the column pointer; every other type is filled
/// through the column's underlying data array.
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
getItemsStringImpl(index, key_columns, key_types, out, default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
getItemsNumberImpl<AttributeType, AttributeType>(
|
||||
index,
|
||||
key_columns,
|
||||
key_types,
|
||||
out,
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
/// Publish the filled column to the enclosing function's `result`.
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
|
||||
const size_t attribute_index,
|
||||
const Columns & key_columns, const DataTypes & key_types,
|
||||
ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
ResultArrayType<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
assert(dict_struct.key);
|
||||
assert(key_columns.size() == key_types.size());
|
||||
@ -1476,7 +1449,7 @@ void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
|
||||
|
||||
TemporalComplexKeysPool not_found_pool;
|
||||
std::unordered_map<KeyRef, std::vector<size_t>> not_found_keys;
|
||||
storage.getValue<OutputType>(attribute_index, key_columns, key_types, out, not_found_keys, not_found_pool, get_default, now);
|
||||
storage.getValue<OutputType>(attribute_index, key_columns, key_types, out, not_found_keys, not_found_pool, default_value_extractor, now);
|
||||
if (not_found_keys.empty())
|
||||
return;
|
||||
|
||||
@ -1503,54 +1476,17 @@ void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
|
||||
[&](const auto key)
|
||||
{
|
||||
for (const size_t row : not_found_keys[key])
|
||||
out[row] = get_default(row);
|
||||
out[row] = default_value_extractor[row];
|
||||
},
|
||||
getLifetime());
|
||||
}
|
||||
|
||||
void SSDComplexKeyCacheDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
|
||||
{
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
const auto null_value = StringRef{std::get<String>(null_values[index])};
|
||||
|
||||
getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
void SSDComplexKeyCacheDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns, const DataTypes & key_types,
|
||||
const ColumnString * const def, ColumnString * const out) const
|
||||
{
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t row) { return def->getDataAt(row); });
|
||||
}
|
||||
|
||||
void SSDComplexKeyCacheDictionary::getString(
|
||||
const std::string & attribute_name,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const String & def,
|
||||
ColumnString * const out) const
|
||||
{
|
||||
const auto index = getAttributeIndex(attribute_name);
|
||||
checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
|
||||
|
||||
getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return StringRef{def}; });
|
||||
}
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void SSDComplexKeyCacheDictionary::getItemsStringImpl(
|
||||
const size_t attribute_index,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
ColumnString * out,
|
||||
DefaultGetter && get_default) const
|
||||
DictionaryDefaultValueExtractor<String> & default_value_extractor) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
@ -1576,7 +1512,7 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
|
||||
{
|
||||
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
|
||||
{
|
||||
auto to_insert = get_default(row);
|
||||
auto to_insert = default_value_extractor[row];
|
||||
out->insertData(to_insert.data, to_insert.size);
|
||||
++default_index;
|
||||
}
|
||||
@ -1619,7 +1555,7 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
|
||||
SCOPE_EXIT(tmp_keys_pool.rollback(key));
|
||||
if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
|
||||
{
|
||||
auto to_insert = get_default(row);
|
||||
auto to_insert = default_value_extractor[row];
|
||||
out->insertData(to_insert.data, to_insert.size);
|
||||
++default_index;
|
||||
}
|
||||
@ -1633,26 +1569,31 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
|
||||
}
|
||||
else
|
||||
{
|
||||
auto to_insert = get_default(row);
|
||||
auto to_insert = default_value_extractor[row];
|
||||
out->insertData(to_insert.data, to_insert.size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SSDComplexKeyCacheDictionary::has(
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
PaddedPODArray<UInt8> & out) const
|
||||
ColumnUInt8::Ptr SSDComplexKeyCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
const auto rows_num = key_columns.front()->size();
|
||||
|
||||
auto result = ColumnUInt8::create(rows_num);
|
||||
auto& out = result->getData();
|
||||
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
out[row] = false;
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
std::unordered_map<KeyRef, std::vector<size_t>> not_found_keys;
|
||||
TemporalComplexKeysPool not_found_pool;
|
||||
storage.has(key_columns, key_types, out, not_found_keys, not_found_pool, now);
|
||||
storage.hasKeys(key_columns, key_types, out, not_found_keys, not_found_pool, now);
|
||||
if (not_found_keys.empty())
|
||||
return;
|
||||
return result;
|
||||
|
||||
std::vector<KeyRef> required_keys(not_found_keys.size());
|
||||
std::transform(std::begin(not_found_keys), std::end(not_found_keys), std::begin(required_keys), [](const auto & pair) { return pair.first; });
|
||||
@ -1681,6 +1622,8 @@ void SSDComplexKeyCacheDictionary::has(
|
||||
out[row] = false;
|
||||
},
|
||||
getLifetime());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BlockInputStreamPtr SSDComplexKeyCacheDictionary::getBlockInputStream(
|
||||
|
@ -2,11 +2,13 @@
|
||||
|
||||
#if defined(OS_LINUX) || defined(__FreeBSD__)
|
||||
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <list>
|
||||
#include <shared_mutex>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/Arena.h>
|
||||
@ -19,13 +21,11 @@
|
||||
#include <Dictionaries/BucketCache.h>
|
||||
#include <ext/scope_guard.h>
|
||||
#include <IO/HashingWriteBuffer.h>
|
||||
#include <list>
|
||||
#include <pcg_random.hpp>
|
||||
#include <Poco/Logger.h>
|
||||
#include <shared_mutex>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "IDictionary.h"
|
||||
#include "IDictionarySource.h"
|
||||
#include "DictionaryStructure.h"
|
||||
#include "DictionaryHelpers.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -313,7 +313,7 @@ public:
|
||||
template <typename Out, typename GetDefault>
|
||||
void getValue(const size_t attribute_index,
|
||||
const Columns & key_columns, const DataTypes & key_types,
|
||||
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
|
||||
ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
|
||||
std::chrono::system_clock::time_point now) const;
|
||||
|
||||
void getString(const size_t attribute_index,
|
||||
@ -321,7 +321,7 @@ public:
|
||||
StringRefs & refs, ArenaWithFreeLists & arena, std::vector<bool> & found,
|
||||
std::vector<size_t> & default_ids, std::chrono::system_clock::time_point now) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types,
|
||||
void hasKeys(const Columns & key_columns, const DataTypes & key_types,
|
||||
ResultArrayType<UInt8> & out, std::vector<bool> & found,
|
||||
std::chrono::system_clock::time_point now) const;
|
||||
|
||||
@ -459,7 +459,7 @@ public:
|
||||
TemporalComplexKeysPool & not_found_pool,
|
||||
std::vector<size_t> & default_ids, std::chrono::system_clock::time_point now) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
|
||||
void hasKeys(const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
|
||||
std::unordered_map<KeyRef, std::vector<size_t>> & not_found,
|
||||
TemporalComplexKeysPool & not_found_pool, std::chrono::system_clock::time_point now) const;
|
||||
|
||||
@ -569,88 +569,20 @@ public:
|
||||
|
||||
std::exception_ptr getLastException() const override { return storage.getLastException(); }
|
||||
|
||||
DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
|
||||
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_values_column) const override;
|
||||
|
||||
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = SSDComplexKeyCacheStorage::ResultArrayType<T>;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns,
|
||||
const DataTypes & key_types, ColumnString * out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const PaddedPODArray<TYPE> & def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns,
|
||||
const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const;
|
||||
|
||||
#define DECLARE(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const Columns & key_columns, \
|
||||
const DataTypes & key_types, \
|
||||
const TYPE def, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE(UInt8)
|
||||
DECLARE(UInt16)
|
||||
DECLARE(UInt32)
|
||||
DECLARE(UInt64)
|
||||
DECLARE(UInt128)
|
||||
DECLARE(Int8)
|
||||
DECLARE(Int16)
|
||||
DECLARE(Int32)
|
||||
DECLARE(Int64)
|
||||
DECLARE(Float32)
|
||||
DECLARE(Float64)
|
||||
DECLARE(Decimal32)
|
||||
DECLARE(Decimal64)
|
||||
DECLARE(Decimal128)
|
||||
#undef DECLARE
|
||||
|
||||
void getString(const std::string & attribute_name, const Columns & key_columns,
|
||||
const DataTypes & key_types, const String & def, ColumnString * const out) const;
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
@ -661,17 +593,20 @@ private:
|
||||
AttributeValueVariant createAttributeNullValueWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
void createAttributes();
|
||||
|
||||
template <typename AttributeType, typename OutputType, typename DefaultGetter>
|
||||
template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
|
||||
void getItemsNumberImpl(
|
||||
const size_t attribute_index,
|
||||
const Columns & key_columns, const DataTypes & key_types,
|
||||
ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
ResultArrayType<OutputType> & out,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
template <typename DefaultGetter>
|
||||
void getItemsStringImpl(
|
||||
const size_t attribute_index,
|
||||
const Columns & key_columns, const DataTypes & key_types,
|
||||
ColumnString * out, DefaultGetter && get_default) const;
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
ColumnString * out,
|
||||
DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
|
||||
|
||||
const std::string name;
|
||||
const DictionaryStructure dict_struct;
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include "registerDictionaries.h"
|
||||
|
||||
#if USE_ODBC
|
||||
# include <Poco/Data/ODBC/Connector.h>
|
||||
# include <Poco/Data/ODBC/Connector.h> // Y_IGNORE
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
|
@ -6,31 +6,25 @@ LIBRARY()
|
||||
PEERDIR(
|
||||
clickhouse/src/Common
|
||||
contrib/libs/poco/Data
|
||||
contrib/libs/poco/Data/ODBC
|
||||
contrib/libs/poco/MongoDB
|
||||
contrib/libs/poco/Redis
|
||||
contrib/libs/sparsehash
|
||||
)
|
||||
|
||||
IF (USE_ODBC)
|
||||
PEERDIR(contrib/libs/poco/Data/ODBC)
|
||||
ENDIF ()
|
||||
|
||||
NO_COMPILER_WARNINGS()
|
||||
|
||||
|
||||
SRCS(
|
||||
CacheDictionary.cpp
|
||||
CacheDictionary_generate1.cpp
|
||||
CacheDictionary_generate2.cpp
|
||||
CacheDictionary_generate3.cpp
|
||||
CassandraBlockInputStream.cpp
|
||||
CassandraDictionarySource.cpp
|
||||
CassandraHelpers.cpp
|
||||
ClickHouseDictionarySource.cpp
|
||||
ComplexKeyCacheDictionary.cpp
|
||||
ComplexKeyCacheDictionary_createAttributeWithType.cpp
|
||||
ComplexKeyCacheDictionary_generate1.cpp
|
||||
ComplexKeyCacheDictionary_generate2.cpp
|
||||
ComplexKeyCacheDictionary_generate3.cpp
|
||||
ComplexKeyCacheDictionary_setAttributeValue.cpp
|
||||
ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp
|
||||
ComplexKeyDirectDictionary.cpp
|
||||
ComplexKeyHashedDictionary.cpp
|
||||
DictionaryBlockInputStreamBase.cpp
|
||||
|
@ -5,12 +5,15 @@ LIBRARY()
|
||||
PEERDIR(
|
||||
clickhouse/src/Common
|
||||
contrib/libs/poco/Data
|
||||
contrib/libs/poco/Data/ODBC
|
||||
contrib/libs/poco/MongoDB
|
||||
contrib/libs/poco/Redis
|
||||
contrib/libs/sparsehash
|
||||
)
|
||||
|
||||
IF (USE_ODBC)
|
||||
PEERDIR(contrib/libs/poco/Data/ODBC)
|
||||
ENDIF ()
|
||||
|
||||
NO_COMPILER_WARNINGS()
|
||||
|
||||
|
||||
|
@ -146,20 +146,32 @@ Block MySQLBlockInputStream::readImpl()
|
||||
const auto value = row[position_mapping[index]];
|
||||
const auto & sample = description.sample_block.getByPosition(index);
|
||||
|
||||
bool is_type_nullable = description.types[index].second;
|
||||
|
||||
if (!value.isNull())
|
||||
{
|
||||
if (description.types[index].second)
|
||||
if (is_type_nullable)
|
||||
{
|
||||
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[index]);
|
||||
const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
|
||||
insertValue(*data_type.getNestedType(), column_nullable.getNestedColumn(), description.types[index].first, value);
|
||||
column_nullable.getNullMapData().emplace_back(0);
|
||||
column_nullable.getNullMapData().emplace_back(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
insertValue(*sample.type, *columns[index], description.types[index].first, value);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
insertDefaultValue(*columns[index], *sample.column);
|
||||
|
||||
if (is_type_nullable)
|
||||
{
|
||||
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[index]);
|
||||
column_nullable.getNullMapData().back() = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
++num_rows;
|
||||
|
@ -504,7 +504,7 @@ private:
|
||||
using namespace traits_;
|
||||
using namespace impl_;
|
||||
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
|
||||
class FunctionBinaryArithmetic : public IFunction
|
||||
{
|
||||
static constexpr const bool is_plus = IsOperation<Op>::plus;
|
||||
@ -542,16 +542,54 @@ class FunctionBinaryArithmetic : public IFunction
|
||||
>(type, std::forward<F>(f));
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
static bool castTypeNoFloats(const IDataType * type, F && f)
|
||||
{
|
||||
return castTypeToEither<
|
||||
DataTypeUInt8,
|
||||
DataTypeUInt16,
|
||||
DataTypeUInt32,
|
||||
DataTypeUInt64,
|
||||
DataTypeUInt256,
|
||||
DataTypeInt8,
|
||||
DataTypeInt16,
|
||||
DataTypeInt32,
|
||||
DataTypeInt64,
|
||||
DataTypeInt128,
|
||||
DataTypeInt256,
|
||||
DataTypeDate,
|
||||
DataTypeDateTime,
|
||||
DataTypeDecimal<Decimal32>,
|
||||
DataTypeDecimal<Decimal64>,
|
||||
DataTypeDecimal<Decimal128>,
|
||||
DataTypeDecimal<Decimal256>,
|
||||
DataTypeFixedString
|
||||
>(type, std::forward<F>(f));
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
static bool castBothTypes(const IDataType * left, const IDataType * right, F && f)
|
||||
{
|
||||
return castType(left, [&](const auto & left_)
|
||||
if constexpr (valid_on_float_arguments)
|
||||
{
|
||||
return castType(right, [&](const auto & right_)
|
||||
return castType(left, [&](const auto & left_)
|
||||
{
|
||||
return f(left_, right_);
|
||||
return castType(right, [&](const auto & right_)
|
||||
{
|
||||
return f(left_, right_);
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
return castTypeNoFloats(left, [&](const auto & left_)
|
||||
{
|
||||
return castTypeNoFloats(right, [&](const auto & right_)
|
||||
{
|
||||
return f(left_, right_);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
static FunctionOverloadResolverPtr
|
||||
@ -1319,11 +1357,11 @@ public:
|
||||
};
|
||||
|
||||
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
|
||||
class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
|
||||
class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>
|
||||
{
|
||||
public:
|
||||
using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>;
|
||||
using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>;
|
||||
using Monotonicity = typename Base::Monotonicity;
|
||||
|
||||
static FunctionPtr create(
|
||||
@ -1488,7 +1526,7 @@ private:
|
||||
DataTypePtr return_type;
|
||||
};
|
||||
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
|
||||
class BinaryArithmeticOverloadResolver : public IFunctionOverloadResolverImpl
|
||||
{
|
||||
public:
|
||||
@ -1512,14 +1550,14 @@ public:
|
||||
|| (arguments[1].column && isColumnConst(*arguments[1].column))))
|
||||
{
|
||||
return std::make_unique<DefaultFunction>(
|
||||
FunctionBinaryArithmeticWithConstants<Op, Name, valid_on_default_arguments>::create(
|
||||
FunctionBinaryArithmeticWithConstants<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::create(
|
||||
arguments[0], arguments[1], return_type, context),
|
||||
ext::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
|
||||
return_type);
|
||||
}
|
||||
|
||||
return std::make_unique<DefaultFunction>(
|
||||
FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>::create(context),
|
||||
FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::create(context),
|
||||
ext::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
|
||||
return_type);
|
||||
}
|
||||
@ -1530,7 +1568,7 @@ public:
|
||||
throw Exception(
|
||||
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be 2",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
return FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>::getReturnTypeImplStatic(arguments, context);
|
||||
return FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::getReturnTypeImplStatic(arguments, context);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -212,4 +212,12 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments)
|
||||
return {nested_columns, offsets->data()};
|
||||
}
|
||||
|
||||
bool areTypesEqual(const DataTypePtr & lhs, const DataTypePtr & rhs)
|
||||
{
|
||||
const auto & lhs_name = lhs->getName();
|
||||
const auto & rhs_name = rhs->getName();
|
||||
|
||||
return lhs_name == rhs_name;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -152,4 +152,8 @@ void validateFunctionArgumentTypes(const IFunction & func, const ColumnsWithType
|
||||
std::pair<std::vector<const IColumn *>, const ColumnArray::Offset *>
|
||||
checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments);
|
||||
|
||||
|
||||
/// Check whether two types are equal (the types are compared by name).
|
||||
bool areTypesEqual(const DataTypePtr & lhs, const DataTypePtr & rhs);
|
||||
|
||||
}
|
||||
|
@ -38,8 +38,8 @@ void registerFunctionsExternalDictionaries(FunctionFactory & factory)
|
||||
factory.registerFunction<FunctionDictGetDateTimeOrDefault>();
|
||||
factory.registerFunction<FunctionDictGetUUIDOrDefault>();
|
||||
factory.registerFunction<FunctionDictGetStringOrDefault>();
|
||||
factory.registerFunction<FunctionDictGetNoType>();
|
||||
factory.registerFunction<FunctionDictGetNoTypeOrDefault>();
|
||||
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::get>>();
|
||||
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::getOrDefault>>();
|
||||
}
|
||||
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -101,7 +101,8 @@ struct IntegerRoundingComputation
|
||||
return scale;
|
||||
}
|
||||
|
||||
static ALWAYS_INLINE T computeImpl(T x, T scale)
|
||||
/// Integer overflow is Ok.
|
||||
static ALWAYS_INLINE_NO_SANITIZE_UNDEFINED T computeImpl(T x, T scale)
|
||||
{
|
||||
switch (rounding_mode)
|
||||
{
|
||||
|
@ -21,7 +21,7 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline constexpr bool is_gcd_lcm_implemeted = !(is_big_int_v<T> || std::is_floating_point_v<T>);
|
||||
inline constexpr bool is_gcd_lcm_implemeted = !is_big_int_v<T>;
|
||||
|
||||
template <typename A, typename B, typename Impl, typename Name>
|
||||
struct GCDLCMImpl
|
||||
@ -33,7 +33,7 @@ struct GCDLCMImpl
|
||||
static inline std::enable_if_t<!is_gcd_lcm_implemeted<Result>, Result>
|
||||
apply(A, B)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers and floats", Name::name);
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers", Name::name);
|
||||
}
|
||||
|
||||
template <typename Result = ResultType>
|
||||
|
@ -353,6 +353,9 @@ bool FunctionArrayEnumerateExtended<Derived>::execute128bit(
|
||||
keys_bytes += key_sizes[j];
|
||||
}
|
||||
|
||||
if (keys_bytes > 16)
|
||||
return false;
|
||||
|
||||
executeMethod<MethodFixed>(offsets, columns, key_sizes, nullptr, res_values);
|
||||
return true;
|
||||
}
|
||||
|
@ -37,7 +37,7 @@ struct BitAndImpl
|
||||
};
|
||||
|
||||
struct NameBitAnd { static constexpr auto name = "bitAnd"; };
|
||||
using FunctionBitAnd = BinaryArithmeticOverloadResolver<BitAndImpl, NameBitAnd, true>;
|
||||
using FunctionBitAnd = BinaryArithmeticOverloadResolver<BitAndImpl, NameBitAnd, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,7 @@ struct BitOrImpl
|
||||
};
|
||||
|
||||
struct NameBitOr { static constexpr auto name = "bitOr"; };
|
||||
using FunctionBitOr = BinaryArithmeticOverloadResolver<BitOrImpl, NameBitOr, true>;
|
||||
using FunctionBitOr = BinaryArithmeticOverloadResolver<BitOrImpl, NameBitOr, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -43,7 +43,7 @@ struct BitRotateLeftImpl
|
||||
};
|
||||
|
||||
struct NameBitRotateLeft { static constexpr auto name = "bitRotateLeft"; };
|
||||
using FunctionBitRotateLeft = BinaryArithmeticOverloadResolver<BitRotateLeftImpl, NameBitRotateLeft>;
|
||||
using FunctionBitRotateLeft = BinaryArithmeticOverloadResolver<BitRotateLeftImpl, NameBitRotateLeft, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -42,7 +42,7 @@ struct BitRotateRightImpl
|
||||
};
|
||||
|
||||
struct NameBitRotateRight { static constexpr auto name = "bitRotateRight"; };
|
||||
using FunctionBitRotateRight = BinaryArithmeticOverloadResolver<BitRotateRightImpl, NameBitRotateRight>;
|
||||
using FunctionBitRotateRight = BinaryArithmeticOverloadResolver<BitRotateRightImpl, NameBitRotateRight, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -42,7 +42,7 @@ struct BitShiftLeftImpl
|
||||
};
|
||||
|
||||
struct NameBitShiftLeft { static constexpr auto name = "bitShiftLeft"; };
|
||||
using FunctionBitShiftLeft = BinaryArithmeticOverloadResolver<BitShiftLeftImpl, NameBitShiftLeft>;
|
||||
using FunctionBitShiftLeft = BinaryArithmeticOverloadResolver<BitShiftLeftImpl, NameBitShiftLeft, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -42,7 +42,7 @@ struct BitShiftRightImpl
|
||||
};
|
||||
|
||||
struct NameBitShiftRight { static constexpr auto name = "bitShiftRight"; };
|
||||
using FunctionBitShiftRight = BinaryArithmeticOverloadResolver<BitShiftRightImpl, NameBitShiftRight>;
|
||||
using FunctionBitShiftRight = BinaryArithmeticOverloadResolver<BitShiftRightImpl, NameBitShiftRight, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,7 @@ struct BitTestImpl
|
||||
};
|
||||
|
||||
struct NameBitTest { static constexpr auto name = "bitTest"; };
|
||||
using FunctionBitTest = BinaryArithmeticOverloadResolver<BitTestImpl, NameBitTest>;
|
||||
using FunctionBitTest = BinaryArithmeticOverloadResolver<BitTestImpl, NameBitTest, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,7 @@ struct BitXorImpl
|
||||
};
|
||||
|
||||
struct NameBitXor { static constexpr auto name = "bitXor"; };
|
||||
using FunctionBitXor = BinaryArithmeticOverloadResolver<BitXorImpl, NameBitXor, true>;
|
||||
using FunctionBitXor = BinaryArithmeticOverloadResolver<BitXorImpl, NameBitXor, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -23,7 +23,7 @@ struct GCDImpl : public GCDLCMImpl<A, B, GCDImpl<A, B>, NameGCD>
|
||||
}
|
||||
};
|
||||
|
||||
using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false>;
|
||||
using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -95,7 +95,7 @@ void geodistInit()
|
||||
|
||||
sphere_metric_meters_lut[i] = static_cast<float>(sqr((EARTH_DIAMETER * PI / 360) * cos(latitude)));
|
||||
|
||||
sphere_metric_lut[i] = cosf(latitude);
|
||||
sphere_metric_lut[i] = sqrf(cosf(latitude));
|
||||
}
|
||||
}
|
||||
|
||||
@ -182,7 +182,7 @@ float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg)
|
||||
/// (Remember how a plane flies from Moscow to New York)
|
||||
/// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line.
|
||||
|
||||
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, KTABLE] indexes
|
||||
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes
|
||||
size_t latitude_midpoint_index = floatToIndex(latitude_midpoint) & (METRIC_LUT_SIZE - 1);
|
||||
|
||||
/// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1".
|
||||
|
@ -54,7 +54,7 @@ struct LCMImpl : public GCDLCMImpl<A, B, LCMImpl<A, B>, NameLCM>
|
||||
}
|
||||
};
|
||||
|
||||
using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false>;
|
||||
using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -69,6 +69,10 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion()
|
||||
boost::algorithm::to_lower(matched_region);
|
||||
region = matched_region;
|
||||
}
|
||||
else
|
||||
{
|
||||
region = Aws::Region::AWS_GLOBAL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -609,10 +609,10 @@ bool ActionsDAG::hasStatefulFunctions() const
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ActionsDAG::empty() const
|
||||
bool ActionsDAG::trivial() const
|
||||
{
|
||||
for (const auto & node : nodes)
|
||||
if (node.type != ActionType::INPUT)
|
||||
if (node.type == ActionType::FUNCTION || node.type == ActionType::ARRAY_JOIN)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
@ -223,7 +223,7 @@ public:
|
||||
|
||||
bool hasArrayJoin() const;
|
||||
bool hasStatefulFunctions() const;
|
||||
bool empty() const; /// If actions only contain inputs.
|
||||
bool trivial() const; /// True if the actions contain no functions and no array joins.
|
||||
|
||||
const ActionsSettings & getSettings() const { return settings; }
|
||||
|
||||
|
@ -50,7 +50,6 @@
|
||||
#include <Interpreters/SystemLog.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/DDLWorker.h>
|
||||
#include <Common/DNSResolver.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/UncompressedCache.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
|
@ -51,7 +51,6 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
|
||||
#include <Interpreters/DNSCacheUpdater.h>
|
||||
#include <Common/SensitiveDataMasker.h>
|
||||
|
||||
#include <Processors/Transforms/LimitsCheckingTransform.h>
|
||||
|
56
src/Processors/QueryPlan/Optimizations/Optimizations.h
Normal file
56
src/Processors/QueryPlan/Optimizations/Optimizations.h
Normal file
@ -0,0 +1,56 @@
|
||||
#pragma once
|
||||
#include <Processors/QueryPlan/QueryPlan.h>
|
||||
#include <array>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace QueryPlanOptimizations
|
||||
{
|
||||
|
||||
/// This is the main function which optimizes the whole QueryPlan tree.
|
||||
void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes);
|
||||
|
||||
/// Optimization is a function applied to QueryPlan::Node.
|
||||
/// It can read and update subtree of specified node.
|
||||
/// It return the number of updated layers of subtree if some change happened.
|
||||
/// It must guarantee that the structure of tree is correct.
|
||||
///
|
||||
/// New nodes should be added to QueryPlan::Nodes list.
|
||||
/// It is not needed to remove old nodes from the list.
|
||||
struct Optimization
|
||||
{
|
||||
using Function = size_t (*)(QueryPlan::Node *, QueryPlan::Nodes &);
|
||||
const Function apply = nullptr;
|
||||
const char * name;
|
||||
};
|
||||
|
||||
/// Move ARRAY JOIN up if possible.
|
||||
size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
|
||||
|
||||
/// Move LimitStep down if possible.
|
||||
size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
|
||||
|
||||
/// Split FilterStep into a chain `ExpressionStep -> FilterStep`, where FilterStep contains a minimal number of nodes.
|
||||
size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes);
|
||||
|
||||
/// Replace the chain `ExpressionStep -> ExpressionStep` with a single ExpressionStep.
|
||||
/// Replace the chain `FilterStep -> ExpressionStep` with a single FilterStep.
|
||||
size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
|
||||
|
||||
inline const auto & getOptimizations()
|
||||
{
|
||||
static const std::array<Optimization, 4> optimizations =
|
||||
{{
|
||||
{tryLiftUpArrayJoin, "liftUpArrayJoin"},
|
||||
{tryPushDownLimit, "pushDownLimit"},
|
||||
{trySplitFilter, "splitFilter"},
|
||||
{tryMergeExpressions, "mergeExpressions"},
|
||||
}};
|
||||
|
||||
return optimizations;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
85
src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
Normal file
85
src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
Normal file
@ -0,0 +1,85 @@
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <Processors/QueryPlan/FilterStep.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
#include <Processors/QueryPlan/ArrayJoinStep.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include <Interpreters/ArrayJoinAction.h>
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
/// Handles the chain `Expression/Filter -> ArrayJoin`: the part of the expression which can be
/// calculated before ARRAY JOIN is moved below the ArrayJoin step.
///
/// Returns:
///   0 - nothing was changed;
///   2 - the whole expression was moved, so the two steps were swapped;
///   3 - the expression was split and a new Expression node was inserted below ArrayJoin.
size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
{
    /// Only a node with exactly one child is considered.
    if (parent_node->children.size() != 1)
        return 0;

    QueryPlan::Node * child_node = parent_node->children.front();

    /// References to the owning pointers: they are swapped / reassigned below.
    auto & upper_step = parent_node->step;
    auto & lower_step = child_node->step;

    auto * expression_step = typeid_cast<ExpressionStep *>(upper_step.get());
    auto * filter_step = typeid_cast<FilterStep *>(upper_step.get());
    auto * array_join_step = typeid_cast<ArrayJoinStep *>(lower_step.get());

    /// Parent must be Expression or Filter ...
    if (!expression_step && !filter_step)
        return 0;

    /// ... and child must be ArrayJoin.
    if (!array_join_step)
        return 0;

    const auto & array_join = array_join_step->arrayJoin();
    const auto & expression = expression_step
        ? expression_step->getExpression()
        : filter_step->getExpression();

    auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns);
    auto & actions_before = split_actions.first;
    auto & actions_after = split_actions.second;

    /// No actions can be moved before ARRAY JOIN.
    if (actions_before->trivial())
        return 0;

    auto description = upper_step->getStepDescription();

    /// All actions were moved before ARRAY JOIN. Swap Expression and ArrayJoin.
    if (actions_after->trivial())
    {
        auto expected_header = upper_step->getOutputStream().header;

        /// Before: Expression/Filter -> ArrayJoin
        std::swap(upper_step, lower_step);
        /// After:  ArrayJoin -> Expression/Filter

        /// The step which is now below ArrayJoin is rebuilt on top of the output of the grandchild.
        /// `filter_step` still points to the old Filter object here; its properties are read
        /// before the assignment replaces (and destroys) it.
        const auto & input_stream = child_node->children.at(0)->step->getOutputStream();

        if (expression_step)
            lower_step = std::make_unique<ExpressionStep>(input_stream, std::move(actions_before));
        else
            lower_step = std::make_unique<FilterStep>(
                input_stream,
                std::move(actions_before),
                filter_step->getFilterColumnName(),
                filter_step->removesFilterColumn());

        lower_step->setStepDescription(std::move(description));

        array_join_step->updateInputStream(lower_step->getOutputStream(), expected_header);
        return 2;
    }

    /// Add new expression step before ARRAY JOIN.
    /// Before: Expression/Filter -> ArrayJoin -> Something
    auto & inserted = nodes.emplace_back();
    inserted.children.swap(child_node->children);
    child_node->children.emplace_back(&inserted);
    /// After:  Expression/Filter -> ArrayJoin -> inserted -> Something

    inserted.step = std::make_unique<ExpressionStep>(
        inserted.children.at(0)->step->getOutputStream(),
        std::move(actions_before));
    inserted.step->setStepDescription(description);

    array_join_step->updateInputStream(inserted.step->getOutputStream(), {});

    /// The remaining actions stay above ARRAY JOIN in a step of the same kind as the original one.
    if (expression_step)
        upper_step = std::make_unique<ExpressionStep>(array_join_step->getOutputStream(), actions_after);
    else
        upper_step = std::make_unique<FilterStep>(
            array_join_step->getOutputStream(),
            actions_after,
            filter_step->getFilterColumnName(),
            filter_step->removesFilterColumn());

    upper_step->setStepDescription(description + " [split]");
    return 3;
}
|
||||
|
||||
}
|
114
src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
Normal file
114
src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
Normal file
@ -0,0 +1,114 @@
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <Processors/QueryPlan/ITransformingStep.h>
|
||||
#include <Processors/QueryPlan/LimitStep.h>
|
||||
#include <Processors/QueryPlan/TotalsHavingStep.h>
|
||||
#include <Processors/QueryPlan/MergingSortedStep.h>
|
||||
#include <Processors/QueryPlan/FinishSortingStep.h>
|
||||
#include <Processors/QueryPlan/MergeSortingStep.h>
|
||||
#include <Processors/QueryPlan/PartialSortingStep.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
/// If plan looks like Limit -> Sorting, update limit for Sorting
|
||||
static bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit)
|
||||
{
|
||||
if (limit == 0)
|
||||
return false;
|
||||
|
||||
QueryPlanStepPtr & step = node->step;
|
||||
QueryPlan::Node * child = nullptr;
|
||||
bool updated = false;
|
||||
|
||||
if (auto * merging_sorted = typeid_cast<MergingSortedStep *>(step.get()))
|
||||
{
|
||||
/// TODO: remove LimitStep here.
|
||||
merging_sorted->updateLimit(limit);
|
||||
updated = true;
|
||||
child = node->children.front();
|
||||
}
|
||||
else if (auto * finish_sorting = typeid_cast<FinishSortingStep *>(step.get()))
|
||||
{
|
||||
/// TODO: remove LimitStep here.
|
||||
finish_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
}
|
||||
else if (auto * merge_sorting = typeid_cast<MergeSortingStep *>(step.get()))
|
||||
{
|
||||
merge_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
child = node->children.front();
|
||||
}
|
||||
else if (auto * partial_sorting = typeid_cast<PartialSortingStep *>(step.get()))
|
||||
{
|
||||
partial_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
}
|
||||
|
||||
/// We often have chain PartialSorting -> MergeSorting -> MergingSorted
|
||||
/// Try update limit for them also if possible.
|
||||
if (child)
|
||||
tryUpdateLimitForSortingSteps(child, limit);
|
||||
|
||||
return updated;
|
||||
}
|
||||
|
||||
/// Handles the chain `Limit -> transforming step`: swaps the two steps when the checks below pass.
/// For sorting steps the limit is only passed to them and the plan structure is kept.
/// Returns 2 if the steps were swapped and 0 otherwise.
size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
{
    if (parent_node->children.size() != 1)
        return 0;

    QueryPlan::Node * child_node = parent_node->children.front();

    auto & upper_step = parent_node->step;
    auto & lower_step = child_node->step;

    auto * limit_step = typeid_cast<LimitStep *>(upper_step.get());

    /// Parent must be a Limit. Skip LIMIT WITH TIES by now.
    if (!limit_step || limit_step->withTies())
        return 0;

    /// Skip everything which is not transform.
    const auto * transform_step = dynamic_cast<const ITransformingStep *>(lower_step.get());
    if (!transform_step)
        return 0;

    /// Special cases for sorting steps: the limit is set for them, the steps are not swapped.
    if (tryUpdateLimitForSortingSteps(child_node, limit_step->getLimitForSorting()))
        return 0;

    /// Special case for TotalsHaving. Totals may be incorrect if we push down limit.
    if (typeid_cast<const TotalsHavingStep *>(lower_step.get()))
        return 0;

    /// Now we should decide if pushing down limit possible for this step.

    /// Cannot push down if child changes the number of rows.
    const bool keeps_number_of_rows = transform_step->getTransformTraits().preserves_number_of_rows;
    if (!keeps_number_of_rows)
        return 0;

    /// Cannot push down if data was sorted exactly by child stream.
    const bool child_output_is_sorted = !lower_step->getOutputStream().sort_description.empty();
    if (child_output_is_sorted && !transform_step->getDataStreamTraits().preserves_sorting)
        return 0;

    /// Now we push down limit only if it doesn't change any stream properties.
    /// TODO: some of them may be changed and, probably, not important for following streams. We may add such info.
    const bool same_properties = limit_step->getOutputStream().hasEqualPropertiesWith(transform_step->getOutputStream());
    if (!same_properties)
        return 0;

    /// Input stream for Limit have changed.
    limit_step->updateInputStream(transform_step->getInputStreams().front());

    upper_step.swap(lower_step);
    return 2;
}
|
||||
|
||||
}
|
65
src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
Normal file
65
src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
Normal file
@ -0,0 +1,65 @@
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <Processors/QueryPlan/FilterStep.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
/// Merges the chain `Expression -> Expression` into a single ExpressionStep and
/// the chain `Filter -> Expression` into a single FilterStep.
///
/// The merged step replaces the step of `parent_node`, and `parent_node` takes over
/// the children of its former child. The former child node is left in the nodes list.
///
/// Returns 1 if the steps were merged and 0 if nothing was changed.
size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
{
    if (parent_node->children.size() != 1)
        return 0;

    QueryPlan::Node * child_node = parent_node->children.front();

    auto & parent = parent_node->step;
    auto & child = child_node->step;

    auto * parent_expr = typeid_cast<ExpressionStep *>(parent.get());
    auto * parent_filter = typeid_cast<FilterStep *>(parent.get());
    auto * child_expr = typeid_cast<ExpressionStep *>(child.get());

    /// Child must be Expression, parent must be Expression or Filter.
    if (!child_expr || !(parent_expr || parent_filter))
        return 0;

    const auto & child_actions = child_expr->getExpression();
    const auto & parent_actions = parent_expr ? parent_expr->getExpression()
                                              : parent_filter->getExpression();

    /// We cannot combine actions with arrayJoin and stateful function because we not always can reorder them.
    /// Example: select rowNumberInBlock() from (select arrayJoin([1, 2]))
    /// Such a query will return two zeroes if we combine actions together.
    if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
        return 0;

    auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));

    auto description = "(" + parent->getStepDescription() + " + " + child_expr->getStepDescription() + ")";

    /// The new step is fully constructed (reading everything it needs from the old steps)
    /// before it is assigned to `parent`, which destroys the old parent step.
    if (parent_expr)
    {
        auto expr = std::make_unique<ExpressionStep>(child_expr->getInputStreams().front(), merged);
        expr->setStepDescription(std::move(description));
        parent = std::move(expr);
    }
    else
    {
        auto filter = std::make_unique<FilterStep>(
            child_expr->getInputStreams().front(),
            merged,
            parent_filter->getFilterColumnName(),
            parent_filter->removesFilterColumn());
        filter->setStepDescription(std::move(description));
        parent = std::move(filter);
    }

    /// Skip the old child node: parent now reads directly from the child's inputs.
    parent_node->children.swap(child_node->children);
    return 1;
}
|
||||
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user